From 10a4f0dd780c4f1854ee74287a1eead230f3ba64 Mon Sep 17 00:00:00 2001
From: lhenry15
Date: Tue, 8 Sep 2020 01:19:53 -0500
Subject: [PATCH] first commit

Former-commit-id: 08bc23ba02cffbce3cf63962390a65459a132e48 [formerly 0795edd4834b9b7dc66db8d10d4cbaf42bbf82cb] [formerly b5010b42541add7e2ea2578bf2da537efc457757 [formerly a7ca09c2c34c4fc8b3d8e01fcfa08eeeb2cae99d]] [formerly 615058473a2177ca5b89e9edbb797f4c2a59c7e5 [formerly 743d8dfc6843c4c205051a8ab309fbb2116c895e] [formerly bb0ea98b1e14154ef464e2f7a16738705894e54b [formerly 960a69da74b81ef8093820e003f2d6c59a34974c]]] [formerly 2fa3be52c1b44665bc81a7cc7d4cea4bbf0d91d5 [formerly 2054589f0898627e0a17132fd9d4cc78efc91867] [formerly 3b53730e8a895e803dfdd6ca72bc05e17a4164c1 [formerly 8a2fa8ab7baf6686d21af1f322df46fd58c60e69]] [formerly 87d1e3a07a19d03c7d7c94d93ab4fa9f58dada7c [formerly f331916385a5afac1234854ee8d7f160f34b668f] [formerly 69fb3c78a483343f5071da4f7e2891b83a49dd18 [formerly 386086f05aa9487f65bce2ee54438acbdce57650]]]]

Former-commit-id: a00aed8c934a6460c4d9ac902b9a74a3d6864697 [formerly 26fdeca29c2f07916d837883983ca2982056c78e] [formerly 0e3170d41a2f99ecf5c918183d361d4399d793bf [formerly 3c12ad4c88ac5192e0f5606ac0d88dd5bf8602dc]] [formerly d5894f84f2fd2e77a6913efdc5ae388cf1be0495 [formerly ad3e7bc670ff92c992730d29c9d3aa1598d844e8] [formerly 69fb3c78a483343f5071da4f7e2891b83a49dd18]]

Former-commit-id: 3c19c9fae64f6106415fbc948a4dc613b9ee12f8 [formerly 467ddc0549c74bb007e8f01773bb6dc9103b417d] [formerly 5fa518345d958e2760e443b366883295de6d991c [formerly 3530e130b9fdb7280f638dbc2e785d2165ba82aa]]

Former-commit-id: 9f5d473d42a435ec0d60149939d09be1acc25d92 [formerly be0b25c4ec2cde052a041baf0e11f774a158105d]

Former-commit-id: 9eca71cb73ba9edccd70ac06a3b636b8d4093b04
---
 .gitignore | 116 + README.md | 143 + axolotl/.gitignore | 108 + axolotl/.gitlab-ci.yml | 33 + axolotl/.gitmodules | 3 + axolotl/LICENSE | 201 + axolotl/README.md | 41 + axolotl/axolotl/__init__.py | 2 + axolotl/axolotl/algorithms/__init__.py | 0 .../algorithms/autokeras_integration/__init__.py | 82 + .../algorithms/autokeras_integration/block.py | 205 + .../algorithms/autokeras_integration/constants.py | 23 + .../algorithms/autokeras_integration/mapping.py | 122 + .../algorithms/autokeras_integration/steps.py | 126 + axolotl/axolotl/algorithms/autokeras_search.py | 145 + axolotl/axolotl/algorithms/base.py | 241 + axolotl/axolotl/algorithms/bayesian_search.py | 27 + axolotl/axolotl/algorithms/data_driven_search.py | 1086 +++ axolotl/axolotl/algorithms/dummy.py | 87 + axolotl/axolotl/algorithms/random_search.py | 27 + axolotl/axolotl/algorithms/tuners/__init__.py | 0 .../axolotl/algorithms/tuners/bayesian_oracle.py | 198 + axolotl/axolotl/algorithms/tuners/custom_hps.py | 535 ++ .../axolotl/algorithms/tuners/hyperparameters.py | 195 + axolotl/axolotl/algorithms/tuners/oracle.py | 104 + .../algorithms/tuners/random_search_oracle.py | 66 + axolotl/axolotl/algorithms/tuners/tunable_base.py | 258 + axolotl/axolotl/backend/__init__.py | 0 axolotl/axolotl/backend/base.py | 313 + axolotl/axolotl/backend/ray.py | 269 + axolotl/axolotl/backend/simple.py | 178 + axolotl/axolotl/d3m_grpc/__init__.py | 0 axolotl/axolotl/d3m_grpc/constants.py | 127 + axolotl/axolotl/d3m_grpc/server.py | 854 +++ axolotl/axolotl/predefined_pipelines/__init__.py | 133 + .../predefined_pipelines/base_preprocessor.py | 278 + .../axolotl/predefined_pipelines/preprocessor.py | 350 + axolotl/axolotl/utils/__init__.py | 0 axolotl/axolotl/utils/data_problem.py | 340 + axolotl/axolotl/utils/pipeline.py | 542 ++ 
axolotl/axolotl/utils/resources.py | 31 + axolotl/axolotl/utils/resources/blocklist.json | 31 + .../axolotl/utils/resources/default_pipelines.json | 64 + .../axolotl/utils/resources/scoring_pipeline.yml | 31 + .../utils/resources/splitting_pipelines.json | 7 + axolotl/axolotl/utils/schemas.py | 472 ++ axolotl/examples/build_search_algorithm.ipynb | 284 + axolotl/examples/load_csv.ipynb | 424 ++ axolotl/examples/random_search/oracle.json | 1 + axolotl/examples/run.py | 31 + ...c_data_bayesian_hp_tunning.ipynb.REMOVED.git-id | 1 + axolotl/failed_installation_repos.txt | 11 + axolotl/images/Devd3mStart.sh | 39 + axolotl/images/axolotl.dockerfile | 13 + axolotl/images/base.dockerfile | 3 + axolotl/images/build-images.sh | 21 + axolotl/run_tests.py | 11 + axolotl/setup.py | 53 + axolotl/tests/__init__.py | 0 axolotl/tests/_server_test.py | 383 ++ axolotl/tests/data/.gitignore | 10 + axolotl/tests/data/.gitlab-ci.yml | 42 + axolotl/tests/data/README.md | 10 + axolotl/tests/data/add.sh | 20 + .../data/datasets/audio_dataset_1/datasetDoc.json | 82 + .../datasets/audio_dataset_1/media/test_audio.mp3 | Bin 0 -> 1271 bytes .../audio_dataset_1/tables/learningData.csv | 2 + .../data/datasets/boston_dataset_1/datasetDoc.json | 164 + .../boston_dataset_1/tables/learningData.csv | 507 ++ .../datasets/database_dataset_1/datasetDoc.json | 200 + .../datasets/database_dataset_1/tables/authors.csv | 4 + .../datasets/database_dataset_1/tables/codes.csv | 4 + .../database_dataset_1/tables/learningData.csv | 46 + .../datasets/database_dataset_1/tables/values.csv | 65 + .../datasets/database_dataset_2/datasetDoc.json | 196 + .../database_dataset_2/tables/comments.csv | 1001 +++ .../database_dataset_2/tables/learningData.csv | 101 + .../datasets/database_dataset_2/tables/posts.csv | 1001 +++ .../datasets/database_dataset_2/tables/users.csv | 101 + .../datasets/database_dataset_3/datasetDoc.json | 188 + .../database_dataset_3/tables/comments.csv | 1001 +++ .../database_dataset_3/tables/learningData.csv | 1001 +++ .../datasets/database_dataset_3/tables/posts.csv | 1001 +++ .../datasets/database_dataset_3/tables/users.csv | 101 + .../datasets/database_dataset_4/datasetDoc.json | 202 + .../database_dataset_4/tables/comments.csv | 1001 +++ .../database_dataset_4/tables/learningData.csv | 201 + .../datasets/database_dataset_4/tables/posts.csv | 1001 +++ .../datasets/database_dataset_4/tables/users.csv | 101 + .../data/datasets/graph_dataset_1/datasetDoc.json | 68 + .../data/datasets/graph_dataset_1/graphs/G1.gml | 98 + .../graph_dataset_1/tables/learningData.csv | 12 + .../data/datasets/graph_dataset_2/datasetDoc.json | 118 + .../datasets/graph_dataset_2/tables/edgeList.csv | 7 + .../graph_dataset_2/tables/learningData.csv | 12 + .../data/datasets/image_dataset_1/datasetDoc.json | 71 + .../media/001_HandPhoto_left_01.jpg | Bin 0 -> 6733 bytes .../image_dataset_1/media/cifar10_bird_1.png | Bin 0 -> 2276 bytes .../image_dataset_1/media/cifar10_bird_2.png | Bin 0 -> 2034 bytes .../datasets/image_dataset_1/media/mnist_0_2.png | Bin 0 -> 289 bytes .../datasets/image_dataset_1/media/mnist_1_1.png | Bin 0 -> 208 bytes .../image_dataset_1/tables/learningData.csv | 6 + .../data/datasets/image_dataset_2/datasetDoc.json | 66 + .../datasets/image_dataset_2/media/img_00000.png | Bin 0 -> 313 bytes .../datasets/image_dataset_2/media/img_00001.png | Bin 0 -> 312 bytes .../datasets/image_dataset_2/media/img_00002.png | Bin 0 -> 269 bytes .../datasets/image_dataset_2/media/img_00003.png | Bin 0 -> 213 bytes 
.../datasets/image_dataset_2/media/img_00004.png | Bin 0 -> 269 bytes .../datasets/image_dataset_2/media/img_00005.png | Bin 0 -> 328 bytes .../datasets/image_dataset_2/media/img_00006.png | Bin 0 -> 192 bytes .../datasets/image_dataset_2/media/img_00007.png | Bin 0 -> 331 bytes .../datasets/image_dataset_2/media/img_00008.png | Bin 0 -> 172 bytes .../datasets/image_dataset_2/media/img_00009.png | Bin 0 -> 295 bytes .../datasets/image_dataset_2/media/img_00010.png | Bin 0 -> 287 bytes .../datasets/image_dataset_2/media/img_00011.png | Bin 0 -> 212 bytes .../datasets/image_dataset_2/media/img_00012.png | Bin 0 -> 327 bytes .../datasets/image_dataset_2/media/img_00013.png | Bin 0 -> 306 bytes .../datasets/image_dataset_2/media/img_00014.png | Bin 0 -> 165 bytes .../datasets/image_dataset_2/media/img_00015.png | Bin 0 -> 286 bytes .../datasets/image_dataset_2/media/img_00016.png | Bin 0 -> 307 bytes .../datasets/image_dataset_2/media/img_00017.png | Bin 0 -> 304 bytes .../datasets/image_dataset_2/media/img_00018.png | Bin 0 -> 252 bytes .../datasets/image_dataset_2/media/img_00019.png | Bin 0 -> 252 bytes .../datasets/image_dataset_2/media/img_00020.png | Bin 0 -> 313 bytes .../datasets/image_dataset_2/media/img_00021.png | Bin 0 -> 327 bytes .../datasets/image_dataset_2/media/img_00022.png | Bin 0 -> 255 bytes .../datasets/image_dataset_2/media/img_00023.png | Bin 0 -> 223 bytes .../datasets/image_dataset_2/media/img_00024.png | Bin 0 -> 261 bytes .../datasets/image_dataset_2/media/img_00025.png | Bin 0 -> 331 bytes .../datasets/image_dataset_2/media/img_00026.png | Bin 0 -> 263 bytes .../datasets/image_dataset_2/media/img_00027.png | Bin 0 -> 337 bytes .../datasets/image_dataset_2/media/img_00028.png | Bin 0 -> 306 bytes .../datasets/image_dataset_2/media/img_00029.png | Bin 0 -> 233 bytes .../datasets/image_dataset_2/media/img_00030.png | Bin 0 -> 283 bytes .../datasets/image_dataset_2/media/img_00031.png | Bin 0 -> 299 bytes .../datasets/image_dataset_2/media/img_00032.png | Bin 0 -> 262 bytes .../datasets/image_dataset_2/media/img_00033.png | Bin 0 -> 263 bytes .../datasets/image_dataset_2/media/img_00034.png | Bin 0 -> 295 bytes .../datasets/image_dataset_2/media/img_00035.png | Bin 0 -> 191 bytes .../datasets/image_dataset_2/media/img_00036.png | Bin 0 -> 271 bytes .../datasets/image_dataset_2/media/img_00037.png | Bin 0 -> 318 bytes .../datasets/image_dataset_2/media/img_00038.png | Bin 0 -> 255 bytes .../datasets/image_dataset_2/media/img_00039.png | Bin 0 -> 282 bytes .../datasets/image_dataset_2/media/img_00040.png | Bin 0 -> 171 bytes .../datasets/image_dataset_2/media/img_00041.png | Bin 0 -> 317 bytes .../datasets/image_dataset_2/media/img_00042.png | Bin 0 -> 215 bytes .../datasets/image_dataset_2/media/img_00043.png | Bin 0 -> 233 bytes .../datasets/image_dataset_2/media/img_00044.png | Bin 0 -> 225 bytes .../datasets/image_dataset_2/media/img_00045.png | Bin 0 -> 284 bytes .../datasets/image_dataset_2/media/img_00046.png | Bin 0 -> 317 bytes .../datasets/image_dataset_2/media/img_00047.png | Bin 0 -> 176 bytes .../datasets/image_dataset_2/media/img_00048.png | Bin 0 -> 223 bytes .../datasets/image_dataset_2/media/img_00049.png | Bin 0 -> 317 bytes .../datasets/image_dataset_2/media/img_00050.png | Bin 0 -> 269 bytes .../datasets/image_dataset_2/media/img_00051.png | Bin 0 -> 323 bytes .../datasets/image_dataset_2/media/img_00052.png | Bin 0 -> 272 bytes .../datasets/image_dataset_2/media/img_00053.png | Bin 0 -> 277 bytes .../datasets/image_dataset_2/media/img_00054.png | Bin 
0 -> 303 bytes .../datasets/image_dataset_2/media/img_00055.png | Bin 0 -> 313 bytes .../datasets/image_dataset_2/media/img_00056.png | Bin 0 -> 326 bytes .../datasets/image_dataset_2/media/img_00057.png | Bin 0 -> 253 bytes .../datasets/image_dataset_2/media/img_00058.png | Bin 0 -> 236 bytes .../datasets/image_dataset_2/media/img_00059.png | Bin 0 -> 222 bytes .../datasets/image_dataset_2/media/img_00060.png | Bin 0 -> 298 bytes .../datasets/image_dataset_2/media/img_00061.png | Bin 0 -> 254 bytes .../datasets/image_dataset_2/media/img_00062.png | Bin 0 -> 280 bytes .../datasets/image_dataset_2/media/img_00063.png | Bin 0 -> 345 bytes .../datasets/image_dataset_2/media/img_00064.png | Bin 0 -> 323 bytes .../datasets/image_dataset_2/media/img_00065.png | Bin 0 -> 235 bytes .../datasets/image_dataset_2/media/img_00066.png | Bin 0 -> 182 bytes .../datasets/image_dataset_2/media/img_00067.png | Bin 0 -> 249 bytes .../datasets/image_dataset_2/media/img_00068.png | Bin 0 -> 220 bytes .../datasets/image_dataset_2/media/img_00069.png | Bin 0 -> 306 bytes .../datasets/image_dataset_2/media/img_00070.png | Bin 0 -> 254 bytes .../datasets/image_dataset_2/media/img_00071.png | Bin 0 -> 256 bytes .../datasets/image_dataset_2/media/img_00072.png | Bin 0 -> 176 bytes .../datasets/image_dataset_2/media/img_00073.png | Bin 0 -> 301 bytes .../datasets/image_dataset_2/media/img_00074.png | Bin 0 -> 281 bytes .../datasets/image_dataset_2/media/img_00075.png | Bin 0 -> 301 bytes .../datasets/image_dataset_2/media/img_00076.png | Bin 0 -> 301 bytes .../datasets/image_dataset_2/media/img_00077.png | Bin 0 -> 196 bytes .../datasets/image_dataset_2/media/img_00078.png | Bin 0 -> 217 bytes .../datasets/image_dataset_2/media/img_00079.png | Bin 0 -> 265 bytes .../datasets/image_dataset_2/media/img_00080.png | Bin 0 -> 320 bytes .../datasets/image_dataset_2/media/img_00081.png | Bin 0 -> 320 bytes .../datasets/image_dataset_2/media/img_00082.png | Bin 0 -> 317 bytes .../datasets/image_dataset_2/media/img_00083.png | Bin 0 -> 195 bytes .../datasets/image_dataset_2/media/img_00084.png | Bin 0 -> 234 bytes .../datasets/image_dataset_2/media/img_00085.png | Bin 0 -> 324 bytes .../datasets/image_dataset_2/media/img_00086.png | Bin 0 -> 284 bytes .../datasets/image_dataset_2/media/img_00087.png | Bin 0 -> 296 bytes .../datasets/image_dataset_2/media/img_00088.png | Bin 0 -> 334 bytes .../datasets/image_dataset_2/media/img_00089.png | Bin 0 -> 324 bytes .../datasets/image_dataset_2/media/img_00090.png | Bin 0 -> 321 bytes .../datasets/image_dataset_2/media/img_00091.png | Bin 0 -> 274 bytes .../datasets/image_dataset_2/media/img_00092.png | Bin 0 -> 231 bytes .../datasets/image_dataset_2/media/img_00093.png | Bin 0 -> 305 bytes .../datasets/image_dataset_2/media/img_00094.png | Bin 0 -> 301 bytes .../datasets/image_dataset_2/media/img_00095.png | Bin 0 -> 300 bytes .../datasets/image_dataset_2/media/img_00096.png | Bin 0 -> 243 bytes .../datasets/image_dataset_2/media/img_00097.png | Bin 0 -> 316 bytes .../datasets/image_dataset_2/media/img_00098.png | Bin 0 -> 265 bytes .../image_dataset_2/tables/learningData.csv | 100 + .../data/datasets/iris_dataset_1/datasetDoc.json | 76 + .../iris_dataset_1/tables/learningData.csv | 151 + .../data/datasets/iris_dataset_2/datasetDoc.json | 25 + .../iris_dataset_2/tables/learningData.csv | 151 + .../data/datasets/iris_dataset_3/datasetDoc.json | 36 + .../iris_dataset_3/tables/learningData.csv | 151 + .../multivariate_dataset_1/datasetDoc.json | 93 + 
.../tables/gp_data_tables/train_data_934.csv | 1001 +++ .../tables/gp_data_tables/train_data_935.csv | 1001 +++ .../tables/gp_data_tables/train_data_936.csv | 1001 +++ .../tables/gp_data_tables/train_data_937.csv | 1001 +++ .../tables/gp_data_tables/train_data_938.csv | 1001 +++ .../tables/gp_data_tables/train_data_939.csv | 1001 +++ .../multivariate_dataset_1/tables/learningData.csv | 7 + .../data/datasets/object_dataset_1/datasetDoc.json | 82 + .../media/img_00225.png.REMOVED.git-id | 1 + .../media/img_00285.png.REMOVED.git-id | 1 + .../object_dataset_1/tables/learningData.csv | 5 + .../data/datasets/raw_dataset_1/datasetDoc.json | 23 + .../raw_dataset_1/raw/complementaryData.csv | 3 + .../score_dataset_1/dataset_TEST/datasetDoc.json | 82 + .../media/img_00225.png.REMOVED.git-id | 1 + .../media/img_00285.png.REMOVED.git-id | 1 + .../dataset_TEST/tables/learningData.csv | 5 + .../data/datasets/score_dataset_1/targets.csv | 5 + .../data/datasets/text_dataset_1/datasetDoc.json | 98 + .../text_dataset_1/tables/learningData.csv | 5 + .../tests/data/datasets/text_dataset_1/text/1.txt | 1 + .../tests/data/datasets/text_dataset_1/text/2.txt | 1 + .../tests/data/datasets/text_dataset_1/text/3.txt | 1 + .../tests/data/datasets/text_dataset_1/text/4.txt | 1 + .../datasets/timeseries_dataset_1/datasetDoc.json | 71 + .../timeseries_dataset_1/tables/learningData.csv | 41 + .../datasets/timeseries_dataset_2/datasetDoc.json | 85 + .../timeseries_dataset_2/tables/learningData.csv | 6 + .../timeseries/0000_train_ts.csv | 167 + .../timeseries/0001_train_ts.csv | 167 + .../timeseries/0002_train_ts.csv | 167 + .../timeseries/0003_train_ts.csv | 167 + .../timeseries/0004_train_ts.csv | 167 + .../datasets/timeseries_dataset_3/datasetDoc.json | 74 + .../timeseries_dataset_3/tables/learningData.csv | 41 + .../datasets/timeseries_dataset_4/datasetDoc.json | 71 + .../timeseries_dataset_4/tables/learningData.csv | 41 + .../data/datasets/video_dataset_1/datasetDoc.json | 71 + .../April_09_brush_hair_u_nm_np1_ba_goo_0.avi.mp4 | Bin 0 -> 214711 bytes ...Competition_cartwheel_f_cm_np1_ba_med_1.avi.mp4 | Bin 0 -> 187017 bytes .../video_dataset_1/tables/learningData.csv | 3 + axolotl/tests/data/docker/summing/Dockerfile | 17 + axolotl/tests/data/docker/summing/README.md | 3 + axolotl/tests/data/docker/summing/code/server.py | 59 + .../data/docker/summing/etc/service/summing/run | 4 + axolotl/tests/data/docker/summing/requirements.txt | 2 + axolotl/tests/data/docker/summing/runsvdir-start | 5 + axolotl/tests/data/generate-database-datasets.py | 403 ++ .../data/pipelines/data-preparation-no-split.yml | 36 + .../data-preparation-train-test-split.yml | 37 + .../tests/data/pipelines/fake_compute_score.yml | 31 + .../tests/data/pipelines/increment-dataframe.yml | 55 + axolotl/tests/data/pipelines/multi-input-test.json | 85 + axolotl/tests/data/pipelines/random-classifier.yml | 58 + .../data/pipelines/random-forest-classifier.yml | 74 + axolotl/tests/data/pipelines/random-sample.yml | 32 + .../data/pipelines/semi-standard-pipeline.json | 67 + axolotl/tests/data/primitives/setup.cfg | 25 + axolotl/tests/data/primitives/setup.py | 42 + .../data/primitives/test_primitives/__init__.py | 2 + .../data/primitives/test_primitives/abs_sum.py | 80 + .../test_primitives/container_hyperparam.py | 68 + .../primitives/test_primitives/data_hyperparam.py | 66 + .../tests/data/primitives/test_primitives/fail.py | 106 + .../data/primitives/test_primitives/fake_score.py | 100 + .../data/primitives/test_primitives/file_reader.py | 71 + 
.../data/primitives/test_primitives/increment.py | 99 + .../data/primitives/test_primitives/monomial.py | 127 + .../test_primitives/multi_data_hyperparam.py | 70 + .../tests/data/primitives/test_primitives/null.py | 219 + .../data/primitives/test_primitives/postgresql.py | 222 + .../test_primitives/primitive_hyperparam.py | 76 + .../primitives/test_primitives/primitive_sum.py | 139 + .../data/primitives/test_primitives/random.py | 154 + .../test_primitives/random_classifier.py | 130 + .../tests/data/primitives/test_primitives/sum.py | 151 + .../data/problems/boston_problem_1/problemDoc.json | 36 + .../data/problems/boston_problem_2/problemDoc.json | 36 + .../problems/database_problem_2/problemDoc.json | 42 + .../problems/database_problem_3/problemDoc.json | 36 + .../problems/database_problem_4/problemDoc.json | 37 + .../data/problems/image_problem_2/problemDoc.json | 36 + .../data/problems/iris_problem_1/dataSplits.csv | 151 + .../data/problems/iris_problem_1/problemDoc.json | 45 + .../data/problems/iris_problem_2/problemDoc.json | 36 + .../problems/multi_dataset_problem/problemDoc.json | 48 + axolotl/tests/resources/logistic_regeression.json | 146 + axolotl/tests/resources/svc_pipeline.json | 146 + axolotl/tests/test_algorithms_dummy.py | 55 + axolotl/tests/test_autokeras.py | 82 + axolotl/tests/test_backend_ray.py | 105 + axolotl/tests/test_backend_simple.py | 82 + axolotl/tests/test_bayesian.py | 93 + axolotl/tests/test_predefine_pipelines.py | 85 + axolotl/tests/test_preprocessor.py | 246 + axolotl/tests/test_random_search.py | 91 + d3m/CODE_STYLE.md | 258 + d3m/HISTORY.md | 1823 +++++ d3m/HOW_TO_RELEASE.md | 35 + d3m/LICENSE.txt | 201 + d3m/MANIFEST.in | 2 + d3m/README.md | 56 + d3m/d3m/__init__.py | 8 + d3m/d3m/__main__.py | 6 + d3m/d3m/base/__init__.py | 0 d3m/d3m/base/primitives.py | 451 ++ d3m/d3m/base/utils.py | 342 + d3m/d3m/cli.py | 1172 ++++ d3m/d3m/container/__init__.py | 8 + d3m/d3m/container/dataset.py | 3297 +++++++++ d3m/d3m/container/list.py | 170 + d3m/d3m/container/numpy.py | 128 + d3m/d3m/container/pandas.py | 495 ++ d3m/d3m/container/utils.py | 50 + d3m/d3m/contrib/__init__.py | 0 .../f596cd77-25f8-4d4c-a350-bb30ab1e58f6.yml | 31 + d3m/d3m/contrib/primitives/__init__.py | 0 d3m/d3m/contrib/primitives/compute_scores.py | 369 + d3m/d3m/deprecate.py | 143 + d3m/d3m/environment_variables.py | 22 + d3m/d3m/exceptions.py | 187 + d3m/d3m/index.py | 538 ++ d3m/d3m/metadata/__init__.py | 0 d3m/d3m/metadata/base.py | 4034 +++++++++++ d3m/d3m/metadata/hyperparams.py | 3370 ++++++++++ d3m/d3m/metadata/params.py | 138 + d3m/d3m/metadata/pipeline.py | 2970 +++++++++ d3m/d3m/metadata/pipeline_run.py | 1683 +++++ d3m/d3m/metadata/primitive_names.py | 392 ++ d3m/d3m/metadata/problem.py | 1039 +++ d3m/d3m/metadata/schemas/v0/container.json | 62 + d3m/d3m/metadata/schemas/v0/data.json | 64 + d3m/d3m/metadata/schemas/v0/definitions.json | 4415 ++++++++++++ d3m/d3m/metadata/schemas/v0/pipeline.json | 56 + d3m/d3m/metadata/schemas/v0/pipeline_run.json | 66 + d3m/d3m/metadata/schemas/v0/primitive.json | 94 + d3m/d3m/metadata/schemas/v0/problem.json | 50 + d3m/d3m/metrics.py | 1100 +++ d3m/d3m/namespace.py | 195 + d3m/d3m/primitive_interfaces/__init__.py | 0 d3m/d3m/primitive_interfaces/base.py | 1293 ++++ d3m/d3m/primitive_interfaces/clustering.py | 103 + d3m/d3m/primitive_interfaces/distance.py | 197 + d3m/d3m/primitive_interfaces/featurization.py | 22 + d3m/d3m/primitive_interfaces/generator.py | 62 + .../primitive_interfaces/supervised_learning.py | 10 + 
d3m/d3m/primitive_interfaces/transformer.py | 71 + .../primitive_interfaces/unsupervised_learning.py | 48 + d3m/d3m/runtime.py | 2911 ++++++++ d3m/d3m/types.py | 24 + d3m/d3m/utils.py | 1823 +++++ d3m/docs/_static/custom.css | 38 + d3m/docs/_templates/toc.html | 8 + d3m/docs/_templates/versions.html | 11 + d3m/docs/about.rst | 12 + d3m/docs/conf.py | 210 + d3m/docs/discovery.rst | 140 + d3m/docs/index.rst | 34 + d3m/docs/installation.rst | 112 + d3m/docs/interfaces.rst | 248 + d3m/docs/metadata.rst | 718 ++ d3m/docs/pipeline.rst | 443 ++ d3m/docs/primitive-checklist.rst | 139 + d3m/docs/primitives_base_classes.rst | 40 + d3m/docs/quickstart.rst | 817 +++ d3m/docs/reference.rst | 9 + d3m/docs/repostructure.rst | 17 + d3m/docs/tutorial.rst | 493 ++ d3m/entry_points.ini | 2 + d3m/oldest_dependencies.py | 27 + d3m/run_benchmarks.sh | 18 + d3m/run_tests.py | 11 + d3m/setup.cfg | 25 + d3m/setup.py | 87 + d3m/site/.gitignore | 4 + d3m/site/Makefile | 14 + d3m/site/build_site.sh | 47 + d3m/site/build_site_types.py | 284 + d3m/site/client.js | 334 + d3m/site/client.less | 106 + d3m/site/html_construction.js | 526 ++ d3m/site/package-lock.json | 2495 +++++++ d3m/site/package.json | 11 + d3m/site/requirements.txt | 2 + d3m/site/schema-org.css | 152 + d3m/site/static/index.html | 23 + d3m/site/static/schemas | 1 + d3m/tests/asv.conf.json | 161 + d3m/tests/benchmarks/__init__.py | 0 d3m/tests/benchmarks/base_utils.py | 41 + d3m/tests/benchmarks/containers.py | 38 + d3m/tests/benchmarks/metadata.py | 195 + d3m/tests/benchmarks/primitive.py | 32 + d3m/tests/benchmarks/sampling.py | 36 + d3m/tests/test_base_utils.py | 1035 +++ d3m/tests/test_cli_runtime.py | 1680 +++++ d3m/tests/test_compute_scores.py | 321 + d3m/tests/test_container_metadata.py | 1258 ++++ d3m/tests/test_containers.py | 2608 ++++++++ d3m/tests/test_dataset.py | 2094 ++++++ d3m/tests/test_file_reader.py | 171 + d3m/tests/test_hyperparams.py | 1795 +++++ d3m/tests/test_increment.py | 256 + d3m/tests/test_index.py | 133 + d3m/tests/test_metadata.py | 2211 ++++++ d3m/tests/test_metrics.py | 1261 ++++ d3m/tests/test_monomial.py | 358 + d3m/tests/test_null.py | 267 + d3m/tests/test_params.py | 53 + d3m/tests/test_pipeline.py | 1487 +++++ d3m/tests/test_pipeline_run.py | 280 + d3m/tests/test_plasma.py | 115 + d3m/tests/test_primitive_metadata.py | 334 + d3m/tests/test_primitive_sum.py | 283 + d3m/tests/test_primitive_validation.py | 808 +++ d3m/tests/test_problem.py | 232 + d3m/tests/test_random.py | 269 + d3m/tests/test_runtime.py | 1534 +++++ d3m/tests/test_split.py | 22 + d3m/tests/test_sum.py | 319 + d3m/tests/test_utils.py | 506 ++ .../anomaly/kpi/SCORE/dataset_TEST/datasetDoc.json | 63 + .../kpi/SCORE/dataset_TEST/tables/learningData.csv | 1758 +++++ .../anomaly/kpi/SCORE/problem_TEST/dataSplits.csv | 7028 ++++++++++++++++++++ .../anomaly/kpi/SCORE/problem_TEST/problemDoc.json | 65 + datasets/anomaly/kpi/SCORE/targets.csv | 0 .../anomaly/kpi/TEST/dataset_TEST/datasetDoc.json | 63 + .../kpi/TEST/dataset_TEST/tables/learningData.csv | 1758 +++++ .../anomaly/kpi/TEST/problem_TEST/dataSplits.csv | 7028 ++++++++++++++++++++ .../anomaly/kpi/TEST/problem_TEST/problemDoc.json | 65 + .../kpi/TRAIN/dataset_TRAIN/datasetDoc.json | 63 + .../tables/learningData.csv.REMOVED.git-id | 1 + .../anomaly/kpi/TRAIN/problem_TRAIN/dataSplits.csv | 7028 ++++++++++++++++++++ .../kpi/TRAIN/problem_TRAIN/problemDoc.json | 65 + datasets/anomaly/kpi/kpi_dataset/datasetDoc.json | 63 + .../tables/learningData.csv.REMOVED.git-id | 1 + 
.../kpi/kpi_problem/dataSplits.csv.REMOVED.git-id | 1 + datasets/anomaly/kpi/kpi_problem/problemDoc.json | 65 + datasets/anomaly/raw_data/kpi.csv.REMOVED.git-id | 1 + datasets/anomaly/raw_data/yahoo_sub_5.csv | 1401 ++++ datasets/anomaly/template/datasetDoc.json | 183 + datasets/anomaly/template/problemDoc.json | 65 + datasets/anomaly/transform_kpi.py | 160 + datasets/anomaly/transform_yahoo.py | 160 + .../yahoo_sub_5/SCORE/dataset_TEST/datasetDoc.json | 95 + .../SCORE/dataset_TEST/tables/learningData.csv | 141 + .../yahoo_sub_5/SCORE/problem_TEST/dataSplits.csv | 1261 ++++ .../yahoo_sub_5/SCORE/problem_TEST/problemDoc.json | 65 + datasets/anomaly/yahoo_sub_5/SCORE/targets.csv | 0 .../yahoo_sub_5/TEST/dataset_TEST/datasetDoc.json | 95 + .../TEST/dataset_TEST/tables/learningData.csv | 141 + .../yahoo_sub_5/TEST/problem_TEST/dataSplits.csv | 1261 ++++ .../yahoo_sub_5/TEST/problem_TEST/problemDoc.json | 65 + .../TRAIN/dataset_TRAIN/datasetDoc.json | 95 + .../TRAIN/dataset_TRAIN/tables/learningData.csv | 1261 ++++ .../yahoo_sub_5/TRAIN/problem_TRAIN/dataSplits.csv | 1261 ++++ .../TRAIN/problem_TRAIN/problemDoc.json | 65 + .../yahoo_sub_5_dataset/datasetDoc.json | 95 + .../yahoo_sub_5_dataset/tables/learningData.csv | 1401 ++++ .../yahoo_sub_5/yahoo_sub_5_problem/dataSplits.csv | 1261 ++++ .../yahoo_sub_5_problem/problemDoc.json | 65 + .../kpi/SCORE/dataset_TEST/datasetDoc.json | 63 + .../kpi/SCORE/dataset_TEST/tables/learningData.csv | 1758 +++++ .../kpi/SCORE/problem_TEST/dataSplits.csv | 7028 ++++++++++++++++++++ .../kpi/SCORE/problem_TEST/problemDoc.json | 65 + datasets/anomaly_reserve/kpi/SCORE/targets.csv | 0 .../kpi/TEST/dataset_TEST/datasetDoc.json | 63 + .../kpi/TEST/dataset_TEST/tables/learningData.csv | 1758 +++++ .../kpi/TEST/problem_TEST/dataSplits.csv | 7028 ++++++++++++++++++++ .../kpi/TEST/problem_TEST/problemDoc.json | 65 + .../kpi/TRAIN/dataset_TRAIN/datasetDoc.json | 63 + .../tables/learningData.csv.REMOVED.git-id | 1 + .../kpi/TRAIN/problem_TRAIN/dataSplits.csv | 7028 ++++++++++++++++++++ .../kpi/TRAIN/problem_TRAIN/problemDoc.json | 65 + .../kpi/kpi_dataset/datasetDoc.json | 63 + .../tables/learningData.csv.REMOVED.git-id | 1 + .../kpi/kpi_problem/dataSplits.csv.REMOVED.git-id | 1 + .../kpi/kpi_problem/problemDoc.json | 65 + .../raw_data/kpi.csv.REMOVED.git-id | 1 + datasets/anomaly_reserve/template/datasetDoc.json | 183 + datasets/anomaly_reserve/template/problemDoc.json | 65 + datasets/anomaly_reserve/transform.py | 160 + .../yahoo_sub_5/SCORE/dataset_TEST/datasetDoc.json | 95 + .../SCORE/dataset_TEST/tables/learningData.csv | 141 + .../yahoo_sub_5/SCORE/problem_TEST/dataSplits.csv | 1261 ++++ .../yahoo_sub_5/SCORE/problem_TEST/problemDoc.json | 65 + .../anomaly_reserve/yahoo_sub_5/SCORE/targets.csv | 0 .../yahoo_sub_5/TEST/dataset_TEST/datasetDoc.json | 95 + .../TEST/dataset_TEST/tables/learningData.csv | 141 + .../yahoo_sub_5/TEST/problem_TEST/dataSplits.csv | 1261 ++++ .../yahoo_sub_5/TEST/problem_TEST/problemDoc.json | 65 + .../TRAIN/dataset_TRAIN/datasetDoc.json | 95 + .../TRAIN/dataset_TRAIN/tables/learningData.csv | 1261 ++++ .../yahoo_sub_5/TRAIN/problem_TRAIN/dataSplits.csv | 1261 ++++ .../TRAIN/problem_TRAIN/problemDoc.json | 65 + .../yahoo_sub_5_dataset/datasetDoc.json | 95 + .../yahoo_sub_5_dataset/tables/learningData.csv | 1401 ++++ .../yahoo_sub_5/yahoo_sub_5_problem/dataSplits.csv | 1261 ++++ .../yahoo_sub_5_problem/problemDoc.json | 65 + datasets/data-supply/README.md | 4 + datasets/data-supply/documentation/README.md | 6 + 
.../code/consolidated-new-metrics.ipynb | 906 +++ .../data-supply/documentation/code/d3m_eval.py | 246 + .../data-supply/documentation/datasetSchema.md | 590 ++ .../documentation/examples/image.datasetDoc.json | 52 + .../documentation/examples/iris.datasetDoc.json | 59 + .../examples/multitable.datasetDoc.json | 232 + .../data-supply/documentation/minimalMetadata.md | 53 + datasets/data-supply/documentation/overview.md | 119 + .../data-supply/documentation/problemSchema.md | 595 ++ .../data-supply/documentation/standardValues.json | 137 + .../data-supply/documentation/static/Drawing1.vsdx | Bin 0 -> 69870 bytes .../data-supply/documentation/static/allViews.PNG | Bin 0 -> 53292 bytes .../data-supply/documentation/static/examples.txt | 149 + .../static/objDetection_scoring_GT.PNG | Bin 0 -> 6818 bytes .../static/objDetection_scoring_PRED.PNG | Bin 0 -> 18229 bytes .../documentation/static/sampleDataSplitsFile.PNG | Bin 0 -> 3864 bytes .../documentation/static/sampleDataset.PNG | Bin 0 -> 5245 bytes .../documentation/static/sampleProblem.PNG | Bin 0 -> 3268 bytes .../documentation/static/sampleProblemTestView.PNG | Bin 0 -> 2542 bytes .../static/sampleProblemTrainView.PNG | Bin 0 -> 2667 bytes .../static/sampleProblem_objectDetection.PNG | Bin 0 -> 41224 bytes .../documentation/static/sampleSupply.PNG | Bin 0 -> 80481 bytes .../documentation/static/sampleTestView.PNG | Bin 0 -> 3703 bytes .../documentation/static/sampleTrainView.PNG | Bin 0 -> 3846 bytes .../static/schema fields spreadsheet.xlsx | Bin 0 -> 8667 bytes .../data-supply/documentation/static/testView.PNG | Bin 0 -> 62513 bytes .../data-supply/documentation/static/trainView.PNG | Bin 0 -> 65221 bytes .../supportedResourceTypesFormats.json | 60 + datasets/data-supply/schemas/README.md | 5 + datasets/data-supply/schemas/datasetSchema.json | 74 + datasets/data-supply/schemas/problemSchema.json | 72 + datasets/validate.py | 1296 ++++ examples/build_AutoEncoder_pipeline.py | 70 + examples/build_IsolationForest_pipline.py | 107 + examples/build_LODA_pipline.py | 78 + examples/run_automl.py | 33 + examples/run_certain_pipeline.py | 30 + examples/run_predefined_pipeline.py | 51 + examples/test_axolotl.py | 194 + install.sh | 23 + requirements.txt | 40 + test.sh | 40 + tested_file.txt | 130 + tests/build_ABOD_pipline.py | 70 + tests/build_AutoEncoder.py | 67 + tests/build_AutoRegODetect_pipeline.py | 71 + tests/build_AxiswiseScale_pipline.py | 50 + tests/build_BKFilter_pipline.py | 44 + tests/build_CBLOF_pipline.py | 51 + tests/build_CategoricalToBinary.py | 48 + tests/build_ColumnFilter_pipeline.py | 49 + tests/build_ContinuityValidation_pipline.py | 43 + tests/build_DeepLog_pipeline.py | 49 + tests/build_DiscreteCosineTransform.py | 50 + tests/build_DuplicationValidation_pipline.py | 42 + tests/build_FastFourierTransform.py | 48 + tests/build_HBOS_pipline.py | 68 + tests/build_HBOS_score_pipline.py | 71 + tests/build_HPFilter_pipline.py | 46 + tests/build_HoltSmoothing_pipline.py | 76 + ...uild_HoltWintersExponentialSmoothing_pipline.py | 76 + tests/build_IsolationForest_pipline.py | 59 + tests/build_KDiscord_pipeline.py | 71 + tests/build_KNN_pipline.py | 51 + tests/build_LODA_pipline.py | 51 + tests/build_LOF_pipline.py | 51 + tests/build_LSTMOD_pipline.py | 70 + tests/build_MatrixProfile_pipeline.py | 49 + tests/build_MeanAverageTransform_pipline.py | 77 + tests/build_NonNegativeMatrixFactorization.py | 50 + tests/build_OCSVM_pipline.py | 51 + tests/build_PCAODetect_pipeline.py | 71 + tests/build_PowerTransform_pipline.py | 49 + 
tests/build_PyodCOF.py | 51 + tests/build_QuantileTransform_pipline.py | 49 + tests/build_RuleBasedFilter_pipline.py | 54 + tests/build_SOD_pipeline.py | 49 + tests/build_SimpleExponentialSmoothing_pipline.py | 76 + tests/build_Standardize_pipline.py | 49 + tests/build_TRMF_pipline.py | 44 + tests/build_Telemanom.py | 48 + tests/build_TimeIntervalTransform_pipeline.py | 86 + tests/build_TruncatedSVD_pipline.py | 44 + tests/build_VariationalAutoEncoder.py | 67 + tests/build_WaveletTransform_pipline.py | 64 + tests/build_test_detection_algorithm_PyodMoGaal.py | 50 + tests/build_test_detection_algorithm_PyodSoGaal.py | 50 + ...nalysis_spectral_residual_transform_pipeline.py | 61 + ...test_feature_analysis_statistical_abs_energy.py | 62 + ...ld_test_feature_analysis_statistical_abs_sum.py | 62 + ...uild_test_feature_analysis_statistical_gmean.py | 62 + ...uild_test_feature_analysis_statistical_hmean.py | 62 + ...d_test_feature_analysis_statistical_kurtosis.py | 62 + ...ld_test_feature_analysis_statistical_maximum.py | 62 + ...build_test_feature_analysis_statistical_mean.py | 62 + ...d_test_feature_analysis_statistical_mean_abs.py | 62 + ...sis_statistical_mean_abs_temporal_derivative.py | 62 + ...nalysis_statistical_mean_temporal_derivative.py | 62 + ...ild_test_feature_analysis_statistical_median.py | 62 + ...alysis_statistical_median_absolute_deviation.py | 63 + ...ld_test_feature_analysis_statistical_minimum.py | 62 + ...build_test_feature_analysis_statistical_skew.py | 62 + .../build_test_feature_analysis_statistical_std.py | 62 + .../build_test_feature_analysis_statistical_var.py | 62 + ..._test_feature_analysis_statistical_variation.py | 62 + ...ld_test_feature_analysis_statistical_vec_sum.py | 62 + ...ture_analysis_statistical_willison_amplitude.py | 62 + ...t_feature_analysis_statistical_zero_crossing.py | 62 + ..._time_series_seasonality_trend_decomposition.py | 61 + tods/.gitignore | 2 + tods/__init__.py | 0 tods/common-primitives/HISTORY.md | 363 + tods/common-primitives/HOW_TO_MANAGE.md | 94 + tods/common-primitives/LICENSE.txt | 201 + tods/common-primitives/MANIFEST.in | 2 + tods/common-primitives/README.md | 83 + tods/common-primitives/add.sh | 24 + .../common_primitives/__init__.py | 2 + .../common_primitives/add_semantic_types.py | 78 + .../common_primitives/audio_reader.py | 137 + tods/common-primitives/common_primitives/base.py | 437 ++ .../common_primitives/cast_to_type.py | 122 + .../common_primitives/column_map.py | 361 + .../common_primitives/column_parser.py | 398 ++ .../common_primitives/compute_metafeatures.py | 600 ++ .../common_primitives/construct_predictions.py | 262 + .../common_primitives/csv_reader.py | 145 + .../common_primitives/cut_audio.py | 319 + .../common_primitives/dataframe_flatten.py | 201 + .../common_primitives/dataframe_image_reader.py | 94 + .../common_primitives/dataframe_to_list.py | 54 + .../common_primitives/dataframe_to_ndarray.py | 54 + .../common_primitives/dataframe_utils.py | 46 + .../common_primitives/datamart_augment.py | 106 + .../common_primitives/datamart_download.py | 91 + .../common_primitives/dataset_map.py | 375 ++ .../common_primitives/dataset_sample.py | 141 + .../common_primitives/dataset_to_dataframe.py | 88 + .../common_primitives/dataset_utils.py | 52 + .../common_primitives/datetime_field_compose.py | 83 + .../common_primitives/datetime_range_filter.py | 161 + .../common_primitives/denormalize.py | 556 ++ .../common_primitives/extract_columns.py | 58 + .../extract_columns_semantic_types.py | 141 + 
.../extract_columns_structural_types.py | 135 + .../common_primitives/fixed_split.py | 124 + .../common_primitives/grouping_field_compose.py | 101 + .../common_primitives/holt_smoothing.py | 345 + .../holt_winters_exponential_smoothing.py | 345 + .../common_primitives/horizontal_concat.py | 78 + .../common_primitives/kfold_split.py | 94 + .../common_primitives/kfold_split_timeseries.py | 198 + .../common_primitives/lgbm_classifier.py | 658 ++ .../common_primitives/list_to_dataframe.py | 53 + .../common_primitives/list_to_ndarray.py | 78 + .../common_primitives/mean_average_transform.py | 348 + .../common_primitives/ndarray_to_dataframe.py | 64 + .../common_primitives/ndarray_to_list.py | 53 + .../common_primitives/no_split.py | 59 + .../normalize_column_references.py | 112 + .../common_primitives/normalize_graphs.py | 360 + .../common_primitives/numeric_range_filter.py | 138 + .../common_primitives/one_hot_maker.py | 313 + .../common_primitives/pandas_onehot_encoder.py | 238 + .../common_primitives/random_forest.py | 733 ++ tods/common-primitives/common_primitives/ravel.py | 119 + .../common_primitives/redact_columns.py | 162 + .../common_primitives/regex_filter.py | 86 + .../common_primitives/remove_columns.py | 59 + .../common_primitives/remove_duplicate_columns.py | 169 + .../common_primitives/remove_semantic_types.py | 78 + .../common_primitives/rename_duplicate_columns.py | 73 + .../common_primitives/replace_semantic_types.py | 165 + .../simple_exponential_smoothing.py | 354 + .../common_primitives/simple_profiler.py | 795 +++ .../common_primitives/slacker/README.md | 13 + .../common_primitives/slacker/__init__.py | 0 .../common_primitives/slacker/base.py | 102 + .../common_primitives/slacker/estimation.py | 105 + .../slacker/feature_extraction.py | 184 + .../common_primitives/slacker/feature_selection.py | 179 + .../common_primitives/stack_ndarray_column.py | 133 + .../common_primitives/tabular_extractor.py | 232 + .../common_primitives/term_filter.py | 100 + .../common_primitives/text_reader.py | 70 + .../common_primitives/train_score_split.py | 89 + .../common_primitives/unseen_label_decoder.py | 137 + .../common_primitives/unseen_label_encoder.py | 203 + tods/common-primitives/common_primitives/utils.py | 192 + .../common_primitives/video_reader.py | 87 + .../common_primitives/xgboost_dart.py | 684 ++ .../common_primitives/xgboost_gbtree.py | 665 ++ .../common_primitives/xgboost_regressor.py | 588 ++ tods/common-primitives/entry_points.ini | 63 + tods/common-primitives/git-add.sh | 5 + tods/common-primitives/git-check.sh | 21 + tods/common-primitives/list_primitives.py | 32 + .../1.yaml.gz | Bin 0 -> 8605 bytes .../1.yaml.gz | Bin 0 -> 8790 bytes .../pipeline_run_extract_structural_types.yml.gz | 1 + .../1.yaml.gz | Bin 0 -> 8599 bytes .../1.yaml.gz | Bin 0 -> 8575 bytes .../2.yaml.gz | Bin 0 -> 95641 bytes .../1.yaml.gz | Bin 0 -> 7503 bytes .../pipeline_run_extract_structural_types.yml.gz | 1 + .../1.yaml.gz | 1 + .../pipeline_run_extract_structural_types.yml.gz | 1 + .../pipeline_run_group_field_compose.yml.gz | 1 + .../1.yaml.gz | 1 + .../pipeline_run_extract_structural_types.yml.gz | 1 + .../1.yaml.gz | 1 + .../pipeline_run_extract_structural_types.yml.gz | 1 + .../pipeline_run_group_field_compose.yml.gz | 1 + .../1.yaml.gz | 1 + .../pipeline_run_extract_structural_types.yml.gz | 1 + .../pipeline_run.yml.gz | Bin 0 -> 59789 bytes .../pipeline_run.yml.gz | Bin 0 -> 165228 bytes .../1.yaml.gz | 1 + .../pipeline_run_extract_structural_types.yml.gz | 1 + .../1.yml | 4729 
+++++++++++++ .../pipeline_run_extract_structural_types.yml.gz | 1 + .../pipeline_run_group_field_compose.yml.gz | 1 + .../d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json | 246 + .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 + .../ccad0f9c-130e-4063-a91e-ea65a18cb041.yaml | 110 + .../b7a24816-2518-4073-9c45-b97f2b2fee30.json | 246 + .../4d402450-2562-48cc-93fd-719fb658c43c.json | 246 + .../3afd2bd2-7ba1-4ac1-928f-fad0c39a05e5.json | 522 ++ .../4ff2f21d-1bba-4c44-bb96-e05728bcf6ed.json | 342 + .../387d432a-9893-4558-b190-1c5e9e399dbf.yaml | 123 + .../2b307634-f01e-412e-8d95-7e54afd4731f.json | 300 + .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 + .../4ec215d1-6484-4502-a6dd-f659943ccb94.json | 1 + .../a8c40699-c48d-4f12-aa18-639c5fb6baae.json | 1 + .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 + .../d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json | 1 + .../4ec215d1-6484-4502-a6dd-f659943ccb94.json | 1 + .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 + .../d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json | 1 + .../4ec215d1-6484-4502-a6dd-f659943ccb94.json | 1 + .../a8c40699-c48d-4f12-aa18-639c5fb6baae.json | 1 + .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 + .../d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json | 1 + .../4ec215d1-6484-4502-a6dd-f659943ccb94.json | 1 + .../pipeline.py | 71 + .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 + .../d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json | 1 + .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 + .../pipeline.py | 83 + .../a8c40699-c48d-4f12-aa18-639c5fb6baae.json | 1 + .../pipeline.py | 100 + .../2b307634-f01e-412e-8d95-7e54afd4731f.json | 1 + .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 + .../11ee9290-992d-4e48-97ed-1a6e4c15f92f.json | 272 + .../k-fold-timeseries-split.yml | 83 + .../k-fold-timeseries-split-raw.yml | 108 + .../0f636602-6299-411b-9873-4b974cd393ba.json | 247 + .../4ec215d1-6484-4502-a6dd-f659943ccb94.json | 1 + .../a8c40699-c48d-4f12-aa18-639c5fb6baae.json | 1 + .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 + tods/common-primitives/run_pipelines.sh | 44 + tods/common-primitives/run_tests.py | 11 + tods/common-primitives/setup.cfg | 28 + tods/common-primitives/setup.py | 65 + tods/common-primitives/sklearn-wrap/.gitignore | 2 + .../sklearn-wrap/requirements.txt | 31 + tods/common-primitives/sklearn-wrap/setup.py | 106 + .../sklearn-wrap/sklearn_wrap/SKARDRegression.py | 470 ++ .../sklearn_wrap/SKAdaBoostClassifier.py | 498 ++ .../sklearn_wrap/SKAdaBoostRegressor.py | 437 ++ .../sklearn_wrap/SKBaggingClassifier.py | 589 ++ .../sklearn_wrap/SKBaggingRegressor.py | 533 ++ .../sklearn-wrap/sklearn_wrap/SKBernoulliNB.py | 508 ++ .../sklearn-wrap/sklearn_wrap/SKBinarizer.py | 330 + .../sklearn-wrap/sklearn_wrap/SKCountVectorizer.py | 490 ++ .../sklearn_wrap/SKDecisionTreeClassifier.py | 621 ++ .../sklearn_wrap/SKDecisionTreeRegressor.py | 565 ++ .../sklearn-wrap/sklearn_wrap/SKDummyClassifier.py | 503 ++ .../sklearn-wrap/sklearn_wrap/SKDummyRegressor.py | 442 ++ .../sklearn-wrap/sklearn_wrap/SKElasticNet.py | 466 ++ .../sklearn_wrap/SKExtraTreesClassifier.py | 675 ++ .../sklearn_wrap/SKExtraTreesRegressor.py | 607 ++ .../sklearn-wrap/sklearn_wrap/SKFastICA.py | 439 ++ .../sklearn_wrap/SKFeatureAgglomeration.py | 361 + .../sklearn-wrap/sklearn_wrap/SKGaussianNB.py | 492 ++ .../sklearn_wrap/SKGaussianProcessRegressor.py | 463 ++ .../sklearn_wrap/SKGaussianRandomProjection.py | 344 + .../sklearn_wrap/SKGenericUnivariateSelect.py | 443 ++ .../sklearn_wrap/SKGradientBoostingClassifier.py | 707 ++ 
.../sklearn_wrap/SKGradientBoostingRegressor.py | 673 ++ .../sklearn-wrap/sklearn_wrap/SKImputer.py | 391 ++ .../sklearn_wrap/SKKNeighborsClassifier.py | 497 ++ .../sklearn_wrap/SKKNeighborsRegressor.py | 475 ++ .../sklearn-wrap/sklearn_wrap/SKKernelPCA.py | 536 ++ .../sklearn-wrap/sklearn_wrap/SKKernelRidge.py | 491 ++ .../sklearn-wrap/sklearn_wrap/SKLars.py | 460 ++ .../sklearn-wrap/sklearn_wrap/SKLasso.py | 474 ++ .../sklearn-wrap/sklearn_wrap/SKLassoCV.py | 526 ++ .../sklearn_wrap/SKLinearDiscriminantAnalysis.py | 535 ++ .../sklearn_wrap/SKLinearRegression.py | 431 ++ .../sklearn-wrap/sklearn_wrap/SKLinearSVC.py | 478 ++ .../sklearn-wrap/sklearn_wrap/SKLinearSVR.py | 452 ++ .../sklearn_wrap/SKLogisticRegression.py | 582 ++ .../sklearn-wrap/sklearn_wrap/SKMLPClassifier.py | 730 ++ .../sklearn-wrap/sklearn_wrap/SKMLPRegressor.py | 669 ++ .../sklearn-wrap/sklearn_wrap/SKMaxAbsScaler.py | 339 + .../sklearn-wrap/sklearn_wrap/SKMinMaxScaler.py | 366 + .../sklearn_wrap/SKMissingIndicator.py | 373 ++ .../sklearn-wrap/sklearn_wrap/SKMultinomialNB.py | 488 ++ .../sklearn-wrap/sklearn_wrap/SKNearestCentroid.py | 408 ++ .../sklearn-wrap/sklearn_wrap/SKNormalizer.py | 329 + .../sklearn-wrap/sklearn_wrap/SKNystroem.py | 522 ++ .../sklearn-wrap/sklearn_wrap/SKOneHotEncoder.py | 420 ++ .../sklearn-wrap/sklearn_wrap/SKOrdinalEncoder.py | 343 + .../sklearn-wrap/sklearn_wrap/SKPCA.py | 468 ++ .../sklearn_wrap/SKPassiveAggressiveClassifier.py | 648 ++ .../sklearn_wrap/SKPassiveAggressiveRegressor.py | 583 ++ .../sklearn_wrap/SKPolynomialFeatures.py | 346 + .../SKQuadraticDiscriminantAnalysis.py | 473 ++ .../sklearn_wrap/SKQuantileTransformer.py | 364 + .../sklearn-wrap/sklearn_wrap/SKRBFSampler.py | 349 + .../sklearn_wrap/SKRandomForestClassifier.py | 682 ++ .../sklearn_wrap/SKRandomForestRegressor.py | 609 ++ .../sklearn_wrap/SKRandomTreesEmbedding.py | 482 ++ .../sklearn-wrap/sklearn_wrap/SKRidge.py | 444 ++ .../sklearn-wrap/sklearn_wrap/SKRobustScaler.py | 354 + .../sklearn-wrap/sklearn_wrap/SKSGDClassifier.py | 661 ++ .../sklearn-wrap/sklearn_wrap/SKSGDRegressor.py | 643 ++ .../sklearn-wrap/sklearn_wrap/SKSVC.py | 635 ++ .../sklearn-wrap/sklearn_wrap/SKSVR.py | 616 ++ .../sklearn-wrap/sklearn_wrap/SKSelectFwe.py | 428 ++ .../sklearn_wrap/SKSelectPercentile.py | 428 ++ .../sklearn_wrap/SKSparseRandomProjection.py | 375 ++ .../sklearn-wrap/sklearn_wrap/SKStandardScaler.py | 357 + .../sklearn-wrap/sklearn_wrap/SKStringImputer.py | 371 ++ .../sklearn-wrap/sklearn_wrap/SKTfidfVectorizer.py | 530 ++ .../sklearn-wrap/sklearn_wrap/SKTruncatedSVD.py | 369 + .../sklearn_wrap/SKVarianceThreshold.py | 414 ++ .../sklearn-wrap/sklearn_wrap/__init__.py | 2 + tods/common-primitives/tests/test_audio_reader.py | 105 + tods/common-primitives/tests/test_cast_to_type.py | 131 + tods/common-primitives/tests/test_column_map.py | 75 + tods/common-primitives/tests/test_column_parser.py | 474 ++ .../tests/test_compute_metafeatures.py | 1106 +++ .../tests/test_construct_predictions.py | 233 + tods/common-primitives/tests/test_csv_reader.py | 50 + tods/common-primitives/tests/test_cut_audio.py | 122 + .../tests/test_dataframe_flatten.py | 132 + .../tests/test_dataframe_image_reader.py | 46 + .../tests/test_dataframe_to_list.py | 41 + .../tests/test_dataframe_to_ndarray.py | 40 + .../tests/test_dataframe_utils.py | 27 + tods/common-primitives/tests/test_dataset_map.py | 73 + .../common-primitives/tests/test_dataset_sample.py | 58 + .../tests/test_dataset_to_dataframe.py | 93 + .../tests/test_datetime_field_compose.py | 67 + 
.../tests/test_datetime_range_filter.py | 149 + tods/common-primitives/tests/test_denormalize.py | 469 ++ .../tests/test_extract_columns_semantic_types.py | 203 + .../tests/test_extract_columns_structural_types.py | 89 + tods/common-primitives/tests/test_fixed_split.py | 148 + .../tests/test_grouping_field_compose.py | 56 + .../tests/test_horizontal_concat.py | 183 + tods/common-primitives/tests/test_kfold_split.py | 100 + .../tests/test_kfold_timeseries_split.py | 223 + .../tests/test_lgbm_classifier.py | 571 ++ .../tests/test_list_to_dataframe.py | 185 + .../tests/test_list_to_ndarray.py | 132 + .../tests/test_ndarray_to_dataframe.py | 99 + .../tests/test_ndarray_to_list.py | 116 + tods/common-primitives/tests/test_no_split.py | 71 + .../tests/test_normalize_column_references.py | 597 ++ .../tests/test_normalize_graphs.py | 207 + .../tests/test_numeric_range_filter.py | 143 + tods/common-primitives/tests/test_one_hot_maker.py | 516 ++ .../tests/test_pandas_onehot_encoder.py | 178 + tods/common-primitives/tests/test_random_forest.py | 701 ++ tods/common-primitives/tests/test_ravel.py | 125 + .../common-primitives/tests/test_redact_columns.py | 173 + tods/common-primitives/tests/test_regex_filter.py | 114 + .../tests/test_remove_duplicate_columns.py | 123 + .../tests/test_rename_duplicate_columns.py | 136 + .../tests/test_replace_semantic_types.py | 97 + .../tests/test_simple_profiler.py | 446 ++ .../tests/test_stack_ndarray_column.py | 77 + .../tests/test_tabular_extractor.py | 173 + tods/common-primitives/tests/test_term_filter.py | 136 + tods/common-primitives/tests/test_text_reader.py | 30 + .../tests/test_train_score_split.py | 88 + .../tests/test_unseen_label_decoder.py | 51 + .../tests/test_unseen_label_encoder.py | 46 + tods/common-primitives/tests/test_video_reader.py | 35 + tods/common-primitives/tests/test_xgboost_dart.py | 687 ++ .../common-primitives/tests/test_xgboost_gbtree.py | 733 ++ .../tests/test_xgboost_regressor.py | 617 ++ tods/common-primitives/tests/utils.py | 112 + tods/data_processing/CategoricalToBinary.py | 395 ++ tods/data_processing/ColumnFilter.py | 149 + tods/data_processing/ContinuityValidation.py | 178 + tods/data_processing/DatasetToDataframe.py | 87 + tods/data_processing/DuplicationValidation.py | 97 + tods/data_processing/TimeIntervalTransform.py | 169 + tods/data_processing/TimeStampValidation.py | 99 + tods/data_processing/__init__.py | 0 tods/detection_algorithm/AutoRegODetect.py | 226 + tods/detection_algorithm/DeepLog.py | 413 ++ tods/detection_algorithm/KDiscordODetect.py | 347 + tods/detection_algorithm/LSTMODetect.py | 288 + tods/detection_algorithm/MatrixProfile.py | 420 ++ tods/detection_algorithm/PCAODetect.py | 364 + tods/detection_algorithm/PyodABOD.py | 207 + tods/detection_algorithm/PyodAE.py | 368 + tods/detection_algorithm/PyodCBLOF.py | 283 + tods/detection_algorithm/PyodCOF.py | 198 + tods/detection_algorithm/PyodHBOS.py | 216 + tods/detection_algorithm/PyodIsolationForest.py | 276 + tods/detection_algorithm/PyodKNN.py | 317 + tods/detection_algorithm/PyodLODA.py | 187 + tods/detection_algorithm/PyodLOF.py | 294 + tods/detection_algorithm/PyodMoGaal.py | 273 + tods/detection_algorithm/PyodOCSVM.py | 289 + tods/detection_algorithm/PyodSOD.py | 196 + tods/detection_algorithm/PyodSoGaal.py | 263 + tods/detection_algorithm/PyodVAE.py | 389 ++ tods/detection_algorithm/Telemanom.py | 473 ++ tods/detection_algorithm/UODBasePrimitive.py | 687 ++ tods/detection_algorithm/core/AutoRegOD.py | 171 + 
tods/detection_algorithm/core/CollectiveBase.py | 476 ++ .../core/CollectiveCommonTest.py | 169 + tods/detection_algorithm/core/KDiscord.py | 266 + tods/detection_algorithm/core/LSTMOD.py | 233 + tods/detection_algorithm/core/MultiAutoRegOD.py | 226 + tods/detection_algorithm/core/PCA.py | 264 + tods/detection_algorithm/core/UODCommonTest.py | 148 + .../core/algorithm_implementation.py | 0 .../core/test_CollectiveBase.py | 211 + tods/detection_algorithm/core/utility.py | 179 + tods/detection_algorithm/core/utils/channel.py | 114 + tods/detection_algorithm/core/utils/errors.py | 532 ++ tods/detection_algorithm/core/utils/modeling.py | 205 + tods/detection_algorithm/core/utils/utils.py | 0 tods/entry_points.ini | 81 + tods/feature_analysis/AutoCorrelation.py | 387 ++ tods/feature_analysis/BKFilter.py | 376 ++ tods/feature_analysis/DiscreteCosineTransform.py | 480 ++ tods/feature_analysis/FastFourierTransform.py | 470 ++ tods/feature_analysis/HPFilter.py | 353 + .../NonNegativeMatrixFactorization.py | 523 ++ tods/feature_analysis/SKTruncatedSVD.py | 510 ++ tods/feature_analysis/SpectralResidualTransform.py | 364 + tods/feature_analysis/StatisticalAbsEnergy.py | 331 + tods/feature_analysis/StatisticalAbsSum.py | 330 + tods/feature_analysis/StatisticalGmean.py | 337 + tods/feature_analysis/StatisticalHmean.py | 334 + tods/feature_analysis/StatisticalKurtosis.py | 331 + tods/feature_analysis/StatisticalMaximum.py | 330 + tods/feature_analysis/StatisticalMean.py | 330 + tods/feature_analysis/StatisticalMeanAbs.py | 330 + .../StatisticalMeanAbsTemporalDerivative.py | 330 + .../StatisticalMeanTemporalDerivative.py | 330 + tods/feature_analysis/StatisticalMedian.py | 330 + .../StatisticalMedianAbsoluteDeviation.py | 331 + tods/feature_analysis/StatisticalMinimum.py | 330 + tods/feature_analysis/StatisticalSkew.py | 331 + tods/feature_analysis/StatisticalStd.py | 330 + tods/feature_analysis/StatisticalVar.py | 330 + tods/feature_analysis/StatisticalVariation.py | 331 + tods/feature_analysis/StatisticalVecSum.py | 330 + .../StatisticalWillisonAmplitude.py | 342 + tods/feature_analysis/StatisticalZeroCrossing.py | 322 + tods/feature_analysis/TRMF.py | 746 +++ tods/feature_analysis/WaveletTransform.py | 557 ++ tods/feature_analysis/__init__.py | 0 tods/reinforcement/RuleBasedFilter.py | 348 + tods/requirements.txt | 31 + tods/run_tests.py | 11 + tods/setup.py | 45 + tods/tests/test_AutoRegODetect.py | 114 + tods/tests/test_BKFilter.py | 127 + tods/tests/test_CategoricalBinary.py | 146 + tods/tests/test_ColumnFilter.py | 106 + tods/tests/test_ContinuityValidation.py | 137 + tods/tests/test_DeepLog.py | 105 + tods/tests/test_DiscreteCosineTransform.py | 124 + tods/tests/test_DuplicationValidation.py | 112 + tods/tests/test_FastFourierTransform.py | 133 + tods/tests/test_HPFilter.py | 148 + tods/tests/test_HoltSmoothing.py | 71 + tods/tests/test_HoltWintersExponentialSmoothing.py | 71 + tods/tests/test_KDiscordODetect.py | 114 + tods/tests/test_LSTMODetector.py | 96 + tods/tests/test_MatrixProfile.py | 104 + tods/tests/test_MovingAverageTransform.py | 69 + tods/tests/test_NonNegativeMatrixFactorization.py | 188 + tods/tests/test_PCAODetect.py | 114 + tods/tests/test_PyodABOD.py | 136 + tods/tests/test_PyodAE.py | 104 + tods/tests/test_PyodCBLOF.py | 103 + tods/tests/test_PyodCOF.py | 105 + tods/tests/test_PyodHBOS.py | 135 + tods/tests/test_PyodIsolationForest.py | 104 + tods/tests/test_PyodKNN.py | 102 + tods/tests/test_PyodLODA.py | 103 + tods/tests/test_PyodLOF.py | 104 + tods/tests/test_PyodMoGaal.py | 
103 + tods/tests/test_PyodOCSVM.py | 103 + tods/tests/test_PyodSOD.py | 102 + tods/tests/test_PyodSoGaal.py | 102 + tods/tests/test_PyodVAE.py | 104 + tods/tests/test_SKAxiswiseScaler.py | 157 + tods/tests/test_SKPowerTransformer.py | 110 + tods/tests/test_SKQuantileTransformer.py | 109 + tods/tests/test_SKStandardizer.py | 159 + tods/tests/test_SKTruncatedSVD.py | 126 + tods/tests/test_SimpleExponentialSmoothing.py | 69 + tods/tests/test_SpectralResidualTransform.py | 107 + tods/tests/test_StastiticalStd.py | 108 + tods/tests/test_StatisticalAbsEnergy.py | 108 + tods/tests/test_StatisticalAbsSum.py | 108 + tods/tests/test_StatisticalGmean.py | 109 + tods/tests/test_StatisticalHmean.py | 109 + tods/tests/test_StatisticalKurtosis.py | 109 + tods/tests/test_StatisticalMaximum.py | 108 + tods/tests/test_StatisticalMean.py | 108 + tods/tests/test_StatisticalMeanAbs.py | 108 + .../test_StatisticalMeanAbsTemporalDerivative.py | 108 + .../test_StatisticalMeanTemporalDerivative.py | 108 + tods/tests/test_StatisticalMedian.py | 108 + .../test_StatisticalMedianAbsoluteDeviation.py | 109 + tods/tests/test_StatisticalMinimum.py | 108 + tods/tests/test_StatisticalSkew.py | 109 + tods/tests/test_StatisticalVar.py | 108 + tods/tests/test_StatisticalVariation.py | 109 + tods/tests/test_StatisticalVecSum.py | 108 + tods/tests/test_StatisticalWillisonAmplitude.py | 109 + tods/tests/test_StatisticalZeroCrossing.py | 107 + tods/tests/test_TRMF.py | 122 + tods/tests/test_Telemanom.py | 120 + ...test_TimeSeriesSeasonalityTrendDecomposition.py | 114 + tods/tests/test_TimeStampValidation.py | 100 + tods/tests/test_WaveletTransformer.py | 135 + tods/tests/utils.py | 112 + tods/timeseries_processing/.HoltSmoothing.py.swo | Bin 0 -> 16384 bytes tods/timeseries_processing/HoltSmoothing.py | 340 + .../HoltWintersExponentialSmoothing.py | 338 + .../MovingAverageTransform.py | 341 + tods/timeseries_processing/SKAxiswiseScaler.py | 398 ++ tods/timeseries_processing/SKPowerTransformer.py | 500 ++ .../timeseries_processing/SKQuantileTransformer.py | 501 ++ tods/timeseries_processing/SKStandardScaler.py | 545 ++ .../SimpleExponentialSmoothing.py | 349 + .../TimeSeriesSeasonalityTrendDecomposition.py | 348 + tods/timeseries_processing/__init__.py | 0 tods/tods/__init__.py | 0 tods/tods/search/__init__.py | 1 + tods/tods/search/brute_force_search.py | 36 + 1067 files changed, 282897 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 axolotl/.gitignore create mode 100644 axolotl/.gitlab-ci.yml create mode 100644 axolotl/.gitmodules create mode 100644 axolotl/LICENSE create mode 100644 axolotl/README.md create mode 100644 axolotl/axolotl/__init__.py create mode 100644 axolotl/axolotl/algorithms/__init__.py create mode 100644 axolotl/axolotl/algorithms/autokeras_integration/__init__.py create mode 100644 axolotl/axolotl/algorithms/autokeras_integration/block.py create mode 100644 axolotl/axolotl/algorithms/autokeras_integration/constants.py create mode 100644 axolotl/axolotl/algorithms/autokeras_integration/mapping.py create mode 100644 axolotl/axolotl/algorithms/autokeras_integration/steps.py create mode 100644 axolotl/axolotl/algorithms/autokeras_search.py create mode 100644 axolotl/axolotl/algorithms/base.py create mode 100644 axolotl/axolotl/algorithms/bayesian_search.py create mode 100644 axolotl/axolotl/algorithms/data_driven_search.py create mode 100644 axolotl/axolotl/algorithms/dummy.py create mode 100644 axolotl/axolotl/algorithms/random_search.py create mode 100644 
axolotl/axolotl/algorithms/tuners/__init__.py create mode 100644 axolotl/axolotl/algorithms/tuners/bayesian_oracle.py create mode 100644 axolotl/axolotl/algorithms/tuners/custom_hps.py create mode 100644 axolotl/axolotl/algorithms/tuners/hyperparameters.py create mode 100644 axolotl/axolotl/algorithms/tuners/oracle.py create mode 100644 axolotl/axolotl/algorithms/tuners/random_search_oracle.py create mode 100644 axolotl/axolotl/algorithms/tuners/tunable_base.py create mode 100644 axolotl/axolotl/backend/__init__.py create mode 100644 axolotl/axolotl/backend/base.py create mode 100644 axolotl/axolotl/backend/ray.py create mode 100644 axolotl/axolotl/backend/simple.py create mode 100644 axolotl/axolotl/d3m_grpc/__init__.py create mode 100644 axolotl/axolotl/d3m_grpc/constants.py create mode 100644 axolotl/axolotl/d3m_grpc/server.py create mode 100644 axolotl/axolotl/predefined_pipelines/__init__.py create mode 100644 axolotl/axolotl/predefined_pipelines/base_preprocessor.py create mode 100644 axolotl/axolotl/predefined_pipelines/preprocessor.py create mode 100644 axolotl/axolotl/utils/__init__.py create mode 100644 axolotl/axolotl/utils/data_problem.py create mode 100644 axolotl/axolotl/utils/pipeline.py create mode 100644 axolotl/axolotl/utils/resources.py create mode 100644 axolotl/axolotl/utils/resources/blocklist.json create mode 100644 axolotl/axolotl/utils/resources/default_pipelines.json create mode 100644 axolotl/axolotl/utils/resources/scoring_pipeline.yml create mode 100644 axolotl/axolotl/utils/resources/splitting_pipelines.json create mode 100644 axolotl/axolotl/utils/schemas.py create mode 100644 axolotl/examples/build_search_algorithm.ipynb create mode 100644 axolotl/examples/load_csv.ipynb create mode 100644 axolotl/examples/random_search/oracle.json create mode 100644 axolotl/examples/run.py create mode 100644 axolotl/examples/synthetic_data_bayesian_hp_tunning.ipynb.REMOVED.git-id create mode 100644 axolotl/failed_installation_repos.txt create mode 100755 axolotl/images/Devd3mStart.sh create mode 100644 axolotl/images/axolotl.dockerfile create mode 100755 axolotl/images/base.dockerfile create mode 100755 axolotl/images/build-images.sh create mode 100755 axolotl/run_tests.py create mode 100644 axolotl/setup.py create mode 100644 axolotl/tests/__init__.py create mode 100644 axolotl/tests/_server_test.py create mode 100644 axolotl/tests/data/.gitignore create mode 100644 axolotl/tests/data/.gitlab-ci.yml create mode 100644 axolotl/tests/data/README.md create mode 100755 axolotl/tests/data/add.sh create mode 100644 axolotl/tests/data/datasets/audio_dataset_1/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/audio_dataset_1/media/test_audio.mp3 create mode 100644 axolotl/tests/data/datasets/audio_dataset_1/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/boston_dataset_1/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/boston_dataset_1/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_1/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/database_dataset_1/tables/authors.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_1/tables/codes.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_1/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_1/tables/values.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_2/datasetDoc.json create mode 100644 
axolotl/tests/data/datasets/database_dataset_2/tables/comments.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_2/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_2/tables/posts.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_2/tables/users.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_3/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/database_dataset_3/tables/comments.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_3/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_3/tables/posts.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_3/tables/users.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_4/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/database_dataset_4/tables/comments.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_4/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_4/tables/posts.csv create mode 100644 axolotl/tests/data/datasets/database_dataset_4/tables/users.csv create mode 100644 axolotl/tests/data/datasets/graph_dataset_1/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/graph_dataset_1/graphs/G1.gml create mode 100644 axolotl/tests/data/datasets/graph_dataset_1/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/graph_dataset_2/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/graph_dataset_2/tables/edgeList.csv create mode 100644 axolotl/tests/data/datasets/graph_dataset_2/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/image_dataset_1/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/image_dataset_1/media/001_HandPhoto_left_01.jpg create mode 100644 axolotl/tests/data/datasets/image_dataset_1/media/cifar10_bird_1.png create mode 100644 axolotl/tests/data/datasets/image_dataset_1/media/cifar10_bird_2.png create mode 100644 axolotl/tests/data/datasets/image_dataset_1/media/mnist_0_2.png create mode 100644 axolotl/tests/data/datasets/image_dataset_1/media/mnist_1_1.png create mode 100644 axolotl/tests/data/datasets/image_dataset_1/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/image_dataset_2/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00000.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00001.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00002.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00003.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00004.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00005.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00006.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00007.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00008.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00009.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00010.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00011.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00012.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00013.png create mode 100644 
axolotl/tests/data/datasets/image_dataset_2/media/img_00014.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00015.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00016.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00017.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00018.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00019.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00020.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00021.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00022.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00023.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00024.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00025.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00026.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00027.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00028.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00029.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00030.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00031.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00032.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00033.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00034.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00035.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00036.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00037.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00038.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00039.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00040.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00041.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00042.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00043.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00044.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00045.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00046.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00047.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00048.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00049.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00050.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00051.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00052.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00053.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00054.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00055.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00056.png 
create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00057.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00058.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00059.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00060.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00061.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00062.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00063.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00064.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00065.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00066.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00067.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00068.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00069.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00070.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00071.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00072.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00073.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00074.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00075.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00076.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00077.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00078.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00079.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00080.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00081.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00082.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00083.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00084.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00085.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00086.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00087.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00088.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00089.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00090.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00091.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00092.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00093.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00094.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00095.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00096.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00097.png create mode 100644 axolotl/tests/data/datasets/image_dataset_2/media/img_00098.png create mode 100644 
axolotl/tests/data/datasets/image_dataset_2/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/iris_dataset_1/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/iris_dataset_1/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/iris_dataset_2/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/iris_dataset_2/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/iris_dataset_3/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/iris_dataset_3/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/multivariate_dataset_1/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/multivariate_dataset_1/tables/gp_data_tables/train_data_934.csv create mode 100644 axolotl/tests/data/datasets/multivariate_dataset_1/tables/gp_data_tables/train_data_935.csv create mode 100644 axolotl/tests/data/datasets/multivariate_dataset_1/tables/gp_data_tables/train_data_936.csv create mode 100644 axolotl/tests/data/datasets/multivariate_dataset_1/tables/gp_data_tables/train_data_937.csv create mode 100644 axolotl/tests/data/datasets/multivariate_dataset_1/tables/gp_data_tables/train_data_938.csv create mode 100644 axolotl/tests/data/datasets/multivariate_dataset_1/tables/gp_data_tables/train_data_939.csv create mode 100644 axolotl/tests/data/datasets/multivariate_dataset_1/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/object_dataset_1/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/object_dataset_1/media/img_00225.png.REMOVED.git-id create mode 100644 axolotl/tests/data/datasets/object_dataset_1/media/img_00285.png.REMOVED.git-id create mode 100644 axolotl/tests/data/datasets/object_dataset_1/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/raw_dataset_1/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/raw_dataset_1/raw/complementaryData.csv create mode 100644 axolotl/tests/data/datasets/score_dataset_1/dataset_TEST/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/score_dataset_1/dataset_TEST/media/img_00225.png.REMOVED.git-id create mode 100644 axolotl/tests/data/datasets/score_dataset_1/dataset_TEST/media/img_00285.png.REMOVED.git-id create mode 100644 axolotl/tests/data/datasets/score_dataset_1/dataset_TEST/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/score_dataset_1/targets.csv create mode 100644 axolotl/tests/data/datasets/text_dataset_1/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/text_dataset_1/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/text_dataset_1/text/1.txt create mode 100644 axolotl/tests/data/datasets/text_dataset_1/text/2.txt create mode 100644 axolotl/tests/data/datasets/text_dataset_1/text/3.txt create mode 100644 axolotl/tests/data/datasets/text_dataset_1/text/4.txt create mode 100644 axolotl/tests/data/datasets/timeseries_dataset_1/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/timeseries_dataset_1/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/timeseries_dataset_2/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/timeseries_dataset_2/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/timeseries_dataset_2/timeseries/0000_train_ts.csv create mode 100644 axolotl/tests/data/datasets/timeseries_dataset_2/timeseries/0001_train_ts.csv create mode 100644 axolotl/tests/data/datasets/timeseries_dataset_2/timeseries/0002_train_ts.csv create mode 100644 
axolotl/tests/data/datasets/timeseries_dataset_2/timeseries/0003_train_ts.csv create mode 100644 axolotl/tests/data/datasets/timeseries_dataset_2/timeseries/0004_train_ts.csv create mode 100644 axolotl/tests/data/datasets/timeseries_dataset_3/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/timeseries_dataset_3/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/timeseries_dataset_4/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/timeseries_dataset_4/tables/learningData.csv create mode 100644 axolotl/tests/data/datasets/video_dataset_1/datasetDoc.json create mode 100644 axolotl/tests/data/datasets/video_dataset_1/media/April_09_brush_hair_u_nm_np1_ba_goo_0.avi.mp4 create mode 100644 axolotl/tests/data/datasets/video_dataset_1/media/Jessica_and_Gregs_Cartwheel_Competition_cartwheel_f_cm_np1_ba_med_1.avi.mp4 create mode 100644 axolotl/tests/data/datasets/video_dataset_1/tables/learningData.csv create mode 100644 axolotl/tests/data/docker/summing/Dockerfile create mode 100644 axolotl/tests/data/docker/summing/README.md create mode 100755 axolotl/tests/data/docker/summing/code/server.py create mode 100755 axolotl/tests/data/docker/summing/etc/service/summing/run create mode 100644 axolotl/tests/data/docker/summing/requirements.txt create mode 100755 axolotl/tests/data/docker/summing/runsvdir-start create mode 100755 axolotl/tests/data/generate-database-datasets.py create mode 100644 axolotl/tests/data/pipelines/data-preparation-no-split.yml create mode 100644 axolotl/tests/data/pipelines/data-preparation-train-test-split.yml create mode 100644 axolotl/tests/data/pipelines/fake_compute_score.yml create mode 100644 axolotl/tests/data/pipelines/increment-dataframe.yml create mode 100644 axolotl/tests/data/pipelines/multi-input-test.json create mode 100644 axolotl/tests/data/pipelines/random-classifier.yml create mode 100644 axolotl/tests/data/pipelines/random-forest-classifier.yml create mode 100644 axolotl/tests/data/pipelines/random-sample.yml create mode 100644 axolotl/tests/data/pipelines/semi-standard-pipeline.json create mode 100644 axolotl/tests/data/primitives/setup.cfg create mode 100644 axolotl/tests/data/primitives/setup.py create mode 100644 axolotl/tests/data/primitives/test_primitives/__init__.py create mode 100644 axolotl/tests/data/primitives/test_primitives/abs_sum.py create mode 100644 axolotl/tests/data/primitives/test_primitives/container_hyperparam.py create mode 100644 axolotl/tests/data/primitives/test_primitives/data_hyperparam.py create mode 100644 axolotl/tests/data/primitives/test_primitives/fail.py create mode 100644 axolotl/tests/data/primitives/test_primitives/fake_score.py create mode 100644 axolotl/tests/data/primitives/test_primitives/file_reader.py create mode 100644 axolotl/tests/data/primitives/test_primitives/increment.py create mode 100644 axolotl/tests/data/primitives/test_primitives/monomial.py create mode 100644 axolotl/tests/data/primitives/test_primitives/multi_data_hyperparam.py create mode 100644 axolotl/tests/data/primitives/test_primitives/null.py create mode 100644 axolotl/tests/data/primitives/test_primitives/postgresql.py create mode 100644 axolotl/tests/data/primitives/test_primitives/primitive_hyperparam.py create mode 100644 axolotl/tests/data/primitives/test_primitives/primitive_sum.py create mode 100644 axolotl/tests/data/primitives/test_primitives/random.py create mode 100644 axolotl/tests/data/primitives/test_primitives/random_classifier.py create mode 100644 
axolotl/tests/data/primitives/test_primitives/sum.py create mode 100644 axolotl/tests/data/problems/boston_problem_1/problemDoc.json create mode 100644 axolotl/tests/data/problems/boston_problem_2/problemDoc.json create mode 100644 axolotl/tests/data/problems/database_problem_2/problemDoc.json create mode 100644 axolotl/tests/data/problems/database_problem_3/problemDoc.json create mode 100644 axolotl/tests/data/problems/database_problem_4/problemDoc.json create mode 100644 axolotl/tests/data/problems/image_problem_2/problemDoc.json create mode 100644 axolotl/tests/data/problems/iris_problem_1/dataSplits.csv create mode 100644 axolotl/tests/data/problems/iris_problem_1/problemDoc.json create mode 100644 axolotl/tests/data/problems/iris_problem_2/problemDoc.json create mode 100644 axolotl/tests/data/problems/multi_dataset_problem/problemDoc.json create mode 100644 axolotl/tests/resources/logistic_regeression.json create mode 100644 axolotl/tests/resources/svc_pipeline.json create mode 100644 axolotl/tests/test_algorithms_dummy.py create mode 100644 axolotl/tests/test_autokeras.py create mode 100644 axolotl/tests/test_backend_ray.py create mode 100644 axolotl/tests/test_backend_simple.py create mode 100644 axolotl/tests/test_bayesian.py create mode 100644 axolotl/tests/test_predefine_pipelines.py create mode 100644 axolotl/tests/test_preprocessor.py create mode 100644 axolotl/tests/test_random_search.py create mode 100644 d3m/CODE_STYLE.md create mode 100644 d3m/HISTORY.md create mode 100644 d3m/HOW_TO_RELEASE.md create mode 100644 d3m/LICENSE.txt create mode 100644 d3m/MANIFEST.in create mode 100644 d3m/README.md create mode 100644 d3m/d3m/__init__.py create mode 100644 d3m/d3m/__main__.py create mode 100644 d3m/d3m/base/__init__.py create mode 100644 d3m/d3m/base/primitives.py create mode 100644 d3m/d3m/base/utils.py create mode 100644 d3m/d3m/cli.py create mode 100644 d3m/d3m/container/__init__.py create mode 100644 d3m/d3m/container/dataset.py create mode 100644 d3m/d3m/container/list.py create mode 100644 d3m/d3m/container/numpy.py create mode 100644 d3m/d3m/container/pandas.py create mode 100644 d3m/d3m/container/utils.py create mode 100644 d3m/d3m/contrib/__init__.py create mode 100644 d3m/d3m/contrib/pipelines/f596cd77-25f8-4d4c-a350-bb30ab1e58f6.yml create mode 100644 d3m/d3m/contrib/primitives/__init__.py create mode 100644 d3m/d3m/contrib/primitives/compute_scores.py create mode 100644 d3m/d3m/deprecate.py create mode 100644 d3m/d3m/environment_variables.py create mode 100644 d3m/d3m/exceptions.py create mode 100644 d3m/d3m/index.py create mode 100644 d3m/d3m/metadata/__init__.py create mode 100644 d3m/d3m/metadata/base.py create mode 100644 d3m/d3m/metadata/hyperparams.py create mode 100644 d3m/d3m/metadata/params.py create mode 100644 d3m/d3m/metadata/pipeline.py create mode 100644 d3m/d3m/metadata/pipeline_run.py create mode 100644 d3m/d3m/metadata/primitive_names.py create mode 100644 d3m/d3m/metadata/problem.py create mode 100644 d3m/d3m/metadata/schemas/v0/container.json create mode 100644 d3m/d3m/metadata/schemas/v0/data.json create mode 100644 d3m/d3m/metadata/schemas/v0/definitions.json create mode 100644 d3m/d3m/metadata/schemas/v0/pipeline.json create mode 100644 d3m/d3m/metadata/schemas/v0/pipeline_run.json create mode 100644 d3m/d3m/metadata/schemas/v0/primitive.json create mode 100644 d3m/d3m/metadata/schemas/v0/problem.json create mode 100644 d3m/d3m/metrics.py create mode 100644 d3m/d3m/namespace.py create mode 100644 d3m/d3m/primitive_interfaces/__init__.py create 
mode 100644 d3m/d3m/primitive_interfaces/base.py create mode 100644 d3m/d3m/primitive_interfaces/clustering.py create mode 100644 d3m/d3m/primitive_interfaces/distance.py create mode 100644 d3m/d3m/primitive_interfaces/featurization.py create mode 100644 d3m/d3m/primitive_interfaces/generator.py create mode 100644 d3m/d3m/primitive_interfaces/supervised_learning.py create mode 100644 d3m/d3m/primitive_interfaces/transformer.py create mode 100644 d3m/d3m/primitive_interfaces/unsupervised_learning.py create mode 100644 d3m/d3m/runtime.py create mode 100644 d3m/d3m/types.py create mode 100644 d3m/d3m/utils.py create mode 100644 d3m/docs/_static/custom.css create mode 100644 d3m/docs/_templates/toc.html create mode 100644 d3m/docs/_templates/versions.html create mode 100644 d3m/docs/about.rst create mode 100644 d3m/docs/conf.py create mode 100644 d3m/docs/discovery.rst create mode 100644 d3m/docs/index.rst create mode 100644 d3m/docs/installation.rst create mode 100644 d3m/docs/interfaces.rst create mode 100644 d3m/docs/metadata.rst create mode 100644 d3m/docs/pipeline.rst create mode 100644 d3m/docs/primitive-checklist.rst create mode 100644 d3m/docs/primitives_base_classes.rst create mode 100644 d3m/docs/quickstart.rst create mode 100644 d3m/docs/reference.rst create mode 100644 d3m/docs/repostructure.rst create mode 100644 d3m/docs/tutorial.rst create mode 100644 d3m/entry_points.ini create mode 100755 d3m/oldest_dependencies.py create mode 100755 d3m/run_benchmarks.sh create mode 100755 d3m/run_tests.py create mode 100644 d3m/setup.cfg create mode 100644 d3m/setup.py create mode 100644 d3m/site/.gitignore create mode 100644 d3m/site/Makefile create mode 100755 d3m/site/build_site.sh create mode 100644 d3m/site/build_site_types.py create mode 100644 d3m/site/client.js create mode 100644 d3m/site/client.less create mode 100644 d3m/site/html_construction.js create mode 100644 d3m/site/package-lock.json create mode 100644 d3m/site/package.json create mode 100644 d3m/site/requirements.txt create mode 100644 d3m/site/schema-org.css create mode 100644 d3m/site/static/index.html create mode 120000 d3m/site/static/schemas create mode 100644 d3m/tests/asv.conf.json create mode 100644 d3m/tests/benchmarks/__init__.py create mode 100644 d3m/tests/benchmarks/base_utils.py create mode 100644 d3m/tests/benchmarks/containers.py create mode 100644 d3m/tests/benchmarks/metadata.py create mode 100644 d3m/tests/benchmarks/primitive.py create mode 100644 d3m/tests/benchmarks/sampling.py create mode 100644 d3m/tests/test_base_utils.py create mode 100644 d3m/tests/test_cli_runtime.py create mode 100644 d3m/tests/test_compute_scores.py create mode 100644 d3m/tests/test_container_metadata.py create mode 100644 d3m/tests/test_containers.py create mode 100644 d3m/tests/test_dataset.py create mode 100644 d3m/tests/test_file_reader.py create mode 100644 d3m/tests/test_hyperparams.py create mode 100644 d3m/tests/test_increment.py create mode 100644 d3m/tests/test_index.py create mode 100644 d3m/tests/test_metadata.py create mode 100644 d3m/tests/test_metrics.py create mode 100644 d3m/tests/test_monomial.py create mode 100644 d3m/tests/test_null.py create mode 100644 d3m/tests/test_params.py create mode 100644 d3m/tests/test_pipeline.py create mode 100644 d3m/tests/test_pipeline_run.py create mode 100644 d3m/tests/test_plasma.py create mode 100644 d3m/tests/test_primitive_metadata.py create mode 100644 d3m/tests/test_primitive_sum.py create mode 100644 d3m/tests/test_primitive_validation.py create mode 100644 
d3m/tests/test_problem.py create mode 100644 d3m/tests/test_random.py create mode 100644 d3m/tests/test_runtime.py create mode 100644 d3m/tests/test_split.py create mode 100644 d3m/tests/test_sum.py create mode 100644 d3m/tests/test_utils.py create mode 100644 datasets/anomaly/kpi/SCORE/dataset_TEST/datasetDoc.json create mode 100644 datasets/anomaly/kpi/SCORE/dataset_TEST/tables/learningData.csv create mode 100644 datasets/anomaly/kpi/SCORE/problem_TEST/dataSplits.csv create mode 100644 datasets/anomaly/kpi/SCORE/problem_TEST/problemDoc.json create mode 100644 datasets/anomaly/kpi/SCORE/targets.csv create mode 100644 datasets/anomaly/kpi/TEST/dataset_TEST/datasetDoc.json create mode 100644 datasets/anomaly/kpi/TEST/dataset_TEST/tables/learningData.csv create mode 100644 datasets/anomaly/kpi/TEST/problem_TEST/dataSplits.csv create mode 100644 datasets/anomaly/kpi/TEST/problem_TEST/problemDoc.json create mode 100644 datasets/anomaly/kpi/TRAIN/dataset_TRAIN/datasetDoc.json create mode 100644 datasets/anomaly/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv.REMOVED.git-id create mode 100644 datasets/anomaly/kpi/TRAIN/problem_TRAIN/dataSplits.csv create mode 100644 datasets/anomaly/kpi/TRAIN/problem_TRAIN/problemDoc.json create mode 100644 datasets/anomaly/kpi/kpi_dataset/datasetDoc.json create mode 100644 datasets/anomaly/kpi/kpi_dataset/tables/learningData.csv.REMOVED.git-id create mode 100644 datasets/anomaly/kpi/kpi_problem/dataSplits.csv.REMOVED.git-id create mode 100644 datasets/anomaly/kpi/kpi_problem/problemDoc.json create mode 100644 datasets/anomaly/raw_data/kpi.csv.REMOVED.git-id create mode 100644 datasets/anomaly/raw_data/yahoo_sub_5.csv create mode 100644 datasets/anomaly/template/datasetDoc.json create mode 100644 datasets/anomaly/template/problemDoc.json create mode 100644 datasets/anomaly/transform_kpi.py create mode 100644 datasets/anomaly/transform_yahoo.py create mode 100644 datasets/anomaly/yahoo_sub_5/SCORE/dataset_TEST/datasetDoc.json create mode 100644 datasets/anomaly/yahoo_sub_5/SCORE/dataset_TEST/tables/learningData.csv create mode 100644 datasets/anomaly/yahoo_sub_5/SCORE/problem_TEST/dataSplits.csv create mode 100644 datasets/anomaly/yahoo_sub_5/SCORE/problem_TEST/problemDoc.json create mode 100644 datasets/anomaly/yahoo_sub_5/SCORE/targets.csv create mode 100644 datasets/anomaly/yahoo_sub_5/TEST/dataset_TEST/datasetDoc.json create mode 100644 datasets/anomaly/yahoo_sub_5/TEST/dataset_TEST/tables/learningData.csv create mode 100644 datasets/anomaly/yahoo_sub_5/TEST/problem_TEST/dataSplits.csv create mode 100644 datasets/anomaly/yahoo_sub_5/TEST/problem_TEST/problemDoc.json create mode 100644 datasets/anomaly/yahoo_sub_5/TRAIN/dataset_TRAIN/datasetDoc.json create mode 100644 datasets/anomaly/yahoo_sub_5/TRAIN/dataset_TRAIN/tables/learningData.csv create mode 100644 datasets/anomaly/yahoo_sub_5/TRAIN/problem_TRAIN/dataSplits.csv create mode 100644 datasets/anomaly/yahoo_sub_5/TRAIN/problem_TRAIN/problemDoc.json create mode 100644 datasets/anomaly/yahoo_sub_5/yahoo_sub_5_dataset/datasetDoc.json create mode 100644 datasets/anomaly/yahoo_sub_5/yahoo_sub_5_dataset/tables/learningData.csv create mode 100644 datasets/anomaly/yahoo_sub_5/yahoo_sub_5_problem/dataSplits.csv create mode 100644 datasets/anomaly/yahoo_sub_5/yahoo_sub_5_problem/problemDoc.json create mode 100644 datasets/anomaly_reserve/kpi/SCORE/dataset_TEST/datasetDoc.json create mode 100644 datasets/anomaly_reserve/kpi/SCORE/dataset_TEST/tables/learningData.csv create mode 100644 
datasets/anomaly_reserve/kpi/SCORE/problem_TEST/dataSplits.csv create mode 100644 datasets/anomaly_reserve/kpi/SCORE/problem_TEST/problemDoc.json create mode 100644 datasets/anomaly_reserve/kpi/SCORE/targets.csv create mode 100644 datasets/anomaly_reserve/kpi/TEST/dataset_TEST/datasetDoc.json create mode 100644 datasets/anomaly_reserve/kpi/TEST/dataset_TEST/tables/learningData.csv create mode 100644 datasets/anomaly_reserve/kpi/TEST/problem_TEST/dataSplits.csv create mode 100644 datasets/anomaly_reserve/kpi/TEST/problem_TEST/problemDoc.json create mode 100644 datasets/anomaly_reserve/kpi/TRAIN/dataset_TRAIN/datasetDoc.json create mode 100644 datasets/anomaly_reserve/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv.REMOVED.git-id create mode 100644 datasets/anomaly_reserve/kpi/TRAIN/problem_TRAIN/dataSplits.csv create mode 100644 datasets/anomaly_reserve/kpi/TRAIN/problem_TRAIN/problemDoc.json create mode 100644 datasets/anomaly_reserve/kpi/kpi_dataset/datasetDoc.json create mode 100644 datasets/anomaly_reserve/kpi/kpi_dataset/tables/learningData.csv.REMOVED.git-id create mode 100644 datasets/anomaly_reserve/kpi/kpi_problem/dataSplits.csv.REMOVED.git-id create mode 100644 datasets/anomaly_reserve/kpi/kpi_problem/problemDoc.json create mode 100644 datasets/anomaly_reserve/raw_data/kpi.csv.REMOVED.git-id create mode 100644 datasets/anomaly_reserve/template/datasetDoc.json create mode 100644 datasets/anomaly_reserve/template/problemDoc.json create mode 100644 datasets/anomaly_reserve/transform.py create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/SCORE/dataset_TEST/datasetDoc.json create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/SCORE/dataset_TEST/tables/learningData.csv create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/SCORE/problem_TEST/dataSplits.csv create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/SCORE/problem_TEST/problemDoc.json create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/SCORE/targets.csv create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/TEST/dataset_TEST/datasetDoc.json create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/TEST/dataset_TEST/tables/learningData.csv create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/TEST/problem_TEST/dataSplits.csv create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/TEST/problem_TEST/problemDoc.json create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/TRAIN/dataset_TRAIN/datasetDoc.json create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/TRAIN/dataset_TRAIN/tables/learningData.csv create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/TRAIN/problem_TRAIN/dataSplits.csv create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/TRAIN/problem_TRAIN/problemDoc.json create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/yahoo_sub_5_dataset/datasetDoc.json create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/yahoo_sub_5_dataset/tables/learningData.csv create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/yahoo_sub_5_problem/dataSplits.csv create mode 100644 datasets/anomaly_reserve/yahoo_sub_5/yahoo_sub_5_problem/problemDoc.json create mode 100644 datasets/data-supply/README.md create mode 100644 datasets/data-supply/documentation/README.md create mode 100644 datasets/data-supply/documentation/code/consolidated-new-metrics.ipynb create mode 100644 datasets/data-supply/documentation/code/d3m_eval.py create mode 100644 datasets/data-supply/documentation/datasetSchema.md create mode 100644 datasets/data-supply/documentation/examples/image.datasetDoc.json create mode 100644 
datasets/data-supply/documentation/examples/iris.datasetDoc.json create mode 100644 datasets/data-supply/documentation/examples/multitable.datasetDoc.json create mode 100644 datasets/data-supply/documentation/minimalMetadata.md create mode 100644 datasets/data-supply/documentation/overview.md create mode 100644 datasets/data-supply/documentation/problemSchema.md create mode 100644 datasets/data-supply/documentation/standardValues.json create mode 100644 datasets/data-supply/documentation/static/Drawing1.vsdx create mode 100644 datasets/data-supply/documentation/static/allViews.PNG create mode 100644 datasets/data-supply/documentation/static/examples.txt create mode 100644 datasets/data-supply/documentation/static/objDetection_scoring_GT.PNG create mode 100644 datasets/data-supply/documentation/static/objDetection_scoring_PRED.PNG create mode 100644 datasets/data-supply/documentation/static/sampleDataSplitsFile.PNG create mode 100644 datasets/data-supply/documentation/static/sampleDataset.PNG create mode 100644 datasets/data-supply/documentation/static/sampleProblem.PNG create mode 100644 datasets/data-supply/documentation/static/sampleProblemTestView.PNG create mode 100644 datasets/data-supply/documentation/static/sampleProblemTrainView.PNG create mode 100644 datasets/data-supply/documentation/static/sampleProblem_objectDetection.PNG create mode 100644 datasets/data-supply/documentation/static/sampleSupply.PNG create mode 100644 datasets/data-supply/documentation/static/sampleTestView.PNG create mode 100644 datasets/data-supply/documentation/static/sampleTrainView.PNG create mode 100644 datasets/data-supply/documentation/static/schema fields spreadsheet.xlsx create mode 100644 datasets/data-supply/documentation/static/testView.PNG create mode 100644 datasets/data-supply/documentation/static/trainView.PNG create mode 100644 datasets/data-supply/documentation/supportedResourceTypesFormats.json create mode 100644 datasets/data-supply/schemas/README.md create mode 100644 datasets/data-supply/schemas/datasetSchema.json create mode 100644 datasets/data-supply/schemas/problemSchema.json create mode 100644 datasets/validate.py create mode 100644 examples/build_AutoEncoder_pipeline.py create mode 100644 examples/build_IsolationForest_pipline.py create mode 100644 examples/build_LODA_pipline.py create mode 100644 examples/run_automl.py create mode 100644 examples/run_certain_pipeline.py create mode 100644 examples/run_predefined_pipeline.py create mode 100644 examples/test_axolotl.py create mode 100644 install.sh create mode 100644 requirements.txt create mode 100644 test.sh create mode 100644 tested_file.txt create mode 100644 tests/build_ABOD_pipline.py create mode 100644 tests/build_AutoEncoder.py create mode 100644 tests/build_AutoRegODetect_pipeline.py create mode 100644 tests/build_AxiswiseScale_pipline.py create mode 100644 tests/build_BKFilter_pipline.py create mode 100644 tests/build_CBLOF_pipline.py create mode 100644 tests/build_CategoricalToBinary.py create mode 100644 tests/build_ColumnFilter_pipeline.py create mode 100644 tests/build_ContinuityValidation_pipline.py create mode 100644 tests/build_DeepLog_pipeline.py create mode 100644 tests/build_DiscreteCosineTransform.py create mode 100644 tests/build_DuplicationValidation_pipline.py create mode 100644 tests/build_FastFourierTransform.py create mode 100644 tests/build_HBOS_pipline.py create mode 100644 tests/build_HBOS_score_pipline.py create mode 100644 tests/build_HPFilter_pipline.py create mode 100644 
tests/build_HoltSmoothing_pipline.py create mode 100644 tests/build_HoltWintersExponentialSmoothing_pipline.py create mode 100644 tests/build_IsolationForest_pipline.py create mode 100644 tests/build_KDiscord_pipeline.py create mode 100644 tests/build_KNN_pipline.py create mode 100644 tests/build_LODA_pipline.py create mode 100644 tests/build_LOF_pipline.py create mode 100644 tests/build_LSTMOD_pipline.py create mode 100644 tests/build_MatrixProfile_pipeline.py create mode 100644 tests/build_MeanAverageTransform_pipline.py create mode 100644 tests/build_NonNegativeMatrixFactorization.py create mode 100644 tests/build_OCSVM_pipline.py create mode 100644 tests/build_PCAODetect_pipeline.py create mode 100644 tests/build_PowerTransform_pipline.py create mode 100644 tests/build_PyodCOF.py create mode 100644 tests/build_QuantileTransform_pipline.py create mode 100644 tests/build_RuleBasedFilter_pipline.py create mode 100644 tests/build_SOD_pipeline.py create mode 100644 tests/build_SimpleExponentialSmoothing_pipline.py create mode 100644 tests/build_Standardize_pipline.py create mode 100644 tests/build_TRMF_pipline.py create mode 100644 tests/build_Telemanom.py create mode 100644 tests/build_TimeIntervalTransform_pipeline.py create mode 100644 tests/build_TruncatedSVD_pipline.py create mode 100644 tests/build_VariationalAutoEncoder.py create mode 100644 tests/build_WaveletTransform_pipline.py create mode 100644 tests/build_test_detection_algorithm_PyodMoGaal.py create mode 100644 tests/build_test_detection_algorithm_PyodSoGaal.py create mode 100644 tests/build_test_feature_analysis_spectral_residual_transform_pipeline.py create mode 100644 tests/build_test_feature_analysis_statistical_abs_energy.py create mode 100644 tests/build_test_feature_analysis_statistical_abs_sum.py create mode 100644 tests/build_test_feature_analysis_statistical_gmean.py create mode 100644 tests/build_test_feature_analysis_statistical_hmean.py create mode 100644 tests/build_test_feature_analysis_statistical_kurtosis.py create mode 100644 tests/build_test_feature_analysis_statistical_maximum.py create mode 100644 tests/build_test_feature_analysis_statistical_mean.py create mode 100644 tests/build_test_feature_analysis_statistical_mean_abs.py create mode 100644 tests/build_test_feature_analysis_statistical_mean_abs_temporal_derivative.py create mode 100644 tests/build_test_feature_analysis_statistical_mean_temporal_derivative.py create mode 100644 tests/build_test_feature_analysis_statistical_median.py create mode 100644 tests/build_test_feature_analysis_statistical_median_absolute_deviation.py create mode 100644 tests/build_test_feature_analysis_statistical_minimum.py create mode 100644 tests/build_test_feature_analysis_statistical_skew.py create mode 100644 tests/build_test_feature_analysis_statistical_std.py create mode 100644 tests/build_test_feature_analysis_statistical_var.py create mode 100644 tests/build_test_feature_analysis_statistical_variation.py create mode 100644 tests/build_test_feature_analysis_statistical_vec_sum.py create mode 100644 tests/build_test_feature_analysis_statistical_willison_amplitude.py create mode 100644 tests/build_test_feature_analysis_statistical_zero_crossing.py create mode 100644 tests/build_test_time_series_seasonality_trend_decomposition.py create mode 100644 tods/.gitignore create mode 100644 tods/__init__.py create mode 100644 tods/common-primitives/HISTORY.md create mode 100644 tods/common-primitives/HOW_TO_MANAGE.md create mode 100644 tods/common-primitives/LICENSE.txt create 
mode 100644 tods/common-primitives/MANIFEST.in create mode 100644 tods/common-primitives/README.md create mode 100755 tods/common-primitives/add.sh create mode 100644 tods/common-primitives/common_primitives/__init__.py create mode 100644 tods/common-primitives/common_primitives/add_semantic_types.py create mode 100644 tods/common-primitives/common_primitives/audio_reader.py create mode 100644 tods/common-primitives/common_primitives/base.py create mode 100644 tods/common-primitives/common_primitives/cast_to_type.py create mode 100644 tods/common-primitives/common_primitives/column_map.py create mode 100644 tods/common-primitives/common_primitives/column_parser.py create mode 100644 tods/common-primitives/common_primitives/compute_metafeatures.py create mode 100644 tods/common-primitives/common_primitives/construct_predictions.py create mode 100644 tods/common-primitives/common_primitives/csv_reader.py create mode 100644 tods/common-primitives/common_primitives/cut_audio.py create mode 100644 tods/common-primitives/common_primitives/dataframe_flatten.py create mode 100644 tods/common-primitives/common_primitives/dataframe_image_reader.py create mode 100644 tods/common-primitives/common_primitives/dataframe_to_list.py create mode 100644 tods/common-primitives/common_primitives/dataframe_to_ndarray.py create mode 100644 tods/common-primitives/common_primitives/dataframe_utils.py create mode 100644 tods/common-primitives/common_primitives/datamart_augment.py create mode 100644 tods/common-primitives/common_primitives/datamart_download.py create mode 100644 tods/common-primitives/common_primitives/dataset_map.py create mode 100644 tods/common-primitives/common_primitives/dataset_sample.py create mode 100644 tods/common-primitives/common_primitives/dataset_to_dataframe.py create mode 100644 tods/common-primitives/common_primitives/dataset_utils.py create mode 100644 tods/common-primitives/common_primitives/datetime_field_compose.py create mode 100644 tods/common-primitives/common_primitives/datetime_range_filter.py create mode 100644 tods/common-primitives/common_primitives/denormalize.py create mode 100644 tods/common-primitives/common_primitives/extract_columns.py create mode 100644 tods/common-primitives/common_primitives/extract_columns_semantic_types.py create mode 100644 tods/common-primitives/common_primitives/extract_columns_structural_types.py create mode 100644 tods/common-primitives/common_primitives/fixed_split.py create mode 100644 tods/common-primitives/common_primitives/grouping_field_compose.py create mode 100644 tods/common-primitives/common_primitives/holt_smoothing.py create mode 100644 tods/common-primitives/common_primitives/holt_winters_exponential_smoothing.py create mode 100644 tods/common-primitives/common_primitives/horizontal_concat.py create mode 100644 tods/common-primitives/common_primitives/kfold_split.py create mode 100644 tods/common-primitives/common_primitives/kfold_split_timeseries.py create mode 100644 tods/common-primitives/common_primitives/lgbm_classifier.py create mode 100644 tods/common-primitives/common_primitives/list_to_dataframe.py create mode 100644 tods/common-primitives/common_primitives/list_to_ndarray.py create mode 100644 tods/common-primitives/common_primitives/mean_average_transform.py create mode 100644 tods/common-primitives/common_primitives/ndarray_to_dataframe.py create mode 100644 tods/common-primitives/common_primitives/ndarray_to_list.py create mode 100644 tods/common-primitives/common_primitives/no_split.py create mode 100644 
tods/common-primitives/common_primitives/normalize_column_references.py create mode 100644 tods/common-primitives/common_primitives/normalize_graphs.py create mode 100644 tods/common-primitives/common_primitives/numeric_range_filter.py create mode 100644 tods/common-primitives/common_primitives/one_hot_maker.py create mode 100644 tods/common-primitives/common_primitives/pandas_onehot_encoder.py create mode 100644 tods/common-primitives/common_primitives/random_forest.py create mode 100644 tods/common-primitives/common_primitives/ravel.py create mode 100644 tods/common-primitives/common_primitives/redact_columns.py create mode 100644 tods/common-primitives/common_primitives/regex_filter.py create mode 100644 tods/common-primitives/common_primitives/remove_columns.py create mode 100644 tods/common-primitives/common_primitives/remove_duplicate_columns.py create mode 100644 tods/common-primitives/common_primitives/remove_semantic_types.py create mode 100644 tods/common-primitives/common_primitives/rename_duplicate_columns.py create mode 100644 tods/common-primitives/common_primitives/replace_semantic_types.py create mode 100644 tods/common-primitives/common_primitives/simple_exponential_smoothing.py create mode 100644 tods/common-primitives/common_primitives/simple_profiler.py create mode 100644 tods/common-primitives/common_primitives/slacker/README.md create mode 100644 tods/common-primitives/common_primitives/slacker/__init__.py create mode 100644 tods/common-primitives/common_primitives/slacker/base.py create mode 100644 tods/common-primitives/common_primitives/slacker/estimation.py create mode 100644 tods/common-primitives/common_primitives/slacker/feature_extraction.py create mode 100644 tods/common-primitives/common_primitives/slacker/feature_selection.py create mode 100644 tods/common-primitives/common_primitives/stack_ndarray_column.py create mode 100644 tods/common-primitives/common_primitives/tabular_extractor.py create mode 100644 tods/common-primitives/common_primitives/term_filter.py create mode 100644 tods/common-primitives/common_primitives/text_reader.py create mode 100644 tods/common-primitives/common_primitives/train_score_split.py create mode 100644 tods/common-primitives/common_primitives/unseen_label_decoder.py create mode 100644 tods/common-primitives/common_primitives/unseen_label_encoder.py create mode 100644 tods/common-primitives/common_primitives/utils.py create mode 100644 tods/common-primitives/common_primitives/video_reader.py create mode 100644 tods/common-primitives/common_primitives/xgboost_dart.py create mode 100644 tods/common-primitives/common_primitives/xgboost_gbtree.py create mode 100644 tods/common-primitives/common_primitives/xgboost_regressor.py create mode 100644 tods/common-primitives/entry_points.ini create mode 100755 tods/common-primitives/git-add.sh create mode 100755 tods/common-primitives/git-check.sh create mode 100755 tods/common-primitives/list_primitives.py create mode 100644 tods/common-primitives/pipeline_runs/classification.light_gbm.DataFrameCommon/1.yaml.gz create mode 100644 tods/common-primitives/pipeline_runs/classification.random_forest.DataFrameCommon/1.yaml.gz create mode 120000 tods/common-primitives/pipeline_runs/classification.random_forest.DataFrameCommon/pipeline_run_extract_structural_types.yml.gz create mode 100644 tods/common-primitives/pipeline_runs/classification.xgboost_dart.DataFrameCommon/1.yaml.gz create mode 100644 tods/common-primitives/pipeline_runs/classification.xgboost_gbtree.DataFrameCommon/1.yaml.gz create 
mode 100644 tods/common-primitives/pipeline_runs/data_augmentation.datamart_augmentation.Common/2.yaml.gz create mode 100644 tods/common-primitives/pipeline_runs/data_preprocessing.dataset_sample.Common/1.yaml.gz create mode 120000 tods/common-primitives/pipeline_runs/data_preprocessing.one_hot_encoder.PandasCommon/pipeline_run_extract_structural_types.yml.gz create mode 120000 tods/common-primitives/pipeline_runs/data_transformation.column_parser.DataFrameCommon/1.yaml.gz create mode 120000 tods/common-primitives/pipeline_runs/data_transformation.column_parser.DataFrameCommon/pipeline_run_extract_structural_types.yml.gz create mode 120000 tods/common-primitives/pipeline_runs/data_transformation.column_parser.DataFrameCommon/pipeline_run_group_field_compose.yml.gz create mode 120000 tods/common-primitives/pipeline_runs/data_transformation.construct_predictions.DataFrameCommon/1.yaml.gz create mode 120000 tods/common-primitives/pipeline_runs/data_transformation.construct_predictions.DataFrameCommon/pipeline_run_extract_structural_types.yml.gz create mode 120000 tods/common-primitives/pipeline_runs/data_transformation.dataset_to_dataframe.Common/1.yaml.gz create mode 120000 tods/common-primitives/pipeline_runs/data_transformation.dataset_to_dataframe.Common/pipeline_run_extract_structural_types.yml.gz create mode 120000 tods/common-primitives/pipeline_runs/data_transformation.dataset_to_dataframe.Common/pipeline_run_group_field_compose.yml.gz create mode 120000 tods/common-primitives/pipeline_runs/data_transformation.extract_columns_by_semantic_types.DataFrameCommon/1.yaml.gz create mode 120000 tods/common-primitives/pipeline_runs/data_transformation.extract_columns_by_semantic_types.DataFrameCommon/pipeline_run_extract_structural_types.yml.gz create mode 100644 tods/common-primitives/pipeline_runs/data_transformation.extract_columns_by_structural_types.Common/pipeline_run.yml.gz create mode 100644 tods/common-primitives/pipeline_runs/data_transformation.grouping_field_compose.Common/pipeline_run.yml.gz create mode 120000 tods/common-primitives/pipeline_runs/data_transformation.horizontal_concat.DataFrameConcat/1.yaml.gz create mode 120000 tods/common-primitives/pipeline_runs/data_transformation.remove_columns.Common/pipeline_run_extract_structural_types.yml.gz create mode 100644 tods/common-primitives/pipeline_runs/regression.xgboost_gbtree.DataFrameCommon/1.yml create mode 120000 tods/common-primitives/pipeline_runs/schema_discovery.profiler.Common/pipeline_run_extract_structural_types.yml.gz create mode 120000 tods/common-primitives/pipeline_runs/schema_discovery.profiler.Common/pipeline_run_group_field_compose.yml.gz create mode 100644 tods/common-primitives/pipelines/classification.light_gbm.DataFrameCommon/d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json create mode 120000 tods/common-primitives/pipelines/classification.random_forest.DataFrameCommon/b523335c-0c47-4d02-a582-f69609cde1e8.json create mode 100644 tods/common-primitives/pipelines/classification.random_forest.DataFrameCommon/ccad0f9c-130e-4063-a91e-ea65a18cb041.yaml create mode 100644 tods/common-primitives/pipelines/classification.xgboost_dart.DataFrameCommon/b7a24816-2518-4073-9c45-b97f2b2fee30.json create mode 100644 tods/common-primitives/pipelines/classification.xgboost_gbtree.DataFrameCommon/4d402450-2562-48cc-93fd-719fb658c43c.json create mode 100644 tods/common-primitives/pipelines/data_augmentation.datamart_augmentation.Common/3afd2bd2-7ba1-4ac1-928f-fad0c39a05e5.json create mode 100644 
tods/common-primitives/pipelines/data_augmentation.datamart_augmentation.Common/4ff2f21d-1bba-4c44-bb96-e05728bcf6ed.json create mode 100644 tods/common-primitives/pipelines/data_preprocessing.dataset_sample.Common/387d432a-9893-4558-b190-1c5e9e399dbf.yaml create mode 100644 tods/common-primitives/pipelines/data_preprocessing.one_hot_encoder.MakerCommon/2b307634-f01e-412e-8d95-7e54afd4731f.json create mode 120000 tods/common-primitives/pipelines/data_preprocessing.one_hot_encoder.PandasCommon/b523335c-0c47-4d02-a582-f69609cde1e8.json create mode 120000 tods/common-primitives/pipelines/data_transformation.column_parser.DataFrameCommon/4ec215d1-6484-4502-a6dd-f659943ccb94.json create mode 120000 tods/common-primitives/pipelines/data_transformation.column_parser.DataFrameCommon/a8c40699-c48d-4f12-aa18-639c5fb6baae.json create mode 120000 tods/common-primitives/pipelines/data_transformation.column_parser.DataFrameCommon/b523335c-0c47-4d02-a582-f69609cde1e8.json create mode 120000 tods/common-primitives/pipelines/data_transformation.column_parser.DataFrameCommon/d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json create mode 120000 tods/common-primitives/pipelines/data_transformation.construct_predictions.DataFrameCommon/4ec215d1-6484-4502-a6dd-f659943ccb94.json create mode 120000 tods/common-primitives/pipelines/data_transformation.construct_predictions.DataFrameCommon/b523335c-0c47-4d02-a582-f69609cde1e8.json create mode 120000 tods/common-primitives/pipelines/data_transformation.construct_predictions.DataFrameCommon/d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json create mode 120000 tods/common-primitives/pipelines/data_transformation.dataset_to_dataframe.Common/4ec215d1-6484-4502-a6dd-f659943ccb94.json create mode 120000 tods/common-primitives/pipelines/data_transformation.dataset_to_dataframe.Common/a8c40699-c48d-4f12-aa18-639c5fb6baae.json create mode 120000 tods/common-primitives/pipelines/data_transformation.dataset_to_dataframe.Common/b523335c-0c47-4d02-a582-f69609cde1e8.json create mode 120000 tods/common-primitives/pipelines/data_transformation.dataset_to_dataframe.Common/d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json create mode 100644 tods/common-primitives/pipelines/data_transformation.extract_columns.Common/4ec215d1-6484-4502-a6dd-f659943ccb94.json create mode 100644 tods/common-primitives/pipelines/data_transformation.extract_columns.Common/pipeline.py create mode 120000 tods/common-primitives/pipelines/data_transformation.extract_columns_by_semantic_types.DataFrameCommon/b523335c-0c47-4d02-a582-f69609cde1e8.json create mode 120000 tods/common-primitives/pipelines/data_transformation.extract_columns_by_semantic_types.DataFrameCommon/d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json create mode 100644 tods/common-primitives/pipelines/data_transformation.extract_columns_by_structural_types.Common/b523335c-0c47-4d02-a582-f69609cde1e8.json create mode 100644 tods/common-primitives/pipelines/data_transformation.extract_columns_by_structural_types.Common/pipeline.py create mode 100644 tods/common-primitives/pipelines/data_transformation.grouping_field_compose.Common/a8c40699-c48d-4f12-aa18-639c5fb6baae.json create mode 100644 tods/common-primitives/pipelines/data_transformation.grouping_field_compose.Common/pipeline.py create mode 120000 tods/common-primitives/pipelines/data_transformation.horizontal_concat.DataFrameConcat/2b307634-f01e-412e-8d95-7e54afd4731f.json create mode 120000 tods/common-primitives/pipelines/data_transformation.remove_columns.Common/b523335c-0c47-4d02-a582-f69609cde1e8.json create mode 
100644 tods/common-primitives/pipelines/data_transformation.rename_duplicate_name.DataFrameCommon/11ee9290-992d-4e48-97ed-1a6e4c15f92f.json create mode 100644 tods/common-primitives/pipelines/evaluation.kfold_timeseries_split.Common/k-fold-timeseries-split.yml create mode 100644 tods/common-primitives/pipelines/operator.dataset_map.DataFrameCommon/k-fold-timeseries-split-raw.yml create mode 100644 tods/common-primitives/pipelines/regression.xgboost_gbtree.DataFrameCommon/0f636602-6299-411b-9873-4b974cd393ba.json create mode 120000 tods/common-primitives/pipelines/schema_discovery.profiler.Common/4ec215d1-6484-4502-a6dd-f659943ccb94.json create mode 120000 tods/common-primitives/pipelines/schema_discovery.profiler.Common/a8c40699-c48d-4f12-aa18-639c5fb6baae.json create mode 120000 tods/common-primitives/pipelines/schema_discovery.profiler.Common/b523335c-0c47-4d02-a582-f69609cde1e8.json create mode 100755 tods/common-primitives/run_pipelines.sh create mode 100755 tods/common-primitives/run_tests.py create mode 100644 tods/common-primitives/setup.cfg create mode 100644 tods/common-primitives/setup.py create mode 100644 tods/common-primitives/sklearn-wrap/.gitignore create mode 100644 tods/common-primitives/sklearn-wrap/requirements.txt create mode 100644 tods/common-primitives/sklearn-wrap/setup.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKARDRegression.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKAdaBoostClassifier.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKAdaBoostRegressor.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKBaggingClassifier.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKBaggingRegressor.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKBernoulliNB.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKBinarizer.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKCountVectorizer.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKDecisionTreeClassifier.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKDecisionTreeRegressor.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKDummyClassifier.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKDummyRegressor.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKElasticNet.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKExtraTreesClassifier.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKExtraTreesRegressor.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKFastICA.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKFeatureAgglomeration.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKGaussianNB.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKGaussianProcessRegressor.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKGaussianRandomProjection.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKGenericUnivariateSelect.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKGradientBoostingClassifier.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKGradientBoostingRegressor.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKImputer.py create mode 100644 
tods/common-primitives/sklearn-wrap/sklearn_wrap/SKKNeighborsClassifier.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKKNeighborsRegressor.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKKernelPCA.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKKernelRidge.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKLars.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKLasso.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKLassoCV.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearDiscriminantAnalysis.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearRegression.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearSVC.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearSVR.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKLogisticRegression.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKMLPClassifier.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKMLPRegressor.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKMaxAbsScaler.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKMinMaxScaler.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKMissingIndicator.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKMultinomialNB.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKNearestCentroid.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKNormalizer.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKNystroem.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKOneHotEncoder.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKOrdinalEncoder.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKPCA.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKPassiveAggressiveClassifier.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKPassiveAggressiveRegressor.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKPolynomialFeatures.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKQuadraticDiscriminantAnalysis.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKQuantileTransformer.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKRBFSampler.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKRandomForestClassifier.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKRandomForestRegressor.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKRandomTreesEmbedding.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKRidge.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKRobustScaler.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKSGDClassifier.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKSGDRegressor.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKSVC.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKSVR.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKSelectFwe.py create mode 100644 
tods/common-primitives/sklearn-wrap/sklearn_wrap/SKSelectPercentile.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKSparseRandomProjection.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKStandardScaler.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKStringImputer.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKTfidfVectorizer.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKTruncatedSVD.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/SKVarianceThreshold.py create mode 100644 tods/common-primitives/sklearn-wrap/sklearn_wrap/__init__.py create mode 100644 tods/common-primitives/tests/test_audio_reader.py create mode 100644 tods/common-primitives/tests/test_cast_to_type.py create mode 100644 tods/common-primitives/tests/test_column_map.py create mode 100644 tods/common-primitives/tests/test_column_parser.py create mode 100644 tods/common-primitives/tests/test_compute_metafeatures.py create mode 100644 tods/common-primitives/tests/test_construct_predictions.py create mode 100644 tods/common-primitives/tests/test_csv_reader.py create mode 100644 tods/common-primitives/tests/test_cut_audio.py create mode 100644 tods/common-primitives/tests/test_dataframe_flatten.py create mode 100644 tods/common-primitives/tests/test_dataframe_image_reader.py create mode 100644 tods/common-primitives/tests/test_dataframe_to_list.py create mode 100644 tods/common-primitives/tests/test_dataframe_to_ndarray.py create mode 100644 tods/common-primitives/tests/test_dataframe_utils.py create mode 100644 tods/common-primitives/tests/test_dataset_map.py create mode 100644 tods/common-primitives/tests/test_dataset_sample.py create mode 100644 tods/common-primitives/tests/test_dataset_to_dataframe.py create mode 100644 tods/common-primitives/tests/test_datetime_field_compose.py create mode 100644 tods/common-primitives/tests/test_datetime_range_filter.py create mode 100644 tods/common-primitives/tests/test_denormalize.py create mode 100644 tods/common-primitives/tests/test_extract_columns_semantic_types.py create mode 100644 tods/common-primitives/tests/test_extract_columns_structural_types.py create mode 100644 tods/common-primitives/tests/test_fixed_split.py create mode 100644 tods/common-primitives/tests/test_grouping_field_compose.py create mode 100644 tods/common-primitives/tests/test_horizontal_concat.py create mode 100644 tods/common-primitives/tests/test_kfold_split.py create mode 100644 tods/common-primitives/tests/test_kfold_timeseries_split.py create mode 100644 tods/common-primitives/tests/test_lgbm_classifier.py create mode 100644 tods/common-primitives/tests/test_list_to_dataframe.py create mode 100644 tods/common-primitives/tests/test_list_to_ndarray.py create mode 100644 tods/common-primitives/tests/test_ndarray_to_dataframe.py create mode 100644 tods/common-primitives/tests/test_ndarray_to_list.py create mode 100644 tods/common-primitives/tests/test_no_split.py create mode 100644 tods/common-primitives/tests/test_normalize_column_references.py create mode 100644 tods/common-primitives/tests/test_normalize_graphs.py create mode 100644 tods/common-primitives/tests/test_numeric_range_filter.py create mode 100644 tods/common-primitives/tests/test_one_hot_maker.py create mode 100644 tods/common-primitives/tests/test_pandas_onehot_encoder.py create mode 100644 tods/common-primitives/tests/test_random_forest.py create mode 100644 
tods/common-primitives/tests/test_ravel.py create mode 100644 tods/common-primitives/tests/test_redact_columns.py create mode 100644 tods/common-primitives/tests/test_regex_filter.py create mode 100644 tods/common-primitives/tests/test_remove_duplicate_columns.py create mode 100644 tods/common-primitives/tests/test_rename_duplicate_columns.py create mode 100644 tods/common-primitives/tests/test_replace_semantic_types.py create mode 100644 tods/common-primitives/tests/test_simple_profiler.py create mode 100644 tods/common-primitives/tests/test_stack_ndarray_column.py create mode 100644 tods/common-primitives/tests/test_tabular_extractor.py create mode 100644 tods/common-primitives/tests/test_term_filter.py create mode 100644 tods/common-primitives/tests/test_text_reader.py create mode 100644 tods/common-primitives/tests/test_train_score_split.py create mode 100644 tods/common-primitives/tests/test_unseen_label_decoder.py create mode 100644 tods/common-primitives/tests/test_unseen_label_encoder.py create mode 100644 tods/common-primitives/tests/test_video_reader.py create mode 100644 tods/common-primitives/tests/test_xgboost_dart.py create mode 100644 tods/common-primitives/tests/test_xgboost_gbtree.py create mode 100644 tods/common-primitives/tests/test_xgboost_regressor.py create mode 100644 tods/common-primitives/tests/utils.py create mode 100644 tods/data_processing/CategoricalToBinary.py create mode 100644 tods/data_processing/ColumnFilter.py create mode 100644 tods/data_processing/ContinuityValidation.py create mode 100644 tods/data_processing/DatasetToDataframe.py create mode 100644 tods/data_processing/DuplicationValidation.py create mode 100644 tods/data_processing/TimeIntervalTransform.py create mode 100644 tods/data_processing/TimeStampValidation.py create mode 100644 tods/data_processing/__init__.py create mode 100644 tods/detection_algorithm/AutoRegODetect.py create mode 100644 tods/detection_algorithm/DeepLog.py create mode 100644 tods/detection_algorithm/KDiscordODetect.py create mode 100755 tods/detection_algorithm/LSTMODetect.py create mode 100644 tods/detection_algorithm/MatrixProfile.py create mode 100644 tods/detection_algorithm/PCAODetect.py create mode 100644 tods/detection_algorithm/PyodABOD.py create mode 100644 tods/detection_algorithm/PyodAE.py create mode 100644 tods/detection_algorithm/PyodCBLOF.py create mode 100644 tods/detection_algorithm/PyodCOF.py create mode 100644 tods/detection_algorithm/PyodHBOS.py create mode 100644 tods/detection_algorithm/PyodIsolationForest.py create mode 100644 tods/detection_algorithm/PyodKNN.py create mode 100644 tods/detection_algorithm/PyodLODA.py create mode 100644 tods/detection_algorithm/PyodLOF.py create mode 100755 tods/detection_algorithm/PyodMoGaal.py create mode 100644 tods/detection_algorithm/PyodOCSVM.py create mode 100644 tods/detection_algorithm/PyodSOD.py create mode 100644 tods/detection_algorithm/PyodSoGaal.py create mode 100644 tods/detection_algorithm/PyodVAE.py create mode 100644 tods/detection_algorithm/Telemanom.py create mode 100755 tods/detection_algorithm/UODBasePrimitive.py create mode 100644 tods/detection_algorithm/core/AutoRegOD.py create mode 100644 tods/detection_algorithm/core/CollectiveBase.py create mode 100755 tods/detection_algorithm/core/CollectiveCommonTest.py create mode 100644 tods/detection_algorithm/core/KDiscord.py create mode 100755 tods/detection_algorithm/core/LSTMOD.py create mode 100644 tods/detection_algorithm/core/MultiAutoRegOD.py create mode 100644 
tods/detection_algorithm/core/PCA.py create mode 100755 tods/detection_algorithm/core/UODCommonTest.py create mode 100644 tods/detection_algorithm/core/algorithm_implementation.py create mode 100644 tods/detection_algorithm/core/test_CollectiveBase.py create mode 100644 tods/detection_algorithm/core/utility.py create mode 100644 tods/detection_algorithm/core/utils/channel.py create mode 100644 tods/detection_algorithm/core/utils/errors.py create mode 100644 tods/detection_algorithm/core/utils/modeling.py create mode 100644 tods/detection_algorithm/core/utils/utils.py create mode 100644 tods/entry_points.ini create mode 100644 tods/feature_analysis/AutoCorrelation.py create mode 100644 tods/feature_analysis/BKFilter.py create mode 100644 tods/feature_analysis/DiscreteCosineTransform.py create mode 100644 tods/feature_analysis/FastFourierTransform.py create mode 100644 tods/feature_analysis/HPFilter.py create mode 100644 tods/feature_analysis/NonNegativeMatrixFactorization.py create mode 100644 tods/feature_analysis/SKTruncatedSVD.py create mode 100644 tods/feature_analysis/SpectralResidualTransform.py create mode 100644 tods/feature_analysis/StatisticalAbsEnergy.py create mode 100644 tods/feature_analysis/StatisticalAbsSum.py create mode 100644 tods/feature_analysis/StatisticalGmean.py create mode 100644 tods/feature_analysis/StatisticalHmean.py create mode 100644 tods/feature_analysis/StatisticalKurtosis.py create mode 100644 tods/feature_analysis/StatisticalMaximum.py create mode 100644 tods/feature_analysis/StatisticalMean.py create mode 100644 tods/feature_analysis/StatisticalMeanAbs.py create mode 100644 tods/feature_analysis/StatisticalMeanAbsTemporalDerivative.py create mode 100644 tods/feature_analysis/StatisticalMeanTemporalDerivative.py create mode 100644 tods/feature_analysis/StatisticalMedian.py create mode 100644 tods/feature_analysis/StatisticalMedianAbsoluteDeviation.py create mode 100644 tods/feature_analysis/StatisticalMinimum.py create mode 100644 tods/feature_analysis/StatisticalSkew.py create mode 100644 tods/feature_analysis/StatisticalStd.py create mode 100644 tods/feature_analysis/StatisticalVar.py create mode 100644 tods/feature_analysis/StatisticalVariation.py create mode 100644 tods/feature_analysis/StatisticalVecSum.py create mode 100644 tods/feature_analysis/StatisticalWillisonAmplitude.py create mode 100644 tods/feature_analysis/StatisticalZeroCrossing.py create mode 100644 tods/feature_analysis/TRMF.py create mode 100644 tods/feature_analysis/WaveletTransform.py create mode 100644 tods/feature_analysis/__init__.py create mode 100644 tods/reinforcement/RuleBasedFilter.py create mode 100644 tods/requirements.txt create mode 100755 tods/run_tests.py create mode 100644 tods/setup.py create mode 100644 tods/tests/test_AutoRegODetect.py create mode 100755 tods/tests/test_BKFilter.py create mode 100644 tods/tests/test_CategoricalBinary.py create mode 100644 tods/tests/test_ColumnFilter.py create mode 100644 tods/tests/test_ContinuityValidation.py create mode 100644 tods/tests/test_DeepLog.py create mode 100644 tods/tests/test_DiscreteCosineTransform.py create mode 100644 tods/tests/test_DuplicationValidation.py create mode 100644 tods/tests/test_FastFourierTransform.py create mode 100644 tods/tests/test_HPFilter.py create mode 100644 tods/tests/test_HoltSmoothing.py create mode 100644 tods/tests/test_HoltWintersExponentialSmoothing.py create mode 100644 tods/tests/test_KDiscordODetect.py create mode 100644 tods/tests/test_LSTMODetector.py create mode 100644 
tods/tests/test_MatrixProfile.py create mode 100644 tods/tests/test_MovingAverageTransform.py create mode 100644 tods/tests/test_NonNegativeMatrixFactorization.py create mode 100644 tods/tests/test_PCAODetect.py create mode 100644 tods/tests/test_PyodABOD.py create mode 100644 tods/tests/test_PyodAE.py create mode 100644 tods/tests/test_PyodCBLOF.py create mode 100644 tods/tests/test_PyodCOF.py create mode 100644 tods/tests/test_PyodHBOS.py create mode 100644 tods/tests/test_PyodIsolationForest.py create mode 100644 tods/tests/test_PyodKNN.py create mode 100644 tods/tests/test_PyodLODA.py create mode 100644 tods/tests/test_PyodLOF.py create mode 100644 tods/tests/test_PyodMoGaal.py create mode 100644 tods/tests/test_PyodOCSVM.py create mode 100644 tods/tests/test_PyodSOD.py create mode 100644 tods/tests/test_PyodSoGaal.py create mode 100644 tods/tests/test_PyodVAE.py create mode 100644 tods/tests/test_SKAxiswiseScaler.py create mode 100644 tods/tests/test_SKPowerTransformer.py create mode 100644 tods/tests/test_SKQuantileTransformer.py create mode 100644 tods/tests/test_SKStandardizer.py create mode 100644 tods/tests/test_SKTruncatedSVD.py create mode 100644 tods/tests/test_SimpleExponentialSmoothing.py create mode 100644 tods/tests/test_SpectralResidualTransform.py create mode 100644 tods/tests/test_StastiticalStd.py create mode 100644 tods/tests/test_StatisticalAbsEnergy.py create mode 100644 tods/tests/test_StatisticalAbsSum.py create mode 100644 tods/tests/test_StatisticalGmean.py create mode 100644 tods/tests/test_StatisticalHmean.py create mode 100644 tods/tests/test_StatisticalKurtosis.py create mode 100644 tods/tests/test_StatisticalMaximum.py create mode 100644 tods/tests/test_StatisticalMean.py create mode 100644 tods/tests/test_StatisticalMeanAbs.py create mode 100644 tods/tests/test_StatisticalMeanAbsTemporalDerivative.py create mode 100644 tods/tests/test_StatisticalMeanTemporalDerivative.py create mode 100644 tods/tests/test_StatisticalMedian.py create mode 100644 tods/tests/test_StatisticalMedianAbsoluteDeviation.py create mode 100644 tods/tests/test_StatisticalMinimum.py create mode 100644 tods/tests/test_StatisticalSkew.py create mode 100644 tods/tests/test_StatisticalVar.py create mode 100644 tods/tests/test_StatisticalVariation.py create mode 100644 tods/tests/test_StatisticalVecSum.py create mode 100644 tods/tests/test_StatisticalWillisonAmplitude.py create mode 100644 tods/tests/test_StatisticalZeroCrossing.py create mode 100644 tods/tests/test_TRMF.py create mode 100644 tods/tests/test_Telemanom.py create mode 100644 tods/tests/test_TimeSeriesSeasonalityTrendDecomposition.py create mode 100644 tods/tests/test_TimeStampValidation.py create mode 100644 tods/tests/test_WaveletTransformer.py create mode 100644 tods/tests/utils.py create mode 100644 tods/timeseries_processing/.HoltSmoothing.py.swo create mode 100644 tods/timeseries_processing/HoltSmoothing.py create mode 100644 tods/timeseries_processing/HoltWintersExponentialSmoothing.py create mode 100644 tods/timeseries_processing/MovingAverageTransform.py create mode 100644 tods/timeseries_processing/SKAxiswiseScaler.py create mode 100644 tods/timeseries_processing/SKPowerTransformer.py create mode 100644 tods/timeseries_processing/SKQuantileTransformer.py create mode 100644 tods/timeseries_processing/SKStandardScaler.py create mode 100644 tods/timeseries_processing/SimpleExponentialSmoothing.py create mode 100644 tods/timeseries_processing/TimeSeriesSeasonalityTrendDecomposition.py create mode 100644 
tods/timeseries_processing/__init__.py create mode 100644 tods/tods/__init__.py create mode 100644 tods/tods/search/__init__.py create mode 100644 tods/tods/search/brute_force_search.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ac9a2f2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,116 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +tests/.asv + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +docs/d3m.rst +docs/d3m.*.rst + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mypy +.mypy_cache/ + +# site +public/ + +.idea/ +tmp/ + +*.swp +results.csv +pipeline.yml +pipeline_run.yml +example_pipeline.json +.DS_Store +tmp.txt diff --git a/README.md b/README.md new file mode 100644 index 0000000..2dc5c2f --- /dev/null +++ b/README.md @@ -0,0 +1,143 @@ +# TODS +This is a time-series outlier detection system. + +## Axolotl +Run the pre-defined pipelines: +``` +python examples/build_AutoEncoder_pipeline.py +python examples/run_predefined_pipeline.py +``` + +## Installation + +This package works with **Python 3.6** and pip 19+. You need to have the following packages installed on the system (for Debian/Ubuntu): +``` +sudo apt-get install libssl-dev libcurl4-openssl-dev libyaml-dev build-essential libopenblas-dev libcap-dev ffmpeg +``` + +Then run the script `install.sh`. The script will install the d3m core package with: +``` +cd d3m +pip3 install -e . +cd .. +``` +Then it installs the common primitives (which are used in the running examples): +``` +cd common-primitives +pip3 install -e . +cd .. +``` +Next, it installs the sklearn wrapper with: +``` +cd sklearn-wrap +pip3 install -r requirements.txt +pip3 install -e . +cd .. +``` +Finally, it installs our anomaly primitives with: +``` +cd anomaly-primitives +pip3 install -r requirements.txt +pip3 install -e . +cd .. +``` + +Some dependencies may be missing from the list above; if you run into errors, please install the missing packages yourself. + +# Dataset +Datasets are located in `datasets/anomaly`. `raw_data` is the raw time series data. `transform.py` is a script that transforms the raw data into D3M format. `template` includes some templates for generating D3M data. If you run `transform.py`, the script will load the raw `kpi` data and create a folder named `kpi` in D3M format. + +The generated csv file will have the following columns: `d3mIndex`, `timestamp`, `value`, `ground_truth`. In the example kpi dataset, there is only one value column; other datasets may have several. The goal of the pipeline is to predict the `ground_truth` based on `timestamp` and the value(s).
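+ +For illustration, the first few rows of the generated csv might look like the following (the values below are made up; only the column layout matches the description above): +``` +d3mIndex,timestamp,value,ground_truth +0,1469376000,0.847,0 +1,1469376300,0.812,0 +2,1469376600,1.932,1 +```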
+ +There is a nice script to check whether the dataset is in the right format. Run +``` +python3 datasets/validate.py datasets/anomaly/kpi/ +``` +The expected output is as follows: +``` +Validating problem '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/SCORE/problem_TEST/problemDoc.json'. +Validating dataset '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/SCORE/dataset_TEST/datasetDoc.json'. +Validating problem '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/kpi_problem/problemDoc.json'. +Validating problem '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/TEST/problem_TEST/problemDoc.json'. +Validating dataset '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/TEST/dataset_TEST/datasetDoc.json'. +Validating dataset '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/kpi_dataset/datasetDoc.json'. +Validating dataset '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/TRAIN/dataset_TRAIN/datasetDoc.json'. +Validating problem '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/TRAIN/problem_TRAIN/problemDoc.json'. +Validating all datasets and problems. +There are no errors. +``` +Of course, you can also create other datasets with `transform.py`. But for now, we can focus on this example dataset since other datasets are usually in the same format. + +# Example +In D3M, our goal is to provide a **solution** to a **problem** on a **dataset**. Here, a solution is a pipeline, which consists of data processing, classifiers, etc. + +Run the example to build the first pipeline with +``` +python3 examples/build_iforest_pipline.py +``` +Note that we have not implemented iForest yet; this one is actually a Random Forest. This will generate a file `pipeline.yml`, which describes a pipeline. We can run the pipeline on the example data in this repo as follows: +``` +python3 -m d3m runtime fit-produce -p pipeline.yml -r datasets/anomaly/kpi/TRAIN/problem_TRAIN/problemDoc.json -i datasets/anomaly/kpi/TRAIN/dataset_TRAIN/datasetDoc.json -t datasets/anomaly/kpi/TEST/dataset_TEST/datasetDoc.json -o results.csv -O pipeline_run.yml +``` +Another example, on a subset of the sequences of the Yahoo dataset, is as follows: +``` +python3 -m d3m runtime fit-produce -p pipeline.yml -r datasets/anomaly/yahoo_sub_5/TRAIN/problem_TRAIN/problemDoc.json -i datasets/anomaly/yahoo_sub_5/TRAIN/dataset_TRAIN/datasetDoc.json -t datasets/anomaly/yahoo_sub_5/TEST/dataset_TEST/datasetDoc.json -o results.csv -O pipeline_run.yml +``` +The above commands will generate two files, `results.csv` and `pipeline_run.yml`. + +# How to add a new primitive + +Put new primitives in `anomaly-primitives/anomaly_primitives/`. There is an example for Isolation Forest (however, it is essentially a Random Forest even though the name is IsolationForest; more work is needed to turn it into a real IsolationForest). + +In addition to adding the new file, you need to register the primitive in `anomaly-primitives/setup.py` and rerun the pip install. + +Use the following command to check whether your new primitives are registered: +``` +python3 -m d3m index search +``` + +Test the new primitives: +``` +python3 examples/build_iforest_pipline.py +``` + +# Template for meta-data in primitives + +* `__author__`: `DATA Lab at Texas A&M University` +* `name`: Just a name. Name your primitive with a few words. +* `python_path`: This path should have **5** segments. The first two segments should be `d3m.primitives`.
The third segment should be `anomaly_detection`, `data_preprocessing` or `feature_construction` (it should match `primitive_family`). The fourth segment should be your algorithm name, e.g., `isolation_forest`. Note that this name should also be added to [this file](d3m/d3m/metadata/primitive_names.py). The last segment should be one of `Preprocessing`, `Feature`, `Algorithm` (for now). +* `source`: `name` should be `DATA Lab at Texas A&M University`, `contact` should be `mailto:khlai037@tamu.edu`, and `uris` should contain `https://gitlab.com/lhenry15/tods.git` and the path to your .py file. +* `algorithm_types`: Name the algorithm type yourself and add it to [here](d3m/d3m/metadata/schemas/v0/definitions.json#L1957). **Then reinstall d3m.** Fill this field with `metadata_base.PrimitiveAlgorithmType.YOUR_NAME` +* `primitive_family`: For preprocessing primitives, use `metadata_base.PrimitiveFamily.DATA_PREPROCESSING`. For feature analysis primitives, use `metadata_base.PrimitiveFamily.FEATURE_CONSTRUCTION`. For anomaly detection primitives, use `metadata_base.PrimitiveFamily.ANOMALY_DETECTION`. +* `id`: Randomly generate one with `import uuid; uuid.uuid4()` +* `hyperparameters_to_tune`: Specify what hyperparameters can be tuned in your primitive +* `version`: `0.0.1` + +Notes: + +1. `installation` is not required; we removed it. + +2. Try reinstalling everything if it does not work. + +3. An example of the fake Isolation Forest is [here](anomaly-primitives/anomaly_primitives/SKIsolationForest.py#L294) + + +## Resources of D3M + +If you still have questions, you may refer to the following resources. + +Dataset format: [https://gitlab.com/datadrivendiscovery/data-supply](https://gitlab.com/datadrivendiscovery/data-supply) + +Instructions for creating primitives: [https://docs.datadrivendiscovery.org/v2020.1.9/interfaces.html](https://docs.datadrivendiscovery.org/v2020.1.9/interfaces.html) + +We use a stable version of the d3m core package at [https://gitlab.com/datadrivendiscovery/d3m/-/tree/v2020.1.9](https://gitlab.com/datadrivendiscovery/d3m/-/tree/v2020.1.9). + +The documentation is at [https://docs.datadrivendiscovery.org/](https://docs.datadrivendiscovery.org/). + +The core package documentation is at [https://docs.datadrivendiscovery.org/v2020.1.9/index.html](https://docs.datadrivendiscovery.org/v2020.1.9/index.html) + +The common-primitives package is v0.8.0, at [https://gitlab.com/datadrivendiscovery/common-primitives/-/tree/v0.8.0/common_primitives](https://gitlab.com/datadrivendiscovery/common-primitives/-/tree/v0.8.0/common_primitives) + +The sklearn-wrap package uses the dist branch: [https://gitlab.com/datadrivendiscovery/sklearn-wrap/-/tree/dist](https://gitlab.com/datadrivendiscovery/sklearn-wrap/-/tree/dist) + +There are other primitives developed by many universities, but they are not used in this repo. See [https://gitlab.com/datadrivendiscovery/primitives](https://gitlab.com/datadrivendiscovery/primitives) diff --git a/axolotl/.gitignore b/axolotl/.gitignore new file mode 100644 index 0000000..66fb22f --- /dev/null +++ b/axolotl/.gitignore @@ -0,0 +1,108 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +tests/.asv + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +docs/d3m.rst +docs/d3m.*.rst + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mypy +.mypy_cache/ + +# site +public/ + +.idea/ +tmp/ diff --git a/axolotl/.gitlab-ci.yml b/axolotl/.gitlab-ci.yml new file mode 100644 index 0000000..d8b359f --- /dev/null +++ b/axolotl/.gitlab-ci.yml @@ -0,0 +1,33 @@ +tests: + image: registry.gitlab.com/axolotl1/axolotl/base:latest + stage: test + tags: + - d3m_runner + services: + - docker:dind + variables: + DOCKER_HOST: tcp://docker:2375 + DOCKER_TLS_CERTDIR: "" + GIT_SUBMODULE_STRATEGY: recursive + script: + - pip3 install -e . + - python3 ./run_tests.py + + +build_base_image: + stage: build + image: registry.gitlab.com/datadrivendiscovery/images/testing:ubuntu-bionic-python36 + tags: + - d3m_runner + services: + - docker:dind + variables: + DOCKER_HOST: tcp://docker:2375 + DOCKER_TLS_CERTDIR: "" + script: + - ./images/build-images.sh base + only: + - devel + + + diff --git a/axolotl/.gitmodules b/axolotl/.gitmodules new file mode 100644 index 0000000..5706087 --- /dev/null +++ b/axolotl/.gitmodules @@ -0,0 +1,3 @@ +[submodule "tests/data"] + path = tests/data + url = https://gitlab.com/datadrivendiscovery/tests-data.git diff --git a/axolotl/LICENSE b/axolotl/LICENSE new file mode 100644 index 0000000..6f75635 --- /dev/null +++ b/axolotl/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/axolotl/README.md b/axolotl/README.md new file mode 100644 index 0000000..bd59995 --- /dev/null +++ b/axolotl/README.md @@ -0,0 +1,41 @@ +# Axolotl + +This package provides an easy, high-level abstraction +of the [D3M](https://gitlab.com/datadrivendiscovery/d3m) API for AutoML. It contains a suite of basic +requirements and building blocks, +[primitives](https://gitlab.com/datadrivendiscovery/primitives). + +## Installation + +The package provides two different sets of dependencies, +one with GPU support and one that uses only the CPU. For the installation +we strongly encourage the use of a Python 3.6 virtual environment. + +* CPU version. +```bash +pip3 install -e git+https://gitlab.com/axolotl1/axolotl.git@devel#egg=axolotl[cpu] +``` + +* GPU version.
+```bash +pip3 install -e git+https://gitlab.com/axolotl1/axolotl.git@devel#egg=axolotl[gpu] +``` + +Note: +For macOS, pycurl needs to be installed manually: +```bash +PYCURL_SSL_LIBRARY=openssl LDFLAGS="-L/usr/local/opt/openssl/lib" CPPFLAGS="-I/usr/local/opt/openssl/include" pip install --no-cache-dir pycurl==7.43.0.3 +``` + +## Usage +For new users, we recommend installing the package and then cloning it via +```bash +git clone --recursive https://gitlab.com/axolotl1/axolotl.git +``` + +Then start JupyterLab via +```bash +jupyter lab +``` +And then open the [examples](https://gitlab.com/axolotl1/axolotl/-/tree/devel/examples) +directory and try to run them. \ No newline at end of file diff --git a/axolotl/axolotl/__init__.py b/axolotl/axolotl/__init__.py new file mode 100644 index 0000000..c8d46de --- /dev/null +++ b/axolotl/axolotl/__init__.py @@ -0,0 +1,2 @@ +__version__ = 'devel' +__description__ = 'Automated Machine Learning Framework' \ No newline at end of file diff --git a/axolotl/axolotl/algorithms/__init__.py b/axolotl/axolotl/algorithms/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/axolotl/axolotl/algorithms/autokeras_integration/__init__.py b/axolotl/axolotl/algorithms/autokeras_integration/__init__.py new file mode 100644 index 0000000..96d6e74 --- /dev/null +++ b/axolotl/axolotl/algorithms/autokeras_integration/__init__.py @@ -0,0 +1,82 @@ +from d3m.metadata.pipeline import Pipeline + +from axolotl.algorithms.autokeras_integration.constants import OMIT_LAYERS, step_function +from axolotl.algorithms.autokeras_integration.steps import set_learner, set_prediction, set_data, \ + set_loss + + +def keras2pipeline(keras_model, batch_size=32): + # Creating pipeline + from tensorflow.python.keras.activations import softmax + pipeline_description = Pipeline() + + pipeline_description.add_input(name='inputs') + + set_data(pipeline_description) + set_loss(pipeline_description) + + offset = len(pipeline_description.steps) + + previous_layer_ids = get_previous_layer_ids(keras_model) + + layers = keras_model.layers + + step_id = 0 + layer_to_step_id = {} + + total_layer_num = len(layers) + for i, layer in enumerate(layers): + cls_name = get_layer_class_name(layer) + if cls_name in OMIT_LAYERS: + continue + layer_id = get_layer_id(layer) + if len(previous_layer_ids[layer_id]) > 0: + layer.previous_layer_ids = tuple( + layer_to_step_id[i] + offset for i in previous_layer_ids[layer_id] + ) + else: + layer.previous_layer_ids = [None] + # Since JPL does not support Softmax Layer, we add the workaround to make use of softmax + if i == total_layer_num - 2 and cls_name == 'Dense': + layer.activation = softmax + d3m_step = step_function[cls_name](step_id, layer) + pipeline_description.add_step(d3m_step) + layer_to_step_id[layer_id] = step_id + step_id += 1 + + set_learner(pipeline_description, batch_size) + set_prediction(pipeline_description) + pipeline_description.add_output( + name='output predictions', data_reference=f"steps.{len(pipeline_description.steps) - 1}.produce") + + return pipeline_description + + +def get_previous_layer_ids(keras_model): + from tensorflow.python.util import nest + model = keras_model + layers = model.layers + + previous_layer_ids = {} + for layer in layers: + layer_id = str(id(layer)) + previous_layer_ids[layer_id] = set() + for i, node in enumerate(layer._inbound_nodes): + node_key = layer.name + '_ib-' + str(i) + if node_key in model._network_nodes: + for inbound_layer in nest.flatten(node.inbound_layers): + inbound_cls_name =
get_layer_class_name(inbound_layer) + inbound_layer_id = get_layer_id(inbound_layer) + if inbound_cls_name in OMIT_LAYERS: + previous_layer_ids[layer_id].update(previous_layer_ids[inbound_layer_id]) + else: + previous_layer_ids[layer_id].add(inbound_layer_id) + return previous_layer_ids + + +def get_layer_id(layer): + return str(id(layer)) + + +def get_layer_class_name(layer): + return layer.__class__.__name__ \ No newline at end of file diff --git a/axolotl/axolotl/algorithms/autokeras_integration/block.py b/axolotl/axolotl/algorithms/autokeras_integration/block.py new file mode 100644 index 0000000..bc4d12a --- /dev/null +++ b/axolotl/axolotl/algorithms/autokeras_integration/block.py @@ -0,0 +1,205 @@ +from d3m import index +from d3m.metadata.pipeline import PrimitiveStep +from d3m.metadata.base import ArgumentType + + +class Block: + def __init__(self, block_id, primitive, previous_layer_id): + self.block_id = block_id + self.primitive = primitive + self.previous_layer_id = previous_layer_id + + def get_step(self): + step = PrimitiveStep(primitive=index.get_primitive(self.primitive)) + if self.previous_layer_id is not None: + step.add_hyperparameter(name='previous_layer', argument_type=ArgumentType.PRIMITIVE, + data=self.previous_layer_id) + return step + + +class Conv(Block): + def __init__(self, filters, kernel_size, strides, padding, block_id, primitive, previous_layer_id): + super(Conv, self).__init__(block_id, primitive, previous_layer_id) + self.filters = filters + self.kernel_size = kernel_size[0] + self.strides = strides[0] + self.padding = 'same' if padding else 'valid' + + def get_step(self): + step = super().get_step() + step.add_hyperparameter(name='filters', argument_type=ArgumentType.VALUE, data=self.filters) + step.add_hyperparameter(name='kernel_size', argument_type=ArgumentType.VALUE, data=self.kernel_size) + step.add_hyperparameter(name='strides', argument_type=ArgumentType.VALUE, data=self.strides) + step.add_hyperparameter(name='padding', argument_type=ArgumentType.VALUE, data=self.padding) + return step + + +class Conv1D(Conv): + def __init__(self, block_id, filters=10, kernel_size=2, strides=1, padding='valid', previous_layer_id=None): + super(Conv1D, self).__init__(filters, kernel_size, strides, padding, block_id, + "d3m.primitives.layer.convolution_1d.KerasWrap", previous_layer_id) + + +class Conv2D(Conv): + def __init__(self, block_id, filters=10, kernel_size=2, strides=1, padding='valid', previous_layer_id=None): + super(Conv2D, self).__init__(filters, kernel_size, strides, padding, block_id, + "d3m.primitives.layer.convolution_2d.KerasWrap", previous_layer_id) + + +class Conv3D(Conv): + def __init__(self, block_id, filters=10, kernel_size=2, strides=1, padding='valid', previous_layer_id=None): + super(Conv3D, self).__init__(filters, kernel_size, strides, padding, block_id, + "d3m.primitives.layer.convolution_3d.KerasWrap", previous_layer_id) + + +class Dense(Block): + def __init__(self, block_id, units=120, activation='linear', previous_layer_id=None): + super(Dense, self).__init__(block_id, "d3m.primitives.layer.dense.KerasWrap", previous_layer_id) + self.units = units + self.activation = activation.__name__.lower() + + def get_step(self): + step = super().get_step() + step.add_hyperparameter(name='units', argument_type=ArgumentType.VALUE, data=self.units) + step.add_hyperparameter(name='activation', argument_type=ArgumentType.VALUE, data=self.activation) + return step + + +class BatchNorm2D(Block): + def __init__(self, block_id, previous_layer_id): + 
super(BatchNorm2D, self).__init__(block_id, "d3m.primitives.layer.batch_normalization.KerasWrap", + previous_layer_id) + + def get_step(self): + step = super().get_step() + return step + + +class MaxPooling(Block): + def __init__(self, pool_size, strides, padding, block_id, primitive, previous_layer_id): + super(MaxPooling, self).__init__(block_id, primitive, previous_layer_id) + self.pool_size = pool_size + self.strides = strides[0] + self.padding = 'same' if padding else 'valid' + + def get_step(self): + step = super().get_step() + step.add_hyperparameter(name='pool_size', argument_type=ArgumentType.VALUE, data=self.pool_size) + step.add_hyperparameter(name='strides', argument_type=ArgumentType.VALUE, data=self.strides) + step.add_hyperparameter(name='padding', argument_type=ArgumentType.VALUE, data=self.padding) + return step + + +class MaxPooling1D(MaxPooling): + def __init__(self, block_id, pool_size=(2, 2), strides=(1, 1), padding='valid', previous_layer_id=None): + super(MaxPooling1D, self).__init__(pool_size, strides, padding, block_id, + "d3m.primitives.layer.max_pooling_1d.KerasWrap", previous_layer_id) + + +class MaxPooling2D(MaxPooling): + def __init__(self, block_id, pool_size=(2, 2), strides=(1, 1), padding='valid', previous_layer_id=None): + super(MaxPooling2D, self).__init__(pool_size, strides, padding, block_id, + "d3m.primitives.layer.max_pooling_2d.KerasWrap", previous_layer_id) + + +class MaxPooling3D(MaxPooling): + def __init__(self, block_id, pool_size=(2, 2), strides=(1, 1), padding='valid', previous_layer_id=None): + super(MaxPooling3D, self).__init__(pool_size, strides, padding, block_id, + "d3m.primitives.layer.max_pooling_3d.KerasWrap", previous_layer_id) + + +class AvgPooling(Block): + def __init__(self, pool_size, strides, padding, block_id, primitive, previous_layer_id): + super(AvgPooling, self).__init__(block_id, primitive, previous_layer_id) + self.pool_size = pool_size[0] + self.strides = strides[0] + self.padding = 'same' if padding else 'valid' + + def get_step(self): + step = super().get_step() + step.add_hyperparameter(name='pool_size', argument_type=ArgumentType.VALUE, data=self.pool_size) + step.add_hyperparameter(name='strides', argument_type=ArgumentType.VALUE, data=self.strides) + step.add_hyperparameter(name='padding', argument_type=ArgumentType.VALUE, data=self.padding) + return step + + +class AvgPooling1D(AvgPooling): + def __init__(self, block_id, pool_size=(2, 2), strides=(1, 1), padding='valid', previous_layer_id=None): + super(AvgPooling1D, self).__init__(pool_size, strides, padding, block_id, + "d3m.primitives.layer.average_pooling_1d.KerasWrap", previous_layer_id) + + +class AvgPooling2D(AvgPooling): + def __init__(self, block_id, pool_size=(2, 2), strides=(1, 1), padding='valid', previous_layer_id=None): + super(AvgPooling2D, self).__init__(pool_size, strides, padding, block_id, + "d3m.primitives.layer.average_pooling_2d.KerasWrap", previous_layer_id) + + +class AvgPooling3D(AvgPooling): + def __init__(self, block_id, pool_size=(2, 2), strides=(1, 1), padding='valid', previous_layer_id=None): + super(AvgPooling3D, self).__init__(pool_size, strides, padding, block_id, + "d3m.primitives.layer.average_pooling_3d.KerasWrap", previous_layer_id) + + +class GlobalAvgPooling2d(Block): + def __init__(self, block_id, data_format='channels_last', previous_layer_id=None): + super(GlobalAvgPooling2d, self).__init__(block_id, "d3m.primitives.layer.global_average_pooling_2d.KerasWrap", + previous_layer_id=previous_layer_id) + self.data_format = 
data_format + + def get_step(self): + step = super().get_step() + step.add_hyperparameter(name='data_format', argument_type=ArgumentType.VALUE, data=self.data_format) + return step + + +# JPL does not have such primitives, +# class GlobalMaxPooling2d(MaxPooling2D): +# def __init__(self, block_id, input_shape, previous_layer_id): +# kernel_size = input_shape[0] +# super(GlobalMaxPooling2d, self).__init__(block_id, kernel_size, previous_layer_id=previous_layer_id) + + +class Dropout(Block): + def __init__(self, block_id, rate=0.2, previous_layer_id=None): + super(Dropout, self).__init__(block_id, "d3m.primitives.layer.dropout.KerasWrap", previous_layer_id) + self.rate = rate + + def get_step(self): + step = super().get_step() + step.add_hyperparameter(name='rate', argument_type=ArgumentType.VALUE, data=self.rate) + return step + + +class Flatten(Block): + def __init__(self, block_id, previous_layer_id): + super(Flatten, self).__init__(block_id, "d3m.primitives.layer.flatten.KerasWrap", previous_layer_id) + + +class Add(Block): + def __init__(self, block_id, previous_layer_ids): + super(Add, self).__init__(block_id, "d3m.primitives.layer.add.KerasWrap", None) + self.previous_layer_ids = previous_layer_ids + + def get_step(self): + step = PrimitiveStep(primitive=index.get_primitive(self.primitive)) + step.add_hyperparameter(name='previous_layers', argument_type=ArgumentType.PRIMITIVE, + data=self.previous_layer_ids) + return step + + +class Concatenate(Block): + def __init__(self, block_id, previous_layer_ids): + super(Concatenate, self).__init__(block_id, "d3m.primitives.layer.concat.KerasWrap", None) + self.previous_layer_ids = previous_layer_ids + + def get_step(self): + step = PrimitiveStep(primitive=index.get_primitive(self.primitive)) + step.add_hyperparameter(name='previous_layers', argument_type=ArgumentType.PRIMITIVE, + data=self.previous_layer_ids) + return step + + +class Null(Block): + def __init__(self, block_id): + super(Null, self).__init__(block_id, "d3m.primitives.layer.null.KerasWrap", None) diff --git a/axolotl/axolotl/algorithms/autokeras_integration/constants.py b/axolotl/axolotl/algorithms/autokeras_integration/constants.py new file mode 100644 index 0000000..0533405 --- /dev/null +++ b/axolotl/axolotl/algorithms/autokeras_integration/constants.py @@ -0,0 +1,23 @@ +from .mapping import * + +step_function = { + 'Dense': fetch_dense_step, + 'Conv1D': fetch_conv1D_step, + 'Conv2D': fetch_conv2D_step, + 'Conv3D': fetch_conv3D_step, + 'BatchNormalization': fetch_batch_norm_step, + 'MaxPooling2D': fetch_maxpool2d_step, + 'Dropout': fetch_dropout_step, + 'AvgPooling2D': fetch_avgpool2d_step, + # 'GlobalMaxPooling2d': JPL does not have such primitives, + 'GlobalAveragePooling2D': fetch_global_avgpooling_step, + 'Flatten': fetch_flatten_step, + 'Add': fetch_add_step, + 'Concatenate': fetch_concatenate_step, + 'Null': fetch_null_step, + # 'Substract': we do not implement +} + +ACTIVATIONS = {'ReLU'} +OMIT_LAYERS = {'InputLayer', 'Normalization', 'ReLU', 'ZeroPadding2D', 'Softmax', 'Activation'} +FORWARD_LAYERS = {'Dense', 'Conv1d', 'Conv2d', 'Conv3d'} diff --git a/axolotl/axolotl/algorithms/autokeras_integration/mapping.py b/axolotl/axolotl/algorithms/autokeras_integration/mapping.py new file mode 100644 index 0000000..f10aa16 --- /dev/null +++ b/axolotl/axolotl/algorithms/autokeras_integration/mapping.py @@ -0,0 +1,122 @@ +from .block import * + + +def fetch_conv1D_step(block_id, layer): + return Conv1D( + block_id, + layer.filters, + layer.kernel_size, + layer.strides, + 
layer.padding, + layer.previous_layer_ids[0] + ).get_step() + + +def fetch_conv2D_step(block_id, layer): + return Conv2D( + block_id, + layer.filters, + layer.kernel_size, + layer.strides, + layer.padding, + layer.previous_layer_ids[0] + ).get_step() + + +def fetch_conv3D_step(block_id, layer): + return Conv3D( + block_id, + layer.filters, + layer.kernel_size, + layer.strides, + layer.padding, + layer.previous_layer_ids[0] + ).get_step() + + +def fetch_dense_step(block_id, layer): + return Dense( + block_id, + layer.units, + layer.activation, + layer.previous_layer_ids[0] + ).get_step() + + +def fetch_batch_norm_step(block_id, layer): + return BatchNorm2D( + block_id, + layer.previous_layer_ids[0] + ).get_step() + + +def fetch_maxpool2d_step(block_id, layer): + return MaxPooling2D( + block_id, + layer.pool_size, + layer.strides, + layer.padding, + layer.previous_layer_ids[0] + ).get_step() + + +def fetch_avgpool2d_step(block_id, layer): + return AvgPooling2D( + block_id, + layer.pool_size, + layer.strides, + layer.padding, + layer.previous_layer_ids[0] + ).get_step() + + +def fetch_dropout_step(block_id, layer): + return Dropout( + block_id, + layer.rate, + layer.previous_layer_ids[0] + ).get_step() + + +# JPL does not have such primitives, +# def fetch_global_maxpooling_step(block_id, layer): +# return GlobalMaxPooling2d( +# block_id, +# layer.input.shape, +# layer.previous_layer_ids[0] +# ).get_step() + + +def fetch_global_avgpooling_step(block_id, layer): + return GlobalAvgPooling2d( + block_id, + layer.data_format, + layer.previous_layer_ids[0] + ).get_step() + + +def fetch_flatten_step(block_id, layer): + return Flatten( + block_id, + layer.previous_layer_ids[0] + ).get_step() + + +def fetch_add_step(block_id, layer): + return Add( + block_id, + layer.previous_layer_ids + ).get_step() + + +def fetch_concatenate_step(block_id, layer): + return Concatenate( + block_id, + layer.previous_layer_ids + ).get_step() + + +def fetch_null_step(block_id): + return Null( + block_id, + ).get_step() diff --git a/axolotl/axolotl/algorithms/autokeras_integration/steps.py b/axolotl/axolotl/algorithms/autokeras_integration/steps.py new file mode 100644 index 0000000..a73f637 --- /dev/null +++ b/axolotl/axolotl/algorithms/autokeras_integration/steps.py @@ -0,0 +1,126 @@ +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import PrimitiveStep + +import d3m.primitives.data_preprocessing.image_reader +import d3m.primitives.data_transformation.denormalize +import d3m.primitives.data_transformation.dataset_to_dataframe +import d3m.primitives.data_transformation.construct_predictions +import d3m.primitives.data_transformation.extract_columns_by_semantic_types +import d3m.primitives.data_transformation.replace_semantic_types + +import d3m.primitives.loss_function.categorical_crossentropy +import d3m.primitives.loss_function.categorical_accuracy + +import d3m.primitives.learner.model +import d3m.primitives.data_wrangling.batching + +LOSS_SETUP_IDX = IP_STEP = OP_STEP = READER_STEP = -1 +BATCH_SIZE = 40 + + +def set_data(pipeline_description): + global IP_STEP, OP_STEP, READER_STEP + + # denormalize + denorm_step_idx = 0 + step = PrimitiveStep( + primitive_description=d3m.primitives.data_transformation.denormalize.Common.metadata.query()) + step.add_argument( + name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') + step.add_output('produce') + pipeline_description.add_step(step) + + # dataset_to_dataframe + dataset_to_dataframe_step_idx = 
len(pipeline_description.steps) + step = PrimitiveStep( + primitive_description=d3m.primitives.data_transformation.dataset_to_dataframe.Common.metadata.query()) + step.add_argument( + name='inputs', argument_type=ArgumentType.CONTAINER, + data_reference='steps.{}.produce'.format(denorm_step_idx)) + step.add_output('produce') + pipeline_description.add_step(step) + + # extract targets + extract_step_idx = len(pipeline_description.steps) + extract_targets = PrimitiveStep( + d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common.metadata.query()) + extract_targets.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, + data_reference='steps.{}.produce'.format(dataset_to_dataframe_step_idx)) + extract_targets.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) + extract_targets.add_output('produce') + pipeline_description.add_step(extract_targets) + + # replace semantic types + # Need to be used for CIFAR-10 + replace_step_idx = len(pipeline_description.steps) + replace_semantic = PrimitiveStep( + d3m.primitives.data_transformation.replace_semantic_types.Common.metadata.query()) + replace_semantic.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, + data_reference=f'steps.{extract_step_idx}.produce') + replace_semantic.add_hyperparameter(name='to_semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/SuggestedTarget', + 'https://metadata.datadrivendiscovery.org/types/TrueTarget']) + replace_semantic.add_hyperparameter(name='from_semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) + replace_semantic.add_output('produce') + pipeline_description.add_step(replace_semantic) + + # image reader + reader_step_idx = len(pipeline_description.steps) + reader = PrimitiveStep( + primitive_description=d3m.primitives.data_preprocessing.image_reader.Common.metadata.query()) + reader.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') + pipeline_description.add_step(reader) + + IP_STEP, OP_STEP, READER_STEP = dataset_to_dataframe_step_idx, replace_step_idx, reader_step_idx + + +def set_loss(pipeline_description): + global LOSS_SETUP_IDX + + LOSS_SETUP_IDX = len(pipeline_description.steps) + step = PrimitiveStep( + primitive_description=d3m.primitives.loss_function.categorical_crossentropy.KerasWrap.metadata.query()) + pipeline_description.add_step(step) + + +def set_learner(pipeline_description, batch_size=BATCH_SIZE): + learner_idx = len(pipeline_description.steps) + step = PrimitiveStep(primitive_description=d3m.primitives.learner.model.KerasWrap.metadata.query()) + step.add_hyperparameter(name='loss', argument_type=ArgumentType.PRIMITIVE, data=LOSS_SETUP_IDX) + step.add_hyperparameter(name='model_type', argument_type=ArgumentType.VALUE, data='classification') + step.add_hyperparameter(name='network_last_layer', argument_type=ArgumentType.PRIMITIVE, + data=learner_idx - 1) + step.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='replace') + lr = 0.0001 + adam_hypers = d3m.primitives.learner.model.KerasWrap.metadata.get_hyperparams().defaults(path='optimizer.Adam') + adam_hypers = adam_hypers.replace({'lr': lr}) + step.add_hyperparameter(name='optimizer', argument_type=ArgumentType.VALUE, data=adam_hypers) + pipeline_description.add_step(step) + + bz_loader = 
PrimitiveStep(primitive_description=d3m.primitives.data_wrangling.batching.TAMU.metadata.query()) + bz_loader.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, + data_reference=f'steps.{IP_STEP}.produce') + bz_loader.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, + data_reference='steps.{}.produce'.format(OP_STEP)) + bz_loader.add_hyperparameter(name='primitive_reader', argument_type=ArgumentType.PRIMITIVE, data=READER_STEP) + bz_loader.add_hyperparameter(name='primitive_learner', argument_type=ArgumentType.PRIMITIVE, data=learner_idx) + bz_loader.add_hyperparameter(name='batch_size', argument_type=ArgumentType.VALUE, data=batch_size) + bz_loader.add_hyperparameter(name='sampling_method', argument_type=ArgumentType.VALUE, data='random') + bz_loader.add_output('produce') + + pipeline_description.add_step(bz_loader) + + +def set_prediction(pipeline_description): + pred = PrimitiveStep( + primitive_description=d3m.primitives.data_transformation.construct_predictions.Common.metadata.query()) + pred.add_argument( + name='inputs', argument_type=ArgumentType.CONTAINER, + data_reference=f"steps.{len(pipeline_description.steps) - 1}.produce" + ) + pred.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, + data_reference='steps.{}.produce'.format(IP_STEP)) + pred.add_output('produce') + pipeline_description.add_step(pred) diff --git a/axolotl/axolotl/algorithms/autokeras_search.py b/axolotl/axolotl/algorithms/autokeras_search.py new file mode 100644 index 0000000..faf6c50 --- /dev/null +++ b/axolotl/axolotl/algorithms/autokeras_search.py @@ -0,0 +1,145 @@ +import logging +import numpy as np + +import autokeras as ak +from d3m import exceptions, index, container +from d3m.metadata import base as metadata_base + +from axolotl.algorithms.autokeras_integration import keras2pipeline +from axolotl.algorithms.base import PipelineSearchBase +from axolotl.utils.pipeline import PipelineResult + +logger = logging.getLogger(__name__) + + +class AutoKerasSearch(PipelineSearchBase): + + def __init__(self, problem_description, backend, + max_trials=10000, directory='.', epochs=1, batch_size=32, validation_split=0.2): + super(AutoKerasSearch, self).__init__(problem_description, backend, ranking_function=None) + + self.clf = ak.ImageClassifier(max_trials=max_trials, seed=self.random_seed, directory=directory) + self.tuner = self.clf.tuner + self.epochs = epochs + self.batch_size = batch_size + self.validation_split = validation_split + + def search_fit(self, input_data, time_limit=300, *, expose_values=False): + dataframe = self.get_dataframe(input_data) + y = self.get_y(dataframe) + x = self.get_x(dataframe) + + self.clf.fit(x=x, y=y, epochs=self.epochs, batch_size=self.batch_size, + validation_split=self.validation_split) + keras_model = self.clf.export_model() + best_pipeline = keras2pipeline(keras_model, batch_size=self.batch_size) + + fitted_pipeline_result = self.backend.fit_pipeline( + problem_description=self.problem_description, pipeline=best_pipeline, + input_data=input_data, expose_outputs=expose_values + ) + + if fitted_pipeline_result.error is not None: + logging.error('No solution founded') + pipeline_result = PipelineResult(pipeline=best_pipeline) + pipeline_result.error = RuntimeError("No solution found") + return pipeline_result + + self.best_fitted_pipeline_id = fitted_pipeline_result.fitted_pipeline_id + return fitted_pipeline_result + + def mark_columns(self, dataset): + problem_inputs = self.problem_description['inputs'] + for problem_input in 
problem_inputs: + for target in problem_input.get('targets', []): + if target['resource_id'] not in dataset: + raise exceptions.NotFoundError( + "Error marking target column: dataset does not contain resource with resource ID '{resource_id}'.".format( + resource_id=target['resource_id'], + ), + ) + if not isinstance(dataset[target['resource_id']], container.DataFrame): + raise TypeError( + "Error marking target column: resource '{resource_id}' is not a DataFrame.".format( + resource_id=target['resource_id'], + ), + ) + if not 0 <= target['column_index'] < dataset[target['resource_id']].shape[1]: + raise ValueError( + "Error marking target column: resource '{resource_id}' does not have a column with index '{column_index}'.".format( + resource_id=target['resource_id'], + column_index=target['column_index'], + ), + ) + + dataset.metadata = dataset.metadata.add_semantic_type( + (target['resource_id'], metadata_base.ALL_ELEMENTS, target['column_index']), + 'https://metadata.datadrivendiscovery.org/types/Target', + ) + dataset.metadata = dataset.metadata.add_semantic_type( + (target['resource_id'], metadata_base.ALL_ELEMENTS, target['column_index']), + 'https://metadata.datadrivendiscovery.org/types/TrueTarget', + ) + # If column is marked as a target, it cannot be attribute as well. + # This allows one to define in problem description otherwise attribute columns as targets. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/265 + dataset.metadata = dataset.metadata.remove_semantic_type( + (target['resource_id'], metadata_base.ALL_ELEMENTS, target['column_index']), + 'https://metadata.datadrivendiscovery.org/types/Attribute', + ) + return dataset + + def get_dataframe(self, input_data): + # denormalize + denormalize = index.get_primitive('d3m.primitives.data_transformation.denormalize.Common') + hyperparams_class = denormalize.metadata.get_hyperparams() + primitive = denormalize(hyperparams=hyperparams_class.defaults()) + dataset = primitive.produce(inputs=input_data[0]).value + + # Add Target column into dataset + dataset = self.mark_columns(dataset) + + # dataset to dataframe + dataset_dataframe = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common') + hyperparams_class = dataset_dataframe.metadata.get_hyperparams() + primitive = dataset_dataframe(hyperparams=hyperparams_class.defaults()) + dataframe = primitive.produce(inputs=dataset).value + + return dataframe + + def get_y(self, dataframe): + # extract targets + get_columns_semantic = index.get_primitive( + 'd3m.primitives.data_transformation.extract_columns_by_semantic_types.Common') + hyperparams_class = get_columns_semantic.metadata.get_hyperparams() + primitive = get_columns_semantic( + hyperparams=hyperparams_class.defaults().replace( + { + 'semantic_types': ( + 'https://metadata.datadrivendiscovery.org/types/TrueTarget', + 'https://metadata.datadrivendiscovery.org/types/Target', + 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', + 'https://metadata.datadrivendiscovery.org/types/PredictedTarget' + ) + } + ) + ) + targets = primitive.produce(inputs=dataframe).value + y = np.array(targets, dtype=np.int64) + return y + + def get_x(self, dataframe): + # reading images + image_reader = index.get_primitive('d3m.primitives.data_preprocessing.image_reader.Common') + hyperparams_class = image_reader.metadata.get_hyperparams() + primitive = image_reader(hyperparams=hyperparams_class.defaults().replace( + {'return_result': 'replace'}) + ) + columns_to_use = 
primitive._get_columns(dataframe.metadata) + column_index = columns_to_use[0] + temp = [ + primitive._read_filename(column_index, dataframe.metadata.query((row_index, column_index)), value) + for row_index, value in enumerate(dataframe.iloc[:, column_index]) + ] + x = np.array(temp, dtype=np.float64) + return x diff --git a/axolotl/axolotl/algorithms/base.py b/axolotl/axolotl/algorithms/base.py new file mode 100644 index 0000000..2ccea22 --- /dev/null +++ b/axolotl/axolotl/algorithms/base.py @@ -0,0 +1,241 @@ +import abc +import uuid +import logging +import time +import typing + +from d3m.metadata.problem import Problem +from d3m.metadata.pipeline import Pipeline +from d3m import runtime as runtime_module +from d3m import container +from d3m.metadata.base import Context +from d3m import utils as d3m_utils +from d3m.metadata import pipeline_run as pipeline_run_module + +from axolotl.backend.base import RunnerBase +from axolotl.utils.pipeline import PipelineResult +from axolotl.utils.schemas import ContainerType +from axolotl.utils import resources as resources_module + +logger = logging.getLogger(__name__) + + +class PipelineSearchBase: + """ + Base class for pipeline searcher, this class should provide the common interface for pipeline + searchers to be integrated with the system. + + Nothing should be computed or initialized on the constructor, just adding more variables. + Everything else should be computed at start_search. + + Parameters + ---------- + problem_description : Problem + A problem description. + backend : RunnerBase + An instance of a backend class. + primitives_blocklist : typing.Sequence[str] + A list of string with pipeline names to avoid. + ranking_function : typing.Callable + A function that takes as an input a dataframe of scores, and generates a rank, smaller is better + + + Attributes + ---------- + backend : RunnerBase + An instance of a backend class. + random_seed : int + Random seed passed to the constructor. + volumes_dir : str + Path to a directory with static files required by primitives. + scratch_dir : str + Path to a directory to store any temporary files needed during execution. + ranking_function : typing.Callable + A function that takes as an input a dataframe of scores, and generates a rank, smaller is better + problem_description : Problem + A problem description. + primitives_blocklist : typing.Sequence[str] + A list of string with pipeline names to avoid. + + history : typing.Dict[str, PipelineResult] + A list of all the evaluated pipelines with their execution results and performance. 
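+
+    Notes
+    -----
+    Concrete searchers only need to implement ``_search``. A minimal sketch
+    (illustrative only; ``make_candidate_pipeline`` is a hypothetical helper and
+    not part of this package)::
+
+        class SketchSearch(PipelineSearchBase):
+            def _search(self, time_left):
+                # Build one candidate pipeline and fit it through the backend;
+                # the returned PipelineResult ends up in ``self.history``.
+                pipeline = make_candidate_pipeline()
+                result = self.backend.fit_pipeline(
+                    problem_description=self.problem_description,
+                    pipeline=pipeline, input_data=self.input_data)
+                return [result]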
+ """ + + def __init__(self, + problem_description: Problem, backend: RunnerBase, *, + primitives_blocklist: typing.Sequence[str] = None, ranking_function: typing.Callable = None + ) -> None: + self.search_id = str(uuid.uuid4()) + self.backend = backend + self.random_seed = backend.random_seed + self.volumes_dir = backend.volumes_dir + self.scratch_dir = backend.scratch_dir + self.ranking_function = ranking_function + + self.problem_description: Problem = problem_description + self.primitives_blocklist: typing.Sequence[str] = primitives_blocklist + + self.history: typing.List[PipelineResult] = [] + + # missing typing + self.best_fitted_pipeline_id: str = None + self.input_data: typing.Sequence[ContainerType] = None + + with d3m_utils.silence(): + self.runtime_environment = pipeline_run_module.RuntimeEnvironment() + + def search(self, time_limit: float): + """ + This method executes the whole search, by calling the ``_search`` method multiple times + as long as there is time left and put the results on the history. + + Parameters + ---------- + time_limit : float + Time limit for the search + """ + time_start = time.time() + largest_iteration = 0 + + i = 0 + + while True: + i += 1 + time_left = time_limit - (time.time() - time_start) + + if time_left < 5: + logger.info('-- Time out --') + break + + if time_left - largest_iteration < 5: + logger.info("""-- Time out -- \n Time left {} Next iteration could be over {}""".format(time_left, largest_iteration)) + break + + start_iteration_time = time.time() + results = self._search(time_left=time_left) + self.history += results + current_iteration_time = time.time() - start_iteration_time + + if largest_iteration < current_iteration_time: + largest_iteration = current_iteration_time + + def search_fit(self, input_data: typing.Sequence[ContainerType], time_limit: float = 300, *, + expose_values: bool = False) -> typing.Tuple[runtime_module.Runtime, PipelineResult]: + """ + This method calls search and fit the best ranking pipelines located from the search located on the history. + + Parameters + ---------- + input_data : typing.Sequence[ContainerType] + A list of D3M containers to be use as the pipeline input. + + time_limit : float + The time limit to be use for the search. + + expose_values : bool + A flag that allows the user expose all intermediate result of the pipeline during fitting. 
+ """ + self.input_data = input_data + self.search(time_limit) + + best_pipeline = None + for pipeline_result in self.history: + if pipeline_result.error is None: + if best_pipeline is None: + best_pipeline = pipeline_result + else: + if pipeline_result.rank < best_pipeline.rank: + best_pipeline = pipeline_result + + if best_pipeline is None: + logging.error('No solution founded') + pipeline_result = PipelineResult(fitted_pipeline_id='') + pipeline_result.error = RuntimeError("No solution found") + return _, pipeline_result + + return self.fit(best_pipeline.pipeline, input_data, expose_values) + + def fit(self, pipeline: Pipeline, input_data: typing.Sequence[container.Dataset], + expose_outputs: bool = False) -> typing.Tuple[runtime_module.Runtime, PipelineResult]: + + pipeline_result = PipelineResult(pipeline=pipeline) + + runtime, output, result = runtime_module.fit( + pipeline=pipeline, inputs=input_data, problem_description=self.problem_description, context=Context.TESTING, + hyperparams=None, random_seed=self.random_seed, volumes_dir=self.volumes_dir, + runtime_environment=self.runtime_environment, scratch_dir=self.scratch_dir, expose_produced_outputs=expose_outputs + ) + if result.has_error(): + pipeline_result.status = "ERRORED" + pipeline_result.error = result.error + else: + pipeline_result.status = "COMPLETED" + + pipeline_result.exposed_outputs = result.values + pipeline_result.output = output + + return runtime, pipeline_result + + def produce(self, fitted_pipeline: runtime_module.Runtime, input_data: typing.Sequence[container.Dataset], + expose_outputs: bool = False) -> PipelineResult: + pipeline_result = PipelineResult(fitted_pipeline_id='') + + with d3m_utils.silence(): + output, result = runtime_module.produce( + fitted_pipeline=fitted_pipeline, test_inputs=input_data, + expose_produced_outputs=expose_outputs + ) + + if result.has_error(): + pipeline_result.status = "ERRORED" + pipeline_result.error = result.error + else: + pipeline_result.status = "COMPLETED" + + pipeline_result.exposed_outputs = result.values + pipeline_result.output = output + return pipeline_result + + @abc.abstractmethod + def _search(self, time_left: float) -> typing.Sequence[PipelineResult]: + """ + A method where the search is going to be implemented. + The search algorithm should be iteration oriented, each of the call should end + on returning the status of pipelines evaluated. + + Parameters + ---------- + time_left : float + TTime left for the iteration + + Returns + ------- + typing.Sequence[PipelineResult] + A list of pipeline results with the information of the pipeline ran during the iteration. + + """ + + def pretty_print(self, deep: bool = False): + """ + A function that prints everything really nice. 
+ """ + from pprint import pprint + + def simplify_value(input_value): + if isinstance(input_value, Problem): + return input_value.to_simple_structure() + elif isinstance(input_value, Pipeline): + return input_value.to_json_structure() + elif isinstance(input_value, PipelineResult): + return vars(input_value) + elif isinstance(input_value, dict): + new_value = {} + for nested_variable, nested_val in input_value.items(): + new_value[nested_variable] = simplify_value(nested_val) + return new_value + + class_instance = vars(self) + if deep: + class_instance = simplify_value(class_instance) + + pprint(class_instance) diff --git a/axolotl/axolotl/algorithms/bayesian_search.py b/axolotl/axolotl/algorithms/bayesian_search.py new file mode 100644 index 0000000..9b91db1 --- /dev/null +++ b/axolotl/axolotl/algorithms/bayesian_search.py @@ -0,0 +1,27 @@ +import enum + +from axolotl.algorithms.tuners.bayesian_oracle import BayesianOptimizationOracle +from axolotl.algorithms.tuners.tunable_base import TunableBase + + +class BayesianSearch(TunableBase): + def __init__(self, problem_description, backend, primitives_blocklist=None, + max_trials=10000, directory='.', num_initial_points=None, num_eval_trials=None): + super(BayesianSearch, self).__init__(problem_description, backend, + primitives_blocklist=primitives_blocklist, num_eval_trials=num_eval_trials) + self.directory = directory + self.project_name = 'random_search' + + self.objective = self.problem_description['problem']['performance_metrics'][0]['metric'] + if isinstance(self.objective, enum.Enum): + self.objective = self.objective.name + + self.oracle = BayesianOptimizationOracle( + objective=self.objective, + max_trials=max_trials, # pre-defined number, + seed=self.random_seed, # seed + hyperparameters=self.hyperparameters, + num_initial_points=num_initial_points, + ) + self.oracle._set_project_dir( + self.directory, self.project_name, overwrite=True) diff --git a/axolotl/axolotl/algorithms/data_driven_search.py b/axolotl/axolotl/algorithms/data_driven_search.py new file mode 100644 index 0000000..e1687cb --- /dev/null +++ b/axolotl/axolotl/algorithms/data_driven_search.py @@ -0,0 +1,1086 @@ +import copy +import uuid +import numpy + +from d3m.metadata.pipeline import Pipeline, PrimitiveStep, Resolver +from d3m import index +from d3m import runtime as runtime_module +from d3m import utils as d3m_utils + +from axolotl.algorithms.base import PipelineSearchBase +from axolotl.utils import schemas as schemas_utils, pipeline as pipeline_utils +from d3m.metadata.base import ArgumentType, ALL_ELEMENTS +from axolotl.algorithms.dummy import dummy_ranking_function +from axolotl.algorithms.bayesian_search import BayesianSearch +import multiprocessing + +PREP_PRIMITIVES = { + 'Denormalize': 'd3m.primitives.data_transformation.denormalize.Common', + 'DatasetToDataFrame': 'd3m.primitives.data_transformation.dataset_to_dataframe.Common', + 'ColumnParser': 'd3m.primitives.data_transformation.column_parser.Common', + 'ExtractColumnsBySemanticTypes': 'd3m.primitives.data_transformation.extract_columns_by_semantic_types.Common', + 'Imputer': 'd3m.primitives.data_cleaning.imputer.SKlearn', + 'SimpleProfiler': 'd3m.primitives.schema_discovery.profiler.Common', + 'ReplaceSemanticTypes': 'd3m.primitives.data_transformation.replace_semantic_types.Common', + 'DropColumns': 'd3m.primitives.data_transformation.remove_columns.Common', + 'OneHotMaker': 'd3m.primitives.data_preprocessing.one_hot_encoder.MakerCommon', + 'ExtractColumns': 
'd3m.primitives.data_transformation.extract_columns.Common', + 'GeneralHorizontalConcat': 'd3m.primitives.data_transformation.horizontal_concat.TAMU', + 'Imputer': 'd3m.primitives.data_cleaning.imputer.SKlearn', + 'FeatureSelection': 'd3m.primitives.feature_selection.select_fwe.SKlearn', + 'ConstructPredictions': 'd3m.primitives.data_transformation.construct_predictions.Common', + 'OrdinalEncoder': 'd3m.primitives.data_transformation.ordinal_encoder.SKlearn', + 'RobustScale': 'd3m.primitives.data_preprocessing.robust_scaler.SKlearn', + 'TimeSeriesToList': 'd3m.primitives.data_preprocessing.time_series_to_list.DSBOX', + 'TimeSeriesFeaturization': 'd3m.primitives.feature_extraction.random_projection_timeseries_featurization.DSBOX', + 'TextReader': 'd3m.primitives.data_preprocessing.text_reader.Common', + 'TextEncoder': 'd3m.primitives.data_transformation.encoder.DistilTextEncoder', + 'AddSemanticTypes': 'd3m.primitives.data_transformation.add_semantic_types.Common', + 'SemiClassification': 'd3m.primitives.semisupervised_classification.iterative_labeling.AutonBox' +} + +LOADED_PRIMITIVES = {} + +DATA_TYPES = { + 'attribute': 'https://metadata.datadrivendiscovery.org/types/Attribute', + 'target': 'https://metadata.datadrivendiscovery.org/types/Target', + 'suggested_target': 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', + 'true_target': 'https://metadata.datadrivendiscovery.org/types/TrueTarget', + 'float': 'http://schema.org/Float', + 'int': 'http://schema.org/Integer', + 'unknown_type': 'https://metadata.datadrivendiscovery.org/types/UnknownType', + 'categorical': 'https://metadata.datadrivendiscovery.org/types/CategoricalData', + 'text': 'http://schema.org/Text', + 'bool': 'http://schema.org/Boolean', + 'file': 'https://metadata.datadrivendiscovery.org/types/FileName', + 'time_series': 'https://metadata.datadrivendiscovery.org/types/Timeseries', + 'date': 'http://schema.org/DateTime', + 'time': 'https://metadata.datadrivendiscovery.org/types/Time' +} + +with d3m_utils.silence(): + for key, value in PREP_PRIMITIVES.items(): + LOADED_PRIMITIVES[key] = index.get_primitive(value) + + +def get_semantic_types(input_dataframe): + semantic_types = [] + for i in range(input_dataframe.metadata.query((ALL_ELEMENTS,))['dimension']['length']): + semantic_types.append(input_dataframe.metadata.query((ALL_ELEMENTS, i,))['semantic_types']) + return semantic_types + + +def get_indexes_by_semantic_type(input_dataframe, semantic_type): + semantic_types = get_semantic_types(input_dataframe) + indexes = [] + for i in range(len(semantic_types)): + if semantic_type in semantic_types[i]: + indexes.append(i) + return indexes + + +def get_index_data_to_profile(input_dataframe): + indexes_to_profile = [] + for i in range(input_dataframe.metadata.query((ALL_ELEMENTS,))['dimension']['length']): + if DATA_TYPES['unknown_type'] in input_dataframe.metadata.query((ALL_ELEMENTS, i,))['semantic_types'] and \ + input_dataframe.metadata.query((ALL_ELEMENTS, i,))['structural_type'] == str: + indexes_to_profile.append(i) + return indexes_to_profile + + +def run_primitive(primitive, arguments, hyperparams=()): + # TODO add static support for static file + _hyperparams = primitive.metadata.get_hyperparams().defaults() + hp_to_update = {} + for hyperparam in hyperparams: + name, argument_type, data = hyperparam + hp_to_update[name] = data + _hyperparams = _hyperparams.replace(hp_to_update) + primitive_instance = primitive(hyperparams=_hyperparams) + use_set_training_data = pipeline_utils.query_multiple_terms( + 
primitive.metadata, ['primitive_code', 'instance_methods', 'set_training_data', 'arguments']) + if use_set_training_data is not None and use_set_training_data: + primitive_instance.set_training_data(**arguments) + primitive_instance.fit() + + produce_arguments = pipeline_utils.query_multiple_terms( + primitive.metadata, ['primitive_code', 'instance_methods', 'produce', 'arguments']) + + arguments_keys = list(arguments.keys()) + for argument in arguments_keys: + if argument not in produce_arguments: + print('removing argument', argument) + del arguments[argument] + return primitive_instance.produce(**arguments).value + + +def add_primitive_step_to_pipeline(pipeline, primitive, arguments=(), hyperparams=(), resolver=Resolver()): + step = PrimitiveStep(primitive=primitive, resolver=resolver) + for argument in arguments: + name, argument_type, data_reference = argument + step.add_argument(name=name, argument_type=argument_type, data_reference=data_reference) + for hyperparam in hyperparams: + name, argument_type, data = hyperparam + step.add_hyperparameter(name=name, argument_type=argument_type, data=data) + step.add_output('produce') + pipeline.add_step(step) + + +def fix_arguments(arguments): + _arguments = [] + for name, reference in arguments.items(): + _arguments.append((name, ArgumentType.CONTAINER, reference)) + return _arguments + + +def prepare_arguments(available_data, arguments): + _arguments = {} + for name, reference in arguments.items(): + if isinstance(reference, list): + _arguments[name] = [] + for elem in reference: + _arguments[name].append(available_data[elem]) + else: + _arguments[name] = available_data[reference] + return _arguments + + +def shrink_dataset(dataset, n_rows=10000): + if 'learningData' not in dataset or len(dataset.keys()) > 1 or len(dataset['learningData']) <= n_rows: + return dataset + + print('=' * 100) + print('Shrinking dataset from {} to {}'.format(len(dataset['learningData']), n_rows)) + df = dataset['learningData'].sample(n=n_rows) + df['d3mIndex'] = df['d3mIndex'].apply(lambda x: int(x)) + df = df.sort_values(by=['d3mIndex']) + df['d3mIndex'] = df['d3mIndex'].apply(lambda x: str(x)) + df = df.reset_index(drop=True) + + dataset['learningData'] = df + metadata = dataset.metadata + + metadata = metadata.update(('learningData',), { + 'structural_type': metadata.query(('learningData',))['structural_type'], + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/Table', + 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint', + ], + 'dimension': { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': n_rows, + }, + }) + + dataset.metadata = metadata + return dataset + + +def get_primitives_by_family(family_type): + pass + + +def index_to_operate(input_data, data_type, exclude_targets): + indexes = [] + semantic_types = get_semantic_types(input_data) + for i in range(len(semantic_types)): + if data_type in semantic_types[i]: + if DATA_TYPES['target'] in semantic_types[i]: + if not exclude_targets: + indexes.append(i) + else: + indexes.append(i) + return indexes + + +DEFAULT_HYPERPARAMS = { + 'ColumnParser': [ + ('parse_semantic_types', ArgumentType.VALUE, + ['http://schema.org/Integer', 'http://schema.org/Float', + 'https://metadata.datadrivendiscovery.org/types/FloatVector', 'http://schema.org/DateTime'] + ) + ], + 'SimpleProfiler': [ + ('return_result', ArgumentType.VALUE, 'replace'), + ('categorical_max_absolute_distinct_values', ArgumentType.VALUE, None), + 
('categorical_max_ratio_distinct_values', ArgumentType.VALUE, 0.20) + ], + 'ReplaceSemanticTypes': [ + ('return_result', ArgumentType.VALUE, 'replace'), + ('from_semantic_types', ArgumentType.VALUE, [DATA_TYPES['unknown_type']]), + ('to_semantic_types', ArgumentType.VALUE, [DATA_TYPES['categorical']]) + ], + 'OneHotMaker': [ + ('return_result', ArgumentType.VALUE, 'replace'), + ('encode_target_columns', ArgumentType.VALUE, True), + ('handle_unseen', ArgumentType.VALUE, 'column'), + ('handle_missing_value', ArgumentType.VALUE, 'column') + ], + "Imputer": [ + ('return_result', ArgumentType.VALUE, 'replace'), + ('use_semantic_types', ArgumentType.VALUE, True), + ], + 'OrdinalEncoder': [ + ('return_result', ArgumentType.VALUE, 'replace'), + ('use_semantic_types', ArgumentType.VALUE, True), + ], + 'RobustScale': [ + ('return_result', ArgumentType.VALUE, 'replace'), + ('use_semantic_types', ArgumentType.VALUE, True), + ] +} + + +class PrimitiveHandler: + def __init__(self, primitive, hyperparams=[], resolver=Resolver()): + self.primitive = primitive + self.hyperparams = hyperparams + self.resolver = resolver + + def add_produce(self, available_data, pipeline, arguments, indexes=[]): + _arguments = fix_arguments(arguments) + + hyperparams = self.hyperparams + if indexes and 'use_columns' in self.primitive.metadata.get_hyperparams().defaults(): + hyperparams = self.hyperparams + [('use_columns', ArgumentType.VALUE, indexes)] + add_primitive_step_to_pipeline(pipeline, self.primitive, _arguments, hyperparams, resolver=self.resolver) + output = run_primitive(self.primitive, prepare_arguments(available_data, arguments), hyperparams) + current_data_ref = 'steps.{}.produce'.format(len(pipeline.steps) - 1) + available_data[current_data_ref] = output + return current_data_ref + + def run_primitive(self, arguments, hyperparams=[], indexes=[]): + _hyperparams = self.hyperparams + if hyperparams: + _hyperparams = self.hyperparams + hyperparams + _hyperparams = _hyperparams if not indexes else _hyperparams + [('use_columns', ArgumentType.VALUE, indexes)] + return run_primitive(self.primitive, arguments, hyperparams) + + +class FileHandler: + + def __init__(self, resolver=Resolver()): + self.use_colummns = True + self.resolver = resolver + self.exclude_targets = None + self.problem_description = None + self.task_description = None + + def add_produce(self, available_data, pipeline, arguments, indexes=[]): + last_valid_data_ref = arguments['inputs'] + origindal_data_ref = last_valid_data_ref + current_data_ref = self.add_output_time_series(available_data, pipeline, arguments, indexes=[]) + + if current_data_ref is not None: + arguments = {'inputs': current_data_ref} + last_valid_data_ref = current_data_ref + + current_data_ref = self.add_output_text(available_data, pipeline, arguments, indexes=[]) + + if current_data_ref is not None: + arguments = {'inputs': current_data_ref} + last_valid_data_ref = current_data_ref + + if last_valid_data_ref == origindal_data_ref: + last_valid_data_ref = None + + return True, last_valid_data_ref + + def add_output_time_series(self, available_data, pipeline, arguments, indexes=[]): + initial_ref = arguments['inputs'] + semantic_types = get_semantic_types(available_data[initial_ref]) + indexes_to_remove = [] + for i, _type in enumerate(semantic_types): + if DATA_TYPES['file'] in _type and DATA_TYPES['time_series'] in _type: + indexes_to_remove.append(i) + if not indexes_to_remove: + return + print('File TimeSeriesHandler') + primitive_handler = 
PrimitiveHandler(LOADED_PRIMITIVES['TimeSeriesToList'], resolver=self.resolver) + current_data_ref = primitive_handler.add_produce(available_data, pipeline, arguments) + + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['TimeSeriesFeaturization'], resolver=self.resolver) + current_data_ref_to_concat = primitive_handler.add_produce(available_data, pipeline, {'inputs': current_data_ref}) + + drop_hp = [('columns', ArgumentType.VALUE, indexes_to_remove)] + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['DropColumns'], drop_hp, resolver=self.resolver) + current_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': initial_ref}) + + data_refs_to_concat = [current_data_ref, current_data_ref_to_concat] + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['GeneralHorizontalConcat'], resolver=self.resolver) + last_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': data_refs_to_concat}) + return last_data_ref + + def add_output_text(self, available_data, pipeline, arguments, indexes=[]): + initial_ref = arguments['inputs'] + semantic_types = get_semantic_types(available_data[initial_ref]) + indexes_to_remove = [] + for i, _type in enumerate(semantic_types): + if DATA_TYPES['file'] in _type and DATA_TYPES['text'] in _type: + indexes_to_remove.append(i) + if not indexes_to_remove: + return + + print('File TextReader Handler') + text_rd_hp = [('return_result', ArgumentType.VALUE, 'replace')] + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['TextReader'], text_rd_hp, resolver=self.resolver) + text_data_ref = primitive_handler.add_produce(available_data, pipeline, arguments) + + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['ExtractColumnsBySemanticTypes'], + [('semantic_types', ArgumentType.VALUE, [DATA_TYPES['attribute']])], + self.resolver) + attributes_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': text_data_ref}) + + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['ExtractColumnsBySemanticTypes'], + [('semantic_types', ArgumentType.VALUE, [DATA_TYPES['target']])], + self.resolver) + target_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': text_data_ref}) + + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['TextEncoder'], + [('encoder_type', ArgumentType.VALUE, 'tfidf')], + self.resolver) + current_data_ref = primitive_handler.add_produce( + available_data, pipeline, {'inputs': attributes_data_ref, 'outputs': target_data_ref}) + + no_semantic_types = [] + for i in range(available_data[current_data_ref].metadata.query((ALL_ELEMENTS,))['dimension']['length']): + if 'semantic_types' not in available_data[current_data_ref].metadata.query((ALL_ELEMENTS, i,)) and \ + available_data[current_data_ref].metadata.query((ALL_ELEMENTS, i,))['structural_type'] == numpy.float64: + no_semantic_types.append(i) + + add_semantic_hp = [('columns', ArgumentType.VALUE, no_semantic_types), + ('semantic_types', ArgumentType.VALUE, [DATA_TYPES['float'], DATA_TYPES['attribute']])] + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['AddSemanticTypes'], add_semantic_hp, resolver=self.resolver) + current_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': current_data_ref}) + + data_refs_to_concat = [target_data_ref, current_data_ref] + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['GeneralHorizontalConcat'], resolver=self.resolver) + last_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': 
data_refs_to_concat}) + + return last_data_ref + + +class CategoricalHandler: + def __init__(self, resolver=Resolver()): + self.use_colummns = True + self.resolver = resolver + self.exclude_targets = None + self.problem_description = None + self.task_description = None + + def _get_criteria(self, input_data, indexes=[]): + index_to_ordinal = [] + index_to_drop = [] + index_to_one_hot = [] + + total_n_values = len(input_data) + for _index in indexes: + n_categories = len(input_data.iloc[:, _index].unique()) + categories_ratio = n_categories/total_n_values + if categories_ratio >= 0.8: + index_to_drop.append(_index) + else: + if n_categories <= 10: + index_to_one_hot.append(_index) + else: + if n_categories <= 100 and not input_data.iloc[:, _index].isnull().values.any(): + index_to_ordinal.append(_index) + else: + index_to_drop.append(_index) + return index_to_ordinal, index_to_one_hot, index_to_drop + + def add_produce(self, available_data, pipeline, arguments, indexes=[]): + index_to_ordinal, index_to_one_hot, index_to_drop = self._get_criteria( + available_data[arguments['inputs']], indexes) + _arguments = fix_arguments(arguments) + current_data_ref = arguments['inputs'] + + index_to_drop += index_to_ordinal + + if index_to_drop: + print('Drop columns', index_to_drop) + drop_hp = [('columns', ArgumentType.VALUE, index_to_drop)] + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['DropColumns'], drop_hp, resolver=self.resolver) + current_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': current_data_ref}) + + if index_to_one_hot: + new_indexes = index_to_operate(available_data[current_data_ref], DATA_TYPES['categorical'], self.exclude_targets) + _, index_to_one_hot, _ = self._get_criteria(available_data[current_data_ref], new_indexes) + print('OneHot', index_to_one_hot) + + one_hot_hp = DEFAULT_HYPERPARAMS['OneHotMaker'] + [('use_columns', ArgumentType.VALUE, index_to_one_hot)] + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['OneHotMaker'], one_hot_hp, resolver=self.resolver) + current_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': current_data_ref}) + + # if index_to_ordinal: + # new_indexes = index_to_operate(available_data[current_data_ref], DATA_TYPES['categorical'], + # self.exclude_targets) + # index_to_ordinal, _, _ = self._get_criteria(available_data[current_data_ref], new_indexes) + # primitive = LOADED_PRIMITIVES['OrdinalEncoder'] + # ordinal_hp = DEFAULT_HYPERPARAMS['OrdinalEncoder'] + [('use_columns', ArgumentType.VALUE, index_to_ordinal)] + # add_primitive_step_to_pipeline(pipeline, primitive, _arguments, ordinal_hp, resolver=self.resolver) + # output = run_primitive(primitive, prepare_arguments(available_data, arguments), ordinal_hp) + # current_data_ref = 'steps.{}.produce'.format(len(pipeline.steps) - 1) + # available_data[current_data_ref] = output + # arguments = {'inputs': current_data_ref} + # _arguments = fix_arguments(arguments) + # + # cat_indexes = get_indexes_by_semantic_type(available_data[current_data_ref], DATA_TYPES['categorical']) + # index_to_fix = [] + # for _index in cat_indexes: + # if available_data[current_data_ref].metadata.query((ALL_ELEMENTS, _index,))['structural_type'] == numpy.float64: + # index_to_fix.append(_index) + # + # if index_to_fix: + # primitive = LOADED_PRIMITIVES['ReplaceSemanticTypes'] + # replace_sem_hp = [ + # ('return_result', ArgumentType.VALUE, 'replace'), + # ('from_semantic_types', ArgumentType.VALUE, [DATA_TYPES['categorical']]), + # ('to_semantic_types', 
ArgumentType.VALUE, [DATA_TYPES['float']]), + # ('use_columns', ArgumentType.VALUE, index_to_fix) + # ] + # add_primitive_step_to_pipeline(pipeline, primitive, _arguments, replace_sem_hp, resolver=self.resolver) + # output = run_primitive(primitive, prepare_arguments(available_data, arguments), replace_sem_hp) + # current_data_ref = 'steps.{}.produce'.format(len(pipeline.steps) - 1) + # available_data[current_data_ref] = output + return True, current_data_ref + + +class BooleanHandler: + def __init__(self, resolver=Resolver()): + self.use_colummns = True + self.resolver = resolver + self.exclude_targets = None + self.problem_description = None + self.task_description = None + + def add_produce(self, available_data, pipeline, arguments, indexes=[]): + indexes = index_to_operate(available_data[arguments['inputs']], DATA_TYPES['bool'], self.exclude_targets) + if not indexes: + print("Skipping Boolean no columns to operate") + return True, None + + + replace_sem_hp = [ + ('return_result', ArgumentType.VALUE, 'replace'), + ('from_semantic_types', ArgumentType.VALUE, [DATA_TYPES['bool']]), + ('to_semantic_types', ArgumentType.VALUE, [DATA_TYPES['categorical']]), + ('use_columns', ArgumentType.VALUE, indexes) + ] + primitive_handler = PrimitiveHandler( + LOADED_PRIMITIVES['ReplaceSemanticTypes'], replace_sem_hp, resolver=self.resolver) + current_data_ref = primitive_handler.add_produce(available_data, pipeline, arguments) + + one_hot_hp = [ + ('return_result', ArgumentType.VALUE, 'replace'), + ('encode_target_columns', ArgumentType.VALUE, True), + ('handle_missing_value', ArgumentType.VALUE, 'column'), + ('use_columns', ArgumentType.VALUE, indexes) + ] + primitive_handler = PrimitiveHandler( + LOADED_PRIMITIVES['OneHotMaker'], one_hot_hp, resolver=self.resolver) + current_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': current_data_ref}) + return True, current_data_ref + + +class DateHandler: + def __init__(self, resolver=Resolver()): + self.use_colummns = True + self.resolver = resolver + self.exclude_targets = None + self.problem_description = None + self.task_description = None + + def add_produce(self, available_data, pipeline, arguments, indexes=[]): + indexes = [] + semantic_types = get_semantic_types(available_data[arguments['inputs']]) + for i in range(len(semantic_types)): + if DATA_TYPES['date'] in semantic_types[i] and DATA_TYPES['time'] in semantic_types[i]: + if DATA_TYPES['target'] in semantic_types[i]: + if not self.exclude_targets: + indexes.append(i) + else: + indexes.append(i) + + if not indexes: + print("Skipping Boolean no columns to operate") + return True, None + + replace_sem_hp = [ + ('return_result', ArgumentType.VALUE, 'replace'), + ('from_semantic_types', ArgumentType.VALUE, [DATA_TYPES['date'], DATA_TYPES['time']]), + ('to_semantic_types', ArgumentType.VALUE, [DATA_TYPES['float']]), + ('use_columns', ArgumentType.VALUE, indexes) + ] + primitive_handler = PrimitiveHandler( + LOADED_PRIMITIVES['ReplaceSemanticTypes'], replace_sem_hp, resolver=self.resolver) + current_data_ref = primitive_handler.add_produce(available_data, pipeline, arguments) + + return True, current_data_ref + + +class TextHandler: + def __init__(self, resolver=Resolver()): + self.use_colummns = True + self.resolver = resolver + self.exclude_targets = None + self.problem_description = None + self.task_description = None + + def add_produce(self, available_data, pipeline, arguments, indexes=[]): + indexes = [] + semantic_types = 
get_semantic_types(available_data[arguments['inputs']]) + for i in range(len(semantic_types)): + if DATA_TYPES['text'] in semantic_types[i] and not DATA_TYPES['file'] in semantic_types[i]: + if DATA_TYPES['target'] in semantic_types[i]: + if not self.exclude_targets: + indexes.append(i) + else: + indexes.append(i) + + if not indexes: + print("Skipping Text no columns to operate") + return True, None + + print('TextHandler') + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['ExtractColumnsBySemanticTypes'], + [('semantic_types', ArgumentType.VALUE, [DATA_TYPES['attribute']])], + self.resolver) + attributes_data_ref = primitive_handler.add_produce(available_data, pipeline, arguments) + + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['ExtractColumnsBySemanticTypes'], + [('semantic_types', ArgumentType.VALUE, [DATA_TYPES['target']])], + self.resolver) + target_data_ref = primitive_handler.add_produce(available_data, pipeline, arguments) + + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['TextEncoder'], + [('encoder_type', ArgumentType.VALUE, 'tfidf')], + self.resolver) + current_data_ref = primitive_handler.add_produce( + available_data, pipeline, {'inputs': attributes_data_ref, 'outputs': target_data_ref}) + + no_semantic_types = [] + for i in range(available_data[current_data_ref].metadata.query((ALL_ELEMENTS,))['dimension']['length']): + if 'semantic_types' not in available_data[current_data_ref].metadata.query((ALL_ELEMENTS, i,)) and \ + available_data[current_data_ref].metadata.query((ALL_ELEMENTS, i,))[ + 'structural_type'] == numpy.float64: + no_semantic_types.append(i) + + add_semantic_hp = [('columns', ArgumentType.VALUE, no_semantic_types), + ('semantic_types', ArgumentType.VALUE, [DATA_TYPES['float'], DATA_TYPES['attribute']])] + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['AddSemanticTypes'], add_semantic_hp, + resolver=self.resolver) + current_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': current_data_ref}) + + data_refs_to_concat = [target_data_ref, current_data_ref] + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['GeneralHorizontalConcat'], resolver=self.resolver) + last_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': data_refs_to_concat}) + + return True, last_data_ref + + + +class DataTypesHandler: + def __init__(self, problem_description, task_description, + handlers=None, use_default_handlers=True, exclude_targets=True, resolver=Resolver()): + DEFAULT_DATA_HANDLERS = { + DATA_TYPES['float']: None, + DATA_TYPES['int']: None, + DATA_TYPES['bool']: BooleanHandler(resolver=resolver), + DATA_TYPES['categorical']: CategoricalHandler(resolver=resolver), + DATA_TYPES['date']: DateHandler(resolver=resolver), + DATA_TYPES['file']: FileHandler(resolver=resolver), + DATA_TYPES['text']: TextHandler(resolver=resolver) + } + self.problem_description = problem_description + self.task_description = task_description + self.resolver = resolver + self.exclude_targets = exclude_targets + if handlers is None: + self.handlers = DEFAULT_DATA_HANDLERS + else: + if use_default_handlers: + self.handlers = DEFAULT_DATA_HANDLERS + for name, handler in handlers.items(): + self.handlers[name] = handlers + else: + self.handlers = handlers + + def add_produce(self, pipeline, input_dataframe): + data_ref = 'steps.{}.produce'.format(len(pipeline.steps) - 1) + available_data = {data_ref: input_dataframe} + last_data_ref = data_ref + + use_columns = [] + not_use_columns = [] + for handler_name in 
self.handlers.keys(): + if self.check_use_columns_in_handler(handler_name): + use_columns.append(handler_name) + else: + not_use_columns.append(handler_name) + + last_use_column_handler_index = len(use_columns) - 1 + handler_names = use_columns + not_use_columns + last_use_column_handler_data_ref = None + data_refs_to_concat = [] + + # We execute the handler in order according to whether or not the support use_columns. + for i, handler_name in enumerate(handler_names): + print(i, handler_name) + use_columns, new_data_ref = self.execute_handler(available_data, pipeline, last_data_ref, handler_name) + if new_data_ref is not None: + last_data_ref = new_data_ref + if i == last_use_column_handler_index: + last_use_column_handler_data_ref = last_data_ref + elif i > last_use_column_handler_index: + data_refs_to_concat.append(new_data_ref) + + # we get the columns of the ones that we use by using negation of excluiding types. + # we do this if there are not_use_columns + if not_use_columns: + # get the columns that columns that were not modified or used use_columns + primitive_handler = PrimitiveHandler( + LOADED_PRIMITIVES['ExtractColumnsBySemanticTypes'], + [('semantic_types', ArgumentType.VALUE, not_use_columns), ('negate', ArgumentType.VALUE, True)], + self.resolver) + new_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': last_use_column_handler_data_ref}) + data_refs_to_concat.insert(0, new_data_ref) + + # We concatenate all together + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['GeneralHorizontalConcat'], resolver=self.resolver) + last_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': data_refs_to_concat}) + + return available_data[last_data_ref], pipeline + + def check_use_columns_in_handler(self, handler_name): + use_columns = True + if self.handlers[handler_name] is not None: + if isinstance(self.handlers[handler_name], PrimitiveHandler): + use_columns = 'use_columns' in self.handlers[handler_name].primitive.metadata.get_hyperparams().defaults() + else: + use_columns = self.handlers[handler_name].use_colummns + return use_columns + + def execute_handler(self, available_data, pipeline, data_ref, handler_name): + new_data_ref = None + use_columns = False + if self.handlers[handler_name] is not None: + if isinstance(self.handlers[handler_name], PrimitiveHandler): + use_columns, new_data_ref = self._execute_primitive_handler(available_data, pipeline, data_ref, handler_name) + else: + self.handlers[handler_name].exclude_targets = self.exclude_targets + self.handlers[handler_name].problem_description = self.problem_description + self.handlers[handler_name].task_description = self.task_description + indexes = self._index_to_operate(available_data[data_ref], handler_name) + if indexes: + use_columns, new_data_ref = self.handlers[handler_name].add_produce( + available_data, pipeline, {'inputs': data_ref}, indexes) + else: + print('Skipping', handler_name) + return use_columns, new_data_ref + + def _index_to_operate(self, input_data, data_type): + indexes = [] + semantic_types = get_semantic_types(input_data) + for i in range(len(semantic_types)): + if data_type in semantic_types[i]: + if DATA_TYPES['target'] in semantic_types[i]: + if not self.exclude_targets: + indexes.append(i) + else: + indexes.append(i) + return indexes + + def _execute_primitive_handler(self, available_data, pipeline, data_ref, handler_name): + use_columns = 'use_columns' in self.handlers[handler_name].primitive.metadata.get_hyperparams().defaults() + indexes 
= self._index_to_operate(available_data[data_ref],handler_name) + # if no columns to operate, return + if not indexes: + return [], None + + if use_columns: + new_data_ref = self.handlers[handler_name].add_produce( + available_data, pipeline, {'inputs': data_ref}, indexes) + else: + # get the columns with specific semnatic types and then we run the primitive with the inputs + primitive_handler = PrimitiveHandler( + LOADED_PRIMITIVES['ExtractColumnsBySemanticTypes'], [('columns', ArgumentType.VALUE, indexes)], self.resolver) + new_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': data_ref}) + new_data_ref = self.handlers[handler_name].add_produce( + available_data, pipeline, {'inputs': available_data[new_data_ref]}, indexes) + return use_columns, new_data_ref + + +class Preprocessing: + def __init__(self, problem_description, task_description, *, primitives_blocklist=None, resolver=None): + self.problem_description = problem_description + self.task_description = task_description + self.primitives_blocklist = [] if primitives_blocklist is None else primitives_blocklist + self.resolver = Resolver(primitives_blocklist=primitives_blocklist) if resolver is None else resolver + + self.profile_pipeline = None + self.parsed_pipeline = None + self.featurization_pipeline = None + self.imputed_pipeline = None + self.feature_selection_pipeline = None + self.dataframe_data = None + self.dataframe_reference = None + + def get_imputed_pipline(self, input_data, pipeline=None, handler=None): + if pipeline is None: + pipeline = copy.deepcopy(self.featurization_pipeline) + if handler is None: + self.imputed_pipeline = pipeline + return + if not input_data.isnull().values.any(): + print('No Nan Values found') + self.imputed_pipeline = pipeline + return + + current_data_ref = 'steps.{}.produce'.format(len(pipeline.steps) - 1) + available_data = {current_data_ref: input_data} + current_data_ref = handler.add_produce(available_data, pipeline, {'inputs': current_data_ref}) + self.dataframe_data = available_data[current_data_ref] + self.imputed_pipeline = pipeline + + def get_feature_selection_pipeline(self, input_data, pipeline=None, handler=None): + if pipeline is None: + pipeline = copy.deepcopy(self.imputed_pipeline) + if handler is None: + self.feature_selection_pipeline = pipeline + return + current_data_ref = 'steps.{}.produce'.format(len(pipeline.steps) - 1) + available_data = {current_data_ref: input_data} + current_data_ref = handler.add_produce(available_data, pipeline, {'inputs': current_data_ref}) + self.dataframe_data = available_data[current_data_ref] + self.feature_selection_pipeline = pipeline + + def get_data_handler_pipeline(self, input_data, pipeline=None): + if pipeline is None: + pipeline = copy.deepcopy(self.parsed_pipeline) + type_handler = DataTypesHandler(self.problem_description, self.task_description) + self.dataframe_data, self.featurization_pipeline = type_handler.add_produce(pipeline, input_data) + + def get_parsed_dataframe(self, input_data, pipeline=None): + if pipeline is None: + pipeline = copy.deepcopy(self.profile_pipeline) + current_data_ref = 'steps.{}.produce'.format(len(pipeline.steps) - 1) + available_data = {current_data_ref: input_data} + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['ColumnParser'], DEFAULT_HYPERPARAMS['ColumnParser'], self.resolver) + current_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': current_data_ref}) + self.dataframe_data = available_data[current_data_ref] + self.parsed_pipeline 
= pipeline + + def get_dataset_to_dataframe_pipeline(self, input_data, pipeline=None): + if pipeline is None: + pipeline = Pipeline() + pipeline.add_input('input_data') + current_data_ref = 'inputs.0' + available_data = {} + + if len(input_data) > 1: + raise ValueError('Search with multiple inputs is not supported yet.') + _input_data, _ = runtime_module.Runtime._mark_columns(self.problem_description.get('inputs', []), input_data[-1]) + available_data[current_data_ref] = _input_data + + # Add denormalize + if len(_input_data.keys()) > 1: + print('There are multiple resources, adding denormalize') + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['Denormalize'], resolver=self.resolver) + current_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': current_data_ref}) + + # Add dataset to dataframe + print('Adding dataset to dataframe') + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['DatasetToDataFrame'], resolver=self.resolver) + current_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': current_data_ref}) + + # add profiling + index_to_profile = get_index_data_to_profile(available_data[current_data_ref]) + if index_to_profile: + current_data_ref = self.profile_data(available_data, pipeline, current_data_ref, index_to_profile) + + self.dataframe_reference = current_data_ref + self.dataframe_data = available_data[current_data_ref] + self.profile_pipeline = pipeline + + def profile_data(self, available_data, pipeline, data_ref, index_to_profile): + # Thi sfunction helps to abstract the process when the data is profiled. + target_indexes = get_indexes_by_semantic_type(available_data[data_ref], DATA_TYPES['target']) + + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['SimpleProfiler'], DEFAULT_HYPERPARAMS['SimpleProfiler'], + self.resolver) + profiled_output = primitive_handler.run_primitive({'inputs': available_data[data_ref]}, + indexes=index_to_profile) + profiles_semantic_types = get_semantic_types(profiled_output) + + # TODO make a list of tasks that has discrete target + # If the task is classification we need to make sure that the targets are categorical, + # otherwise there is a chance that the targets are considered as numerical an wrongly parse. 
+ categorical_indexes = [] + if self.task_description['task_type'] == 'CLASSIFICATION': + for i in target_indexes: + if DATA_TYPES['categorical'] not in profiles_semantic_types[i]: + index_to_profile.remove(i) + categorical_indexes.append(i) + current_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': data_ref}, + indexes=index_to_profile) + if categorical_indexes: + primitive_handler = PrimitiveHandler(LOADED_PRIMITIVES['ReplaceSemanticTypes'], + DEFAULT_HYPERPARAMS['ReplaceSemanticTypes'], self.resolver) + current_data_ref = primitive_handler.add_produce(available_data, pipeline, {'inputs': current_data_ref}, + indexes=categorical_indexes) + + return current_data_ref + + def generate_preprocessing_by_step(self, input_data=None, feature_selection_handler=None, impute_handler=None): + if self.profile_pipeline is None: + print('=' * 100) + print('profiled pipeline') + self.get_dataset_to_dataframe_pipeline(input_data) + return [] + elif self.parsed_pipeline is None: + print('=' * 100) + print('parsing') + self.get_parsed_dataframe(self.dataframe_data) + self.dataframe_data.metadata.pretty_print() + return [] + elif self.featurization_pipeline is None: + print('=' * 100) + print('feature') + self.get_data_handler_pipeline(self.dataframe_data) + return [] + elif self.imputed_pipeline is None: + print('=' * 100) + print('Imputer') + self.get_imputed_pipline(self.dataframe_data, handler=impute_handler) + return [] + elif self.feature_selection_pipeline is None: + print('=' * 100) + print('selection') + self.get_feature_selection_pipeline(self.dataframe_data, handler=feature_selection_handler) + print(self.dataframe_data) + self.dataframe_data.metadata.pretty_print() + return [] + + +class DataDrivenSearch(PipelineSearchBase): + def __init__(self, problem_description, backend, *, primitives_blocklist=None, + ranking_function=None, hyperparameter_tuner=BayesianSearch, n_workers=1): + super().__init__(problem_description=problem_description, backend=backend, + primitives_blocklist=primitives_blocklist, ranking_function=ranking_function) + if self.ranking_function is None: + self.ranking_function = dummy_ranking_function + + self.task_description = schemas_utils.get_task_description(self.problem_description['problem']['task_keywords']) + self.resolver = Resolver(primitives_blocklist=self.primitives_blocklist) + + print(self.task_description) + print(self.problem_description['problem']) + + self.preprocessing = Preprocessing(self.problem_description, self.task_description, + primitives_blocklist=self.primitives_blocklist) + self.preprocessing_handlers = None + self.max_num_pipelines_to_eval = n_workers + print('max_num_pipelines_to_eval', self.max_num_pipelines_to_eval) + # self.max_num_pipelines_to_eval = 1 + + self.search_started = False + self.total_time = None + self.learner_candidates = None + self.failed_learner = [] + self.successful_learner = [] + # TODO update this to be defined on problem/metrics terms + self.data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA") + self.metrics = self.problem_description['problem']['performance_metrics'] + + self.scoring_pipeline = schemas_utils.get_scoring_pipeline() + self.data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split'] + + self.tuner_enable = False + self.hyperparameter_tunner_init = False + self.hyperparameter_tunner = hyperparameter_tuner( + self.problem_description, self.backend, primitives_blocklist=self.primitives_blocklist, + max_trials=100000, 
directory=self.backend.scratch_dir) + self.n_pipelines_to_tune = self.max_num_pipelines_to_eval + + def _search(self, time_left): + if self.preprocessing.profile_pipeline is None: + self.preprocessing_handlers = { + 'input_data': self.input_data, + 'impute_handler': PrimitiveHandler(primitive=LOADED_PRIMITIVES['Imputer'], + hyperparams=DEFAULT_HYPERPARAMS['Imputer'], + resolver=self.resolver), + 'feature_selection_handler': PrimitiveHandler(primitive=LOADED_PRIMITIVES['RobustScale'], + hyperparams=DEFAULT_HYPERPARAMS['RobustScale'], + resolver=self.resolver), + } + if self.preprocessing.feature_selection_pipeline is None: + return self.preprocessing.generate_preprocessing_by_step(**self.preprocessing_handlers) + + if self.learner_candidates is None: + self.input_data = [shrink_dataset(self.input_data[0])] + terms_to_block = ['data_augmentation', 'data_preprocessing', 'data_cleaning', + 'data_transformation', 'evaluation', 'feature_construction', + 'feature_extraction', 'layer', 'loss_function', 'metalearning', + 'operator', 'schema_discovery', + 'd3m.primitives.semisupervised_classification.iterative_labeling.AutonBox'] + mapped_task = False + learner_candidates = pipeline_utils.filter_primitives_by_dataframe_input( + pipeline_utils.get_primitive_candidates( + self.task_description['task_type'], self.task_description['data_types'], + self.task_description['semi'], extra_block=terms_to_block) + ) + if not learner_candidates: + mapped_task = True + learner_candidates = pipeline_utils.filter_primitives_by_dataframe_input( + pipeline_utils.get_primitive_candidates( + schemas_utils.get_task_mapping(self.task_description['task_type']), + self.task_description['data_types'], self.task_description['semi'], extra_block=terms_to_block) + ) + if self.task_description['task_type'] != 'CLASSIFICATION' and \ + self.task_description['task_type'] != 'REGRESSION' and \ + learner_candidates and not mapped_task: + learner_candidates = pipeline_utils.filter_primitives_by_dataframe_input( + pipeline_utils.get_primitive_candidates( + schemas_utils.get_task_mapping(self.task_description['task_type']), self.task_description['data_types'], + self.task_description['semi'], extra_block=terms_to_block) + ) + learner_candidates + + self.learner_candidates = list(set([info[0] for info in learner_candidates])) + print(len(self.learner_candidates), self.learner_candidates) + return [] + + if len(self.learner_candidates) > len(self.failed_learner) + len(self.successful_learner): + print('Model Selection') + pipelines_to_eval = [] + for leaner_candidate in self.learner_candidates: + if len(pipelines_to_eval) >= self.max_num_pipelines_to_eval: + break + + if leaner_candidate not in self.failed_learner and leaner_candidate not in self.successful_learner: + pipeline = self.complete_pipeline(self.preprocessing.feature_selection_pipeline, leaner_candidate) + if pipeline is None: + self.failed_learner.append(leaner_candidate) + else: + print('Evaluating', leaner_candidate) + self.successful_learner.append(leaner_candidate) + pipelines_to_eval.append(pipeline) + pipeline_results = self.backend.evaluate_pipelines( + problem_description=self.problem_description, pipelines=pipelines_to_eval, input_data=self.input_data, + metrics=self.metrics, data_preparation_pipeline=self.data_preparation_pipeline, + scoring_pipeline=self.scoring_pipeline, data_preparation_params=self.data_preparation_params) + + return [self.ranking_function(pipeline_result) for pipeline_result in pipeline_results] + + if not self.hyperparameter_tunner_init and 
not self.tuner_enable: + print('init tuner') + self.hyperparameter_tunner_init = True + completed_pipelines = [result for result in self.history if result.status == 'COMPLETED'] + if not completed_pipelines: + print('No pipelines to tune') + return [] + completed_pipelines.sort(key=lambda x: x.rank) + pipeline_candidates = completed_pipelines[:self.n_pipelines_to_tune] + pipeline_candidates = [candidate.pipeline for candidate in pipeline_candidates] + self.hyperparameter_tunner.set_pipeline_candidates(self.input_data, pipeline_candidates) + self.hyperparameter_tunner.init_search_space() + self.hyperparameter_tunner.input_data = self.input_data + self.tuner_enable = True + + if self.hyperparameter_tunner_init and self.tuner_enable: + return self.hyperparameter_tunner._search(time_left) + return [] + + def complete_pipeline(self, pipeline, primitive): + + def add_construct_predictions(_pipeline, _dataframe_ref, _resolver): + _data_ref = 'steps.{}.produce'.format(len(_pipeline.steps) - 1) + _arguments={'inputs': _data_ref, 'reference': _dataframe_ref} + add_primitive_step_to_pipeline( + _pipeline, LOADED_PRIMITIVES['ConstructPredictions'], fix_arguments(_arguments), resolver=_resolver) + _data_ref = 'steps.{}.produce'.format(len(_pipeline.steps) - 1) + _pipeline.add_output(_data_ref, 'output') + + new_pipeline = copy.deepcopy(pipeline) + new_pipeline.id = str(uuid.uuid4()) + new_pipeline.created = Pipeline().created + + data_ref = 'steps.{}.produce'.format(len(new_pipeline.steps) - 1) + + primitive_arguments = pipeline_utils.query_multiple_terms( + primitive.metadata, ['primitive_code', 'arguments']) + + failed = False + + if not self.task_description['semi']: + + # we check if the primitive has use_semantic_types + # if that is the case, it is straight forward to complete the pipeline + try: + if 'use_semantic_types' in primitive.metadata.get_hyperparams().defaults(): + arguments = {'inputs': data_ref} + hyperparams = [('use_semantic_types', ArgumentType.VALUE, True)] + if 'outputs' in primitive_arguments: + arguments['outputs'] = data_ref + if 'return_result' in primitive.metadata.get_hyperparams().defaults(): + hyperparams.append(('return_result', ArgumentType.VALUE, 'replace')) + + add_primitive_step_to_pipeline(new_pipeline, primitive, fix_arguments(arguments), hyperparams, self.resolver) + add_construct_predictions(new_pipeline, self.preprocessing.dataframe_reference, self.resolver) + else: + # Otherwise, we need to get the inputs and outputs via extract columns by semantic_types + # for this case, we are assuming that th interface has inputs and outputs + arguments = {'inputs': data_ref} + attributes_hyperparams = [('semantic_types', ArgumentType.VALUE, [DATA_TYPES['attribute']])] + target_hyperparams = [('semantic_types', ArgumentType.VALUE, [DATA_TYPES['target']])] + add_primitive_step_to_pipeline( + new_pipeline, LOADED_PRIMITIVES['ExtractColumnsBySemanticTypes'], fix_arguments(arguments), + attributes_hyperparams, self.resolver) + attributes_data_ref = 'steps.{}.produce'.format(len(new_pipeline.steps) - 1) + + add_primitive_step_to_pipeline( + new_pipeline, LOADED_PRIMITIVES['ExtractColumnsBySemanticTypes'], fix_arguments(arguments), + target_hyperparams, self.resolver) + targets_data_ref = 'steps.{}.produce'.format(len(new_pipeline.steps) - 1) + + arguments = {'inputs': attributes_data_ref, 'outputs': targets_data_ref} + hyperparams = [] + if 'return_result' in primitive.metadata.get_hyperparams().defaults(): + hyperparams.append(('return_result', ArgumentType.VALUE, 'replace')) + 
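+                    # Wire the learner to the extracted attribute/target frames built above,
+                    # then ConstructPredictions (added just below) reattaches the predictions
+                    # to the original dataframe reference so the pipeline output is well-formed.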
add_primitive_step_to_pipeline(new_pipeline, primitive, fix_arguments(arguments), hyperparams, self.resolver) + add_construct_predictions(new_pipeline, self.preprocessing.dataframe_reference, self.resolver) + except Exception as e: + print(e) + failed = True + else: + try: + print('=====task_description semi: {} estimator: {} ====='.format(self.task_description['semi'], primitive)) + arguments = {'inputs': data_ref} + attributes_hyperparams = [('semantic_types', ArgumentType.VALUE, [DATA_TYPES['attribute']])] + target_hyperparams = [('semantic_types', ArgumentType.VALUE, [DATA_TYPES['target']])] + add_primitive_step_to_pipeline( + new_pipeline, LOADED_PRIMITIVES['ExtractColumnsBySemanticTypes'], fix_arguments(arguments), + attributes_hyperparams, self.resolver) + attributes_data_ref = 'steps.{}.produce'.format(len(new_pipeline.steps) - 1) + + add_primitive_step_to_pipeline( + new_pipeline, LOADED_PRIMITIVES['ExtractColumnsBySemanticTypes'], fix_arguments(arguments), + target_hyperparams, self.resolver) + targets_data_ref = 'steps.{}.produce'.format(len(new_pipeline.steps) - 1) + + arguments = {'inputs': attributes_data_ref, 'outputs': targets_data_ref} + hyperparams = [('blackbox', ArgumentType.VALUE, primitive)] + add_primitive_step_to_pipeline(new_pipeline, LOADED_PRIMITIVES['SemiClassification'], + fix_arguments(arguments), hyperparams,self.resolver) + add_construct_predictions(new_pipeline, self.preprocessing.dataframe_reference, self.resolver) + except Exception as e: + print(e) + failed = True + + if failed: + return None + else: + return new_pipeline + + diff --git a/axolotl/axolotl/algorithms/dummy.py b/axolotl/axolotl/algorithms/dummy.py new file mode 100644 index 0000000..cc7d497 --- /dev/null +++ b/axolotl/axolotl/algorithms/dummy.py @@ -0,0 +1,87 @@ +import json +import uuid + +from d3m.metadata.pipeline import Pipeline + +from axolotl.algorithms.base import PipelineSearchBase +from axolotl.utils import schemas as schemas_utils, pipeline as pipeline_utils + + +def dummy_ranking_function(pipeline_result): + if pipeline_result.status == 'COMPLETED': + summarize_performance = schemas_utils.summarize_performance_metrics(pipeline_result.scores) + rank = schemas_utils.compute_rank(summarize_performance) + pipeline_result.rank = rank + return pipeline_result + + +class DummySearch(PipelineSearchBase): + def __init__(self, problem_description, backend, *, primitives_blocklist=None, ranking_function=None): + super().__init__(problem_description=problem_description, backend=backend, + primitives_blocklist=primitives_blocklist, ranking_function=ranking_function) + if self.ranking_function is None: + self.ranking_function = dummy_ranking_function + self.task_description = schemas_utils.get_task_description(self.problem_description['problem']['task_keywords']) + + self.available_pipelines = self._return_pipelines( + self.task_description['task_type'], self.task_description['task_subtype'], self.task_description['data_types']) + + # TODO update this to be defined on problem/metrics terms + self.data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA") + self.metrics = self.problem_description['problem']['performance_metrics'] + + self.scoring_pipeline = schemas_utils.get_scoring_pipeline() + self.data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split'] + + self.offset = 10 + self.current_pipeline_index = 0 + + def _search(self, time_left): + pipelines_to_eval = self.available_pipelines[self.current_pipeline_index: 
self.current_pipeline_index+self.offset] + self.current_pipeline_index += self.offset + pipeline_results = self.backend.evaluate_pipelines( + problem_description=self.problem_description, pipelines=pipelines_to_eval, input_data=self.input_data, + metrics=self.metrics, data_preparation_pipeline=self.data_preparation_pipeline, + scoring_pipeline=self.scoring_pipeline, data_preparation_params=self.data_preparation_params) + + return [self.ranking_function(pipeline_result) for pipeline_result in pipeline_results] + + def _return_pipelines(self, task_type, task_subtype, data_type): + """ + A function that return predefined pipelines given a task type. + + Returns + ------- + A predefined pipelines if there are pipelines left, also if there is template + returns the new pipeline with the template. + + """ + # TODO incorporate task_subtype and data_type for future problems + with open(schemas_utils.PIPELINES_DB_DIR) as file: + possible_pipelines_dict = json.load(file) + + if task_type not in possible_pipelines_dict: + self.pipeline_left = False + return None + + possible_pipelines_dict = possible_pipelines_dict[task_type] + + if not possible_pipelines_dict: + return [] + + possible_pipelines = [] + for pipeline_dict in possible_pipelines_dict: + try: + pipeline = pipeline_utils.load_pipeline(pipeline_dict) + + # update id + pipeline.id = str(uuid.uuid4()) + + # update time + pipeline.created = Pipeline().created + + possible_pipelines.append(pipeline) + except Exception: + pass + + return possible_pipelines diff --git a/axolotl/axolotl/algorithms/random_search.py b/axolotl/axolotl/algorithms/random_search.py new file mode 100644 index 0000000..ef5a3d0 --- /dev/null +++ b/axolotl/axolotl/algorithms/random_search.py @@ -0,0 +1,27 @@ +import enum + +from axolotl.algorithms.tuners.random_search_oracle import RandomSearchOracle +from axolotl.algorithms.tuners.tunable_base import TunableBase + + +class RandomSearch(TunableBase): + def __init__(self, problem_description, backend, primitives_blocklist=None, + max_trials=10000, directory='.', num_eval_trials=None): + super(RandomSearch, self).__init__(problem_description, backend, + primitives_blocklist=primitives_blocklist, num_eval_trials=num_eval_trials) + self.directory = directory + self.project_name = 'random_search' + + self.objective = self.problem_description['problem']['performance_metrics'][0]['metric'] + if isinstance(self.objective, enum.Enum): + self.objective = self.objective.name + + self.oracle = RandomSearchOracle( + objective=self.objective, + max_trials=max_trials, # pre-defined number, + seed=self.random_seed, # seed + hyperparameters=self.hyperparameters, + ) + self.oracle._set_project_dir( + self.directory, self.project_name, overwrite=True) + diff --git a/axolotl/axolotl/algorithms/tuners/__init__.py b/axolotl/axolotl/algorithms/tuners/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/axolotl/axolotl/algorithms/tuners/bayesian_oracle.py b/axolotl/axolotl/algorithms/tuners/bayesian_oracle.py new file mode 100644 index 0000000..c957eae --- /dev/null +++ b/axolotl/axolotl/algorithms/tuners/bayesian_oracle.py @@ -0,0 +1,198 @@ +import numpy as np + +from scipy import optimize as scipy_optimize +from sklearn import exceptions + +from d3m.metadata import hyperparams +from kerastuner import Objective +from kerastuner.tuners.bayesian import BayesianOptimizationOracle as KerasBayesian +from kerastuner.engine import trial as trial_lib + +from axolotl.algorithms.tuners.hyperparameters import HyperParameters, \ + 
value_to_cumulative_prob, cumulative_prob_to_value +from axolotl.algorithms.tuners.oracle import infer_metric_direction, random_values, patch_invalid_hyperamaeters + + +class BayesianOptimizationOracle(KerasBayesian): + """ + Bayesian optimization oracle. + """ + + def __init__(self, + objective, + max_trials, + num_initial_points=None, + alpha=1e-4, + beta=2.6, + seed=None, + hyperparameters=None, + allow_new_entries=True, + tune_new_entries=True): + direction = infer_metric_direction(objective) + objective = Objective(name=objective, direction=direction) + super(BayesianOptimizationOracle, self).__init__( + objective=objective, + max_trials=max_trials, + num_initial_points=num_initial_points, + alpha=alpha, + beta=beta, + seed=seed, + hyperparameters=hyperparameters, + allow_new_entries=allow_new_entries, + tune_new_entries=tune_new_entries, + ) + self.num_complete_trials = 0 + self.sorted_candidates = [] + + # TODO how to save a trial + def _save_trial(self, trial): + pass + + def get_state(self): + # `self.trials` are saved in their own, Oracle-agnostic files. + # Just save the IDs for ongoing trials, since these are in `trials`. + state = {} + state['ongoing_trials'] = { + tuner_id: trial.trial_id + for tuner_id, trial in self.ongoing_trials.items()} + # Hyperparameters are part of the state because they can be added to + # during the course of the search. + state['hyperparameters'] = str(self.hyperparameters.get_config()) + + state.update({ + 'num_initial_points': self.num_initial_points, + 'alpha': self.alpha, + 'beta': self.beta, + }) + return state + + def _random_values(self): + """Fills the hyperparameter space with random values. + + Returns: + A dictionary mapping parameter names to suggested values. + """ + + values, seed_state = random_values(hyperparameters=self.hyperparameters, + seed_state=self._seed_state, + tried_so_far=self._tried_so_far, + max_collisions=self._max_collisions, + ) + self._seed_state = seed_state + return values + + def _nonfixed_space(self): + return [hp for hp in self.hyperparameters.space + if not isinstance(hp, hyperparams.Constant)] + + def _vector_to_values(self, vector): + hps = HyperParameters() + vector_index = 0 + for hp in self.hyperparameters.space: + hps.merge([hp]) + if isinstance(hp, hyperparams.Constant): + value = hp.get_default() + else: + prob = vector[vector_index] + vector_index += 1 + value = cumulative_prob_to_value(prob, hp) + + if hps.is_active(hp): + hps.values[hp.name] = value + patch_invalid_hyperamaeters(hps) + return hps.values + + def _vectorize_trials(self): + x = [] + y = [] + ongoing_trials = {t for t in self.ongoing_trials.values()} + for trial in self.trials.values(): + # Create a vector representation of each Trial's hyperparameters. + trial_hps = trial.hyperparameters + vector = [] + for hp in self._nonfixed_space(): + # For hyperparameters not present in the trial (either added after + # the trial or inactive in the trial), set to default value. + if trial_hps.is_active(hp): + trial_value = trial_hps.values[hp.name] + else: + trial_value = hp.default + + # Embed an HP value into the continuous space [0, 1]. + prob = value_to_cumulative_prob(trial_value, hp) + vector.append(prob) + + if trial in ongoing_trials: + # "Hallucinate" the results of ongoing trials. This ensures that + # repeat trials are not selected when running distributed. + x_h = np.array(vector).reshape((1, -1)) + y_h_mean, y_h_std = self.gpr.predict(x_h, return_std=True) + # Give a pessimistic estimate of the ongoing trial. 
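+                # The objective is framed as a minimization below (scores of 'max'
+                # metrics are negated), so mean + one std makes an ongoing trial look
+                # slightly worse than expected and discourages proposing it again.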
+ score = y_h_mean[0] + y_h_std[0] + elif trial.status == 'COMPLETED': + score = trial.score + # Always frame the optimization as a minimization for scipy.minimize. + if self.objective.direction == 'max': + score = -1*score + else: + continue + + x.append(vector) + y.append(score) + + x = np.array(x) + y = np.array(y) + return x, y + + def _populate_space(self, trial_id): + # Generate enough samples before training Gaussian process. + completed_trials = [t for t in self.trials.values() + if t.status == 'COMPLETED'] + + # Use 3 times the dimensionality of the space as the default number of + # random points. + dimensions = len(self.hyperparameters.space) + num_initial_points = self.num_initial_points or 3 * dimensions + if len(completed_trials) < num_initial_points: + return self._random_populate_space() + + if self.num_complete_trials == len(completed_trials) and len(self.sorted_candidates) > 0: + optimal_x = self.sorted_candidates.pop().x + values = self._vector_to_values(optimal_x) + return {'status': trial_lib.TrialStatus.RUNNING, + 'values': values} + + # track the number of complete trials + self.num_complete_trials = len(completed_trials) + + # Fit a GPR to the completed trials and return the predicted optimum values. + x, y = self._vectorize_trials() + try: + self.gpr.fit(x, y) + except exceptions.ConvergenceWarning: + # If convergence of the GPR fails, create a random trial. + return self._random_populate_space() + + def _upper_confidence_bound(x): + x = x.reshape(1, -1) + mu, sigma = self.gpr.predict(x, return_std=True) + return mu - self.beta * sigma + + num_restarts = 50 + bounds = self._get_hp_bounds() + x_seeds = self._random_state.uniform(bounds[:, 0], bounds[:, 1], + size=(num_restarts, bounds.shape[0])) + candidates = [ + scipy_optimize.minimize(_upper_confidence_bound, + x0=x_try, + bounds=bounds, + method='L-BFGS-B') + for x_try in x_seeds + ] + + self.sorted_candidates = sorted(candidates, key=lambda x: x.fun[0], reverse=True) + optimal_x = self.sorted_candidates.pop().x + + values = self._vector_to_values(optimal_x) + return {'status': trial_lib.TrialStatus.RUNNING, + 'values': values} diff --git a/axolotl/axolotl/algorithms/tuners/custom_hps.py b/axolotl/axolotl/algorithms/tuners/custom_hps.py new file mode 100644 index 0000000..e86fcdd --- /dev/null +++ b/axolotl/axolotl/algorithms/tuners/custom_hps.py @@ -0,0 +1,535 @@ +import sys +from collections import OrderedDict + +from d3m.metadata import hyperparams + +epsilon = sys.float_info.epsilon + +clf_xgboost_config = dict( + n_estimators=hyperparams.UniformInt( + lower=10, + upper=50, + default=20, + description='The number of trees in the forest.', + semantic_types=[ + 'https://metadata.datadrivendiscovery.org/types/TuningParameter', + 'https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter', + ], + ), + n_more_estimators=hyperparams.UniformInt( + lower=10, + upper=50, + default=20, + description='When continuing a fit, it controls how many more trees to add every time.', + semantic_types=[ + 'https://metadata.datadrivendiscovery.org/types/TuningParameter', + 'https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter', + ], + ), + max_depth=hyperparams.UniformInt( + lower=5, + upper=50, + default=30, + lower_inclusive=True, + upper_inclusive=True, + description='The maximum depth of the tree.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + learning_rate=hyperparams.LogUniform( + lower=1e-4, + upper=1e-1, + default=0.05, + lower_inclusive=True, + 
upper_inclusive=True, + description=r'Boosting learning rate (xgb\`s \"eta\")', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + gamma=hyperparams.Constant[float]( + default=0.0, + description='Minimum loss reduction required to make a further partition on a leaf node of the tree', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + min_child_weight = hyperparams.Constant[int]( + default=1, + description='Minimum sum of instance weight (hessian) needed in a child. If the tree partition step results ' + 'in a leaf node with the sum of instance weight less than min_child_weight, then the building ' + 'process will give up further partitioning ', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + # max_delta_step = hyperparams.Union[Union[int, None]]( + # configuration=OrderedDict( + # limit=hyperparams.Bounded[int]( + # lower=1, + # upper=None, + # default=1, + # description='Maximum delta step we allow each leaf output to be.' + # ), + # unlimited=hyperparams.Enumeration[int]( + # values=[0], + # default=0, + # description='No constraint.', + # ), + # ), + # default='unlimited', + # description='Maximum delta step we allow.', + # semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + # ), + subsample=hyperparams.Constant[float]( + default=1.0, + description='Subsample ratio of the training instances,this will prevent overfitting. Subsampling will occur ' + 'once in every boosting iteration.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + colsample_bytree=hyperparams.Constant[float]( + default=1.0, + description='Subsample ratio of columns when constructing each tree. Subsampling will occur once in every ' + 'boosting iteration', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + colsample_bylevel=hyperparams.Constant[float]( + default=1.0, + description='Subsample ratio of columns for each split, in each level. Subsampling will occur each time a new ' + 'split is made', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + reg_alpha=hyperparams.Uniform( + lower=0.1, + upper=1.0, + default=0.5, + lower_inclusive=True, + upper_inclusive=True, + description='L1 regularization term on weights. Increasing this value will make model more conservative.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + reg_lambda=hyperparams.Uniform( + lower=0.1, + upper=1.0, + default=0.5, + lower_inclusive=True, + upper_inclusive=True, + description='L2 regularization term on weights. 
Increasing this value will make model more conservative.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + # scale_pos_weight = hyperparams.Bounded[float]( + # lower=0, + # upper=None, + # default=1, + # description='Control the balance of positive and negative weights, useful for unbalanced classes', + # semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + # ), + base_score=hyperparams.Bounded[float]( + lower=0, + upper=None, + default=0.5, + description='The initial prediction score of all instances, global bias.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), +) + +dfs_single_tab_config = dict( + max_percent_null=hyperparams.Uniform( + lower=0, + upper=1, + default=0.9, + lower_inclusive=True, + upper_inclusive=True, + description='The maximum allowed correlation between any two features returned. A lower value means features will be more uncorrelated', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ) + +) + +lgbm_clf_config = dict( + n_estimators=hyperparams.UniformInt( + lower=10, + upper=50, + default=20, + description='The number of trees in the forest.', + semantic_types=[ + 'https://metadata.datadrivendiscovery.org/types/TuningParameter', + 'https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter', + ], + ), + n_more_estimators=hyperparams.UniformInt( + lower=10, + upper=50, + default=20, + description='When continuing a fit, it controls how many more trees to add every time.', + semantic_types=[ + 'https://metadata.datadrivendiscovery.org/types/TuningParameter', + 'https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter', + ], + ), + max_depth=hyperparams.UniformInt( + lower=5, + upper=50, + default=30, + lower_inclusive=True, + upper_inclusive=True, + description='The maximum depth of the tree.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + # num_leaves_base=hyperparams.Bounded[float]( + # lower=1, + # upper=2, + # default=2, + # description='Maximum tree leaves for base learners, this value is the base of the formula num_leaves_base^(max_depth)', + # semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + # ), + # subsample_for_bin=hyperparams.Bounded[int]( + # lower=1, + # upper=None, + # default=200000, + # description='number of data that sampled to construct histogram bins', + # semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + # ), + learning_rate=hyperparams.LogUniform( + lower=1e-4, + upper=1e-1, + default=0.05, + lower_inclusive=True, + upper_inclusive=True, + description=r'Boosting learning rate (xgb\`s \"eta\")', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + min_child_weight = hyperparams.Constant[int]( + default=1, + description='Minimum sum of instance weight (hessian) needed in a child. If the tree partition step results ' + 'in a leaf node with the sum of instance weight less than min_child_weight, then the building ' + 'process will give up further partitioning ', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + # min_child_samples=hyperparams.Bounded[int]( + # lower=0, + # upper=None, + # default=20, + # description='minimal number of data in one leaf. 
Can be used to deal with over-fitting', + # semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + # ), + # max_delta_step = hyperparams.Union[Union[int, None]]( + # configuration=OrderedDict( + # limit=hyperparams.Bounded[int]( + # lower=1, + # upper=None, + # default=1, + # description='Maximum delta step we allow each leaf output to be.' + # ), + # unlimited=hyperparams.Enumeration[int]( + # values=[0], + # default=0, + # description='No constraint.', + # ), + # ), + # default='unlimited', + # description='Maximum delta step we allow.', + # semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + # ), + subsample=hyperparams.Constant[float]( + default=1.0, + description='Subsample ratio of the training instances,this will prevent overfitting. Subsampling will occur ' + 'once in every boosting iteration.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + subsample_freq=hyperparams.Bounded[int]( + lower=0, + upper=1, + default=0, + description='frequency for bagging', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + colsample_bytree=hyperparams.Constant[float]( + default=1.0, + description='Subsample ratio of columns when constructing each tree. Subsampling will occur once in every ' + 'boosting iteration', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + min_split_gain=hyperparams.Bounded[float]( + lower=0, + upper=None, + default=0, + description='the minimal gain to perform split', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + reg_alpha=hyperparams.Uniform( + lower=0.1, + upper=1.0, + default=0.5, + lower_inclusive=True, + upper_inclusive=True, + description='L1 regularization term on weights. Increasing this value will make model more conservative.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + reg_lambda=hyperparams.Uniform( + lower=0.1, + upper=1.0, + default=0.5, + lower_inclusive=True, + upper_inclusive=True, + description='L2 regularization term on weights. Increasing this value will make model more conservative.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), +) + +sk_logistic_regression_config = dict( + dual=hyperparams.Constant[bool]( + default=False, + description='Dual or primal formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. 
Prefer dual=False when n_samples > n_features.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ), + penalty=hyperparams.Choice( + choices={ + 'l1': hyperparams.Hyperparams.define( + configuration=OrderedDict({}) + ), + 'l2': hyperparams.Hyperparams.define( + configuration=OrderedDict({}) + ), + 'none': hyperparams.Hyperparams.define( + configuration=OrderedDict({}) + ), + 'elasticnet': hyperparams.Hyperparams.define( + configuration=OrderedDict({ + 'l1_ratio': hyperparams.Union( + configuration=OrderedDict({ + 'float': hyperparams.Uniform( + lower=0, + upper=1, + default=0.001, + lower_inclusive=True, + upper_inclusive=True, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + # 'l1_ratio must be between 0 and 1; got (l1_ratio=None)' + # 'none': hyperparams.Constant( + # default=None, + # semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + # ) + }), + default='float', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ) + }) + ) + }, + default='l2', + description='Used to specify the norm used in the penalization. The \'newton-cg\', \'sag\' and \'lbfgs\' solvers support only l2 penalties.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ), + intercept_scaling=hyperparams.Constant[float]( + default=1, + description='Useful only when the solver \'liblinear\' is used and self.fit_intercept is set to True. In this case, x becomes [x, self.intercept_scaling], i.e. a "synthetic" feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes ``intercept_scaling * synthetic_feature_weight``. Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ), + +) + +sk_decision_tree_clf_config = dict( + min_samples_split=hyperparams.Union( + configuration=OrderedDict({ + 'absolute': hyperparams.Constant[int]( + default=2, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + 'percent': hyperparams.Bounded[float]( + default=0.25, + lower=0, + upper=1, + lower_inclusive=False, + # upper_inclusive=False, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ) + }), + default='absolute', + description='The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a percentage and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. 
versionchanged:: 0.18 Added float values for percentages.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ), + max_features=hyperparams.Union( + configuration=OrderedDict({ + # max_features must be in (0, n_features] + # 'specified_int': hyperparams.Bounded[int]( + # lower=0, + # upper=None, + # default=0, + # semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + # ), + 'calculated': hyperparams.Enumeration[str]( + values=['auto', 'sqrt', 'log2'], + default='auto', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + 'none': hyperparams.Constant( + default=None, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + 'percent': hyperparams.Bounded[float]( + default=0.25, + lower=0, + upper=1, + lower_inclusive=False, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ) + }), + default='none', + description='The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each split. - If "auto", then `max_features=sqrt(n_features)`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ), + # 'max_leaf_nodes 0 must be either None or larger than 1' + max_leaf_nodes=hyperparams.Constant( + default=None, + description='Grow a tree with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ), +) + +sk_sgd_clf_config = dict( + validation_fraction=hyperparams.Bounded[float]( + default=0.1, + lower=0, + upper=0.99999999999, + lower_inclusive=False, + # upper_inclusive=False, + description='The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ), + # eta0 must be > 0 + eta0=hyperparams.Bounded[float]( + lower=0.0, + upper=1.0, + default=0.1, + lower_inclusive=False, + description='The initial learning rate for the \'constant\' or \'invscaling\' schedules. 
The default value is 0.0 as eta0 is not used by the default schedule \'optimal\'.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ), + + +) + +sk_random_forest_clf_config = dict( + max_features=hyperparams.Union( + configuration=OrderedDict({ + # max_features must be in (0, n_features] + # 'specified_int': hyperparams.Bounded[int]( + # lower=0, + # upper=None, + # default=0, + # semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + # ), + 'calculated': hyperparams.Enumeration[str]( + values=['auto', 'sqrt', 'log2'], + default='auto', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + 'none': hyperparams.Constant( + default=None, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + 'percent': hyperparams.Uniform( + default=0.25, + lower=0, + upper=1, + lower_inclusive=True, + upper_inclusive=False, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ) + }), + default='calculated', + description='The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each split. - If "auto", then `max_features=sqrt(n_features)`. - If "sqrt", then `max_features=sqrt(n_features)` (same as "auto"). - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ), + max_samples=hyperparams.Union( + configuration=OrderedDict({ + 'absolute': hyperparams.Bounded[int]( + lower=0, + upper=None, + lower_inclusive=False, + default=1, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + 'percent': hyperparams.Bounded[float]( + default=0.9, + lower=0 + epsilon, + upper=1, + upper_inclusive=False, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + 'none': hyperparams.Constant( + default=None, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ) + }), + default='none', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ), +) + +sk_extra_tree_tree_clf_config = dict( + max_features=hyperparams.Union( + configuration=OrderedDict({ + 'calculated': hyperparams.Enumeration[str]( + values=['auto', 'sqrt', 'log2'], + default='auto', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + 'none': hyperparams.Constant( + default=None, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + 'percent': hyperparams.Bounded[float]( + default=0.25, + lower=0, + upper=1, + lower_inclusive=False, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ) + }), + default='calculated', + description='The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each split. - If "auto", then `max_features=sqrt(n_features)`. 
- If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ), + max_samples=hyperparams.Union( + configuration=OrderedDict({ + 'absolute': hyperparams.Bounded[int]( + lower=0, + upper=None, + lower_inclusive=False, + default=1, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + 'percent': hyperparams.Bounded[float]( + default=0.9, + lower=0 + epsilon, + upper=1, + upper_inclusive=False, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ), + 'none': hyperparams.Constant( + default=None, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + ) + }), + default='none', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ) +) + +# To avoid the issue, https://gitlab.com/TAMU_D3M/d3m_primitives/-/issues/1 +tamu_feature_selection_config = dict( + percentage_selected_features=hyperparams.Uniform( + default=0.5, + upper=1, + lower=0.25, + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + description="percentage of features to select, between 0 and 1") +) + +config = { + 'd3m.primitives.classification.xgboost_gbtree.DataFrameCommon': clf_xgboost_config, + 'd3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization': dfs_single_tab_config, + 'd3m.primitives.classification.light_gbm.DataFrameCommon': lgbm_clf_config, + 'd3m.primitives.classification.logistic_regression.SKlearn': sk_logistic_regression_config, + 'd3m.primitives.classification.decision_tree.SKlearn': sk_decision_tree_clf_config, + 'd3m.primitives.classification.sgd.SKlearn': sk_sgd_clf_config, + 'd3m.primitives.classification.random_forest.SKlearn': sk_random_forest_clf_config, + 'd3m.primitives.classification.extra_trees.SKlearn': sk_extra_tree_tree_clf_config, + 'd3m.primitives.feature_selection.skfeature.TAMU': tamu_feature_selection_config, +} diff --git a/axolotl/axolotl/algorithms/tuners/hyperparameters.py b/axolotl/axolotl/algorithms/tuners/hyperparameters.py new file mode 100644 index 0000000..0da8c26 --- /dev/null +++ b/axolotl/axolotl/algorithms/tuners/hyperparameters.py @@ -0,0 +1,195 @@ +import json +import math +from scipy.stats import norm + +from d3m import utils as d3m_utils +from d3m.metadata import hyperparams +from d3m.metadata.hyperparams import HyperparameterMeta +from kerastuner.engine.hyperparameters import HyperParameters as KerasHyperparams + +PIPELINE_CHOICE = 'pipeline_choice' + + +def GET_CONFIG(param_val): + config = param_val.to_simple_structure() + config['p'] = param_val + if isinstance(param_val, hyperparams.SortedList) or isinstance(param_val, hyperparams.SortedSet): + config['is_configuration'] = param_val.is_configuration + return config + + +class HyperParameters(KerasHyperparams): + def get_config(self): + return { + 'space': [{'class_name': p.__class__.__name__, + 'config': GET_CONFIG(p)} + for p in self.space], + 'values': dict((k, v) for (k, v) in self.values.items()), + } + + def retrieve(self, name, val, parent_name=None, parent_values=None): + """Gets or creates a `HyperParameter`.""" + config = GET_CONFIG(val) + hp = config['p'] + hp.name 
= self._get_name(name) + hp.default = get_val(hp.get_default)() + hp.random_sample = get_val(hp.sample) + hp.conditions = [c for c in self._conditions] + with self._maybe_conditional_scope(parent_name, parent_values): + return self._retrieve(hp) + + def _register(self, hp): + """Registers a `HyperParameter` into this container.""" + self._hps[hp.name].append(hp) + self._space.append(hp) + value = hp.default + if self._conditions_are_active(hp.conditions): + self.values[hp.name] = value + return value + return None + + @classmethod + def from_config(cls, config): + hps = cls() + for p in config['space']: + p = p['config']['p'] + hps._hps[p.name].append(p) + hps._space.append(p) + hps.values = dict((k, v) for (k, v) in config['values'].items()) + return hps + + def copy(self): + return HyperParameters.from_config(self.get_config()) + + def __repr__(self): + return self.to_json() + + def to_json(self): + return json.dumps(self.__dict__, default=serialize) + + def _get_name_parts(self, full_name): + """Splits `full_name` into its scopes and leaf name.""" + str_parts = full_name.split('/') + parts = [] + + for part in str_parts: + if '=' in part: + parent_name, parent_values = part.split('=') + parent_values = parent_values.split(',') + parts.append({'parent_name': parent_name, + 'parent_values': parent_values}) + else: + parts.append(part) + + return parts + + def get_pipeline_id(self): + pipeline_id = self.values[PIPELINE_CHOICE] + return pipeline_id + + def get_name_parts(self, full_name): + step, primitive_name, hp_name = self._get_name_parts(full_name) + return step, primitive_name, hp_name + + +def get_val(func): + def wrapper(*args, **kwargs): + val = func(*args, **kwargs) + return val['choice'] if isinstance(val, dict) and 'choice' in val else val + return wrapper + + +def serialize(obj): + if isinstance(obj, HyperparameterMeta): + return obj.__dict__ + + +def value_to_cumulative_prob(value, hp): + """Convert a hyperparameter value to [0, 1].""" + if isinstance(hp, hyperparams.Constant): + return 0.5 + if isinstance(hp, hyperparams.UniformBool): + # Center the value in its probability bucket. + if value: + return 0.75 + return 0.25 + elif isinstance(hp, (hyperparams.Choice, hyperparams.Enumeration, hyperparams.Union)): + if isinstance(hp, hyperparams.Choice): + choices = hp.choices + index = list(choices.keys()).index(value) + elif isinstance(hp, hyperparams.Union): + choices = hp.configuration.keys() + for index, val_type in enumerate(hp.configuration.values()): + if isinstance(value, val_type.structural_type): + break + else: + choices = hp.values + index = choices.index(value) + ele_prob = 1 / len(choices) + # Center the value in its probability bucket. 
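+        # e.g. with 4 choices and index 2 this returns (2 + 0.5) * 0.25 = 0.625,
+        # the midpoint of the third equal-width bucket in [0, 1].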
+ return (index + 0.5) * ele_prob + elif isinstance(hp, (hyperparams.UniformInt, hyperparams.Uniform, hyperparams.Bounded)): + lower, upper = hp.lower, hp.upper + if lower is None or upper is None: + return 0.5 + return (value - lower) / (upper - lower) + elif isinstance(hp, hyperparams.LogUniform): + lower, upper = hp.lower, hp.upper + if lower is None or upper is None: + return 0.5 + return (math.log(value / lower) / + math.log(upper / lower)) + elif isinstance(hp, (hyperparams.Normal, hyperparams.LogNormal)): + return norm.cdf(value, hp.mu, hp.sigma) + else: + raise ValueError('Unrecognized HyperParameter type: {}'.format(hp)) + + +def cumulative_prob_to_value(prob, hp): + """Convert a value from [0, 1] to a hyperparameter value.""" + if isinstance(hp, hyperparams.Constant): + return hp.get_default() + elif isinstance(hp, hyperparams.UniformBool): + return bool(prob >= 0.5) + elif isinstance(hp, (hyperparams.Choice, hyperparams.Enumeration, hyperparams.Union)): + if isinstance(hp, hyperparams.Choice): + choices = list(hp.choices.keys()) + elif isinstance(hp, hyperparams.Union): + choices = list(hp.configuration.keys()) + else: + choices = hp.values + ele_prob = 1 / len(choices) + index = int(math.floor(prob / ele_prob)) + # Can happen when `prob` is very close to 1. + if index == len(choices): + index = index - 1 + if isinstance(hp, hyperparams.Union): + key = choices[index] + with d3m_utils.silence(): + val = hp.configuration[key].sample() + return val + return choices[index] + elif isinstance(hp, (hyperparams.UniformInt, hyperparams.Uniform, hyperparams.Bounded)): + import sys + epsilon = sys.float_info.epsilon + lower, upper = hp.lower, hp.upper + if lower is None or upper is None: + return hp.get_default() + value = prob * (upper - lower) + lower + if hp.structural_type == int: + return int(value) + if value == lower and not hp.lower_inclusive: + return value + epsilon + if value == upper and not hp.upper_inclusive: + return value - epsilon + return value + elif isinstance(hp, hyperparams.LogUniform): + lower, upper = hp.lower, hp.upper + if lower is None or upper is None: + return hp.get_default() + value = lower * math.pow(upper / lower, prob) + return value + elif isinstance(hp, (hyperparams.Normal, hyperparams.LogNormal)): + return norm.ppf(prob, loc=hp.mu, scale=hp.sigma) + else: + raise ValueError('Unrecognized HyperParameter type: {}'.format(hp)) diff --git a/axolotl/axolotl/algorithms/tuners/oracle.py b/axolotl/axolotl/algorithms/tuners/oracle.py new file mode 100644 index 0000000..7b129b1 --- /dev/null +++ b/axolotl/axolotl/algorithms/tuners/oracle.py @@ -0,0 +1,104 @@ +import os + +import hashlib +import random + +from d3m import utils as d3m_utils +from d3m.metadata import problem as problem_module +from axolotl.algorithms.tuners.hyperparameters import HyperParameters, PIPELINE_CHOICE + +_MAX_METRICS = { + problem_module.PerformanceMetric.ACCURACY, + problem_module.PerformanceMetric.PRECISION, + problem_module.PerformanceMetric.RECALL, + problem_module.PerformanceMetric.F1, + problem_module.PerformanceMetric.F1_MICRO, + problem_module.PerformanceMetric.F1_MACRO, + problem_module.PerformanceMetric.ROC_AUC, + problem_module.PerformanceMetric.JACCARD_SIMILARITY_SCORE, + problem_module.PerformanceMetric.NORMALIZED_MUTUAL_INFORMATION, # not sure + problem_module.PerformanceMetric.OBJECT_DETECTION_AVERAGE_PRECISION, +} +_MAX_METRICS_NAME = {s.name for s in _MAX_METRICS} + + +_MIN_METRICS = { + problem_module.PerformanceMetric.MEAN_ABSOLUTE_ERROR, + 
problem_module.PerformanceMetric.MEAN_SQUARED_ERROR, + problem_module.PerformanceMetric.ROOT_MEAN_SQUARED_ERROR, + problem_module.PerformanceMetric.R_SQUARED, +} +_MIN_METRICS_NAME = {s.name for s in _MIN_METRICS} + + +def infer_metric_direction(metric): + # Handle str input and get canonical object. + if isinstance(metric, str): + metric_name = metric + if metric_name in _MIN_METRICS_NAME: + return 'min' + elif metric_name in _MAX_METRICS_NAME: + return 'max' + + +def random_values(hyperparameters, seed_state, tried_so_far, max_collisions): + collisions = 0 + while 1: + # Generate a set of random values. + hps = HyperParameters() + with d3m_utils.silence(): + for hp in hyperparameters.space: + hps.merge([hp]) + if hps.is_active(hp): # Only active params in `values`. + hps.values[hp.name] = hp.random_sample(seed_state) + seed_state += 1 + # Pick out the invalid hyper-parameters + patch_invalid_hyperamaeters(hps) + + values = hps.values + # Keep trying until the set of values is unique, + # or until we exit due to too many collisions. + values_hash = compute_values_hash(values) + if values_hash in tried_so_far: + collisions += 1 + if collisions > max_collisions: + return None + continue + tried_so_far.add(values_hash) + break + return values, seed_state + + +def compute_values_hash(values): + keys = sorted(values.keys()) + s = ''.join(str(k) + '=' + str(values[k]) for k in keys) + return hashlib.sha256(s.encode('utf-8')).hexdigest()[:32] + + +def patch_invalid_hyperamaeters(hps): + values = hps.values + for full_name in values: + if full_name == PIPELINE_CHOICE: + continue + hp_val = values[full_name] + step, primitive_name, hp_name = hps.get_name_parts(full_name) + if primitive_name == 'd3m.primitives.classification.svc.SKlearn' \ + and hp_name == 'decision_function_shape' and hp_val == 'ovo': + # break_ties must be False if decision-function_shape == 'ovo' + break_ties = os.path.join(step, primitive_name, 'break_ties') + values[break_ties] = False + if primitive_name == 'd3m.primitives.classification.logistic_regression.SKlearn': + # elasticnet' penalty, solver must be'saga' + if hp_name == 'penalty' and hp_val == 'elasticnet': + solver = os.path.join(step, primitive_name, 'solver') + values[solver] = 'saga' + if hp_name == 'solver': + penalty = os.path.join(step, primitive_name, 'penalty') + # liblinear only supports 'ovr' multi_class and [l2, l1] penalty + if hp_val == 'liblinear': + multi_class = os.path.join(step, primitive_name, 'multi_class') + values[multi_class] = 'ovr' + values[penalty] = random.choice(['l2', 'l1']) + # ['lbfgs', 'newton-cg', 'sag'] only support [l2, none] penalty + elif hp_val in ['lbfgs', 'newton-cg', 'sag']: + values[penalty] = random.choice(['l2', 'none']) diff --git a/axolotl/axolotl/algorithms/tuners/random_search_oracle.py b/axolotl/axolotl/algorithms/tuners/random_search_oracle.py new file mode 100644 index 0000000..f446389 --- /dev/null +++ b/axolotl/axolotl/algorithms/tuners/random_search_oracle.py @@ -0,0 +1,66 @@ +from kerastuner import Objective +from kerastuner.engine import trial as trial_lib +from kerastuner.tuners.randomsearch import RandomSearchOracle as KerasRandomSearchOracle + +from axolotl.algorithms.tuners.oracle import infer_metric_direction, random_values + + +class RandomSearchOracle(KerasRandomSearchOracle): + """ + Random search oracle. 
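+    Wraps kerastuner's RandomSearchOracle so that trial values are drawn from
+    the d3m hyperparameter space via random_values(), with invalid
+    hyperparameter combinations patched before a trial is proposed.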
+ """ + + def __init__(self, + objective, + max_trials, + seed=None, + hyperparameters=None, + allow_new_entries=True, + tune_new_entries=True): + direction = infer_metric_direction(objective) + objective = Objective(name=objective, direction=direction) + super(RandomSearchOracle, self).__init__( + objective=objective, + max_trials=max_trials, + seed=seed, + hyperparameters=hyperparameters, + tune_new_entries=tune_new_entries, + allow_new_entries=allow_new_entries) + + def _populate_space(self, _): + values = self._random_values() + if values is None: + return {'status': trial_lib.TrialStatus.STOPPED, + 'values': None} + return {'status': trial_lib.TrialStatus.RUNNING, + 'values': values} + + def _random_values(self): + """Fills the hyperparameter space with random values. + + Returns: + A dictionary mapping parameter names to suggested values. + """ + + values, seed_state = random_values(hyperparameters=self.hyperparameters, + seed_state=self._seed_state, + tried_so_far=self._tried_so_far, + max_collisions=self._max_collisions, + ) + self._seed_state = seed_state + return values + + def _save_trial(self, trial): + pass + + def get_state(self): + # `self.trials` are saved in their own, Oracle-agnostic files. + # Just save the IDs for ongoing trials, since these are in `trials`. + state = {} + state['ongoing_trials'] = { + tuner_id: trial.trial_id + for tuner_id, trial in self.ongoing_trials.items()} + # Hyperparameters are part of the state because they can be added to + # during the course of the search. + state['hyperparameters'] = str(self.hyperparameters.get_config()) + return state diff --git a/axolotl/axolotl/algorithms/tuners/tunable_base.py b/axolotl/axolotl/algorithms/tuners/tunable_base.py new file mode 100644 index 0000000..1c789e4 --- /dev/null +++ b/axolotl/axolotl/algorithms/tuners/tunable_base.py @@ -0,0 +1,258 @@ +import logging +import multiprocessing + +import os +import uuid +import copy +from typing import Tuple +import re +import numpy as np + +from d3m.metadata import hyperparams +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline + +from kerastuner.engine import trial as trial_module + +from axolotl import predefined_pipelines +from axolotl.algorithms.tuners import custom_hps +from axolotl.algorithms.base import PipelineSearchBase +from axolotl.algorithms.dummy import dummy_ranking_function +from axolotl.algorithms.tuners.hyperparameters import HyperParameters, PIPELINE_CHOICE +from axolotl.utils import schemas as schemas_utils + +logger = logging.getLogger(__name__) + + +class TunableBase(PipelineSearchBase): + + def __init__(self, problem_description, backend, + primitives_blocklist=None, ranking_function=None, num_eval_trials=None): + if ranking_function is None: + ranking_function = dummy_ranking_function + if num_eval_trials is None: + num_eval_trials = multiprocessing.cpu_count() + super(TunableBase, self).__init__(problem_description, backend, + primitives_blocklist=primitives_blocklist, ranking_function=ranking_function) + # TODO update this to be defined on problem/metrics terms + self.data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA") + self.data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split'] + + self.scoring_pipeline = schemas_utils.get_scoring_pipeline() + self.scoring_params = None + + self.metrics = problem_description['problem']['performance_metrics'] + + self.oracle = None + self.tuner_id = 'tuner' + self.hyperparameters = HyperParameters() + 
self.pipeline_candidates = {} + self.num_eval_trials = num_eval_trials + + def set_pipeline_candidates(self, input_data, pipeline_candidates): + if pipeline_candidates is None: + problem = self.problem_description + # ToDo should use fetch(input_data, problem, schemas_utils.PIPELINES_DB_DIR) + for pipeline in predefined_pipelines.fetch_from_file(problem, schemas_utils.PIPELINES_DB_DIR): + self.pipeline_candidates[pipeline.id] = pipeline + elif isinstance(pipeline_candidates, list): + for pipeline in pipeline_candidates: + self.pipeline_candidates[pipeline.id] = pipeline + elif isinstance(pipeline_candidates, dict): + self.pipeline_candidates = pipeline_candidates + else: + raise ValueError('pipeline_candidate should be None, list or dict') + + def init_search_space(self): + pipeline_id = hyperparams.Enumeration[str]( + values=list(self.pipeline_candidates.keys()), + default=list(self.pipeline_candidates.keys())[0], + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ) + self.hyperparameters.retrieve(PIPELINE_CHOICE, pipeline_id) + for pipeline in self.pipeline_candidates.values(): + self._get_pipeline_search_space(pipeline) + + def _get_pipeline_search_space(self, pipeline): + PREFIX_STEP = 'step' + with self.hyperparameters.conditional_scope(PIPELINE_CHOICE, pipeline.id): + for i, step in enumerate(pipeline.steps): + with self.hyperparameters.name_scope('{}{}'.format(PREFIX_STEP, i)): + primitive = step.primitive + self._get_primitive_search_space(primitive) + + def _get_primitive_search_space(self, primitive): + hyperparameters = primitive.metadata.query()['primitive_code']['hyperparams'] + primitive_python_path = primitive.metadata.query()['python_path'] + name = primitive_python_path + config = primitive.metadata.query()['primitive_code']['class_type_arguments']['Hyperparams'].configuration + custom_config = custom_hps.config.get(primitive_python_path, None) + if not custom_config is None: + config._dict.update(custom_config) + with self.hyperparameters.name_scope(name): + for param_name, param_info in hyperparameters.items(): + if self.is_tunable(param_info['semantic_types']): + param_val = config[param_name] + # SortedSet.to_simple_structure() has bug, so we skip it. 
+ if isinstance(param_val, (hyperparams.List, hyperparams.Set)): + continue + self.hyperparameters.retrieve(param_name, param_val) + if isinstance(param_val, hyperparams.Choice): + for choice_name, choice_val in param_val.choices.items(): + with self.hyperparameters.conditional_scope(param_name, choice_name): + for sub_param_name, sub_param_val in choice_val.configuration.items(): + if sub_param_name != 'choice': + self.hyperparameters.retrieve(sub_param_name, sub_param_val) + + def is_tunable(self, semantic_types: Tuple[str, ...]) -> bool: + return any('tuning' in t.lower() for t in semantic_types) + + def search_fit(self, input_data, time_limit=300, *, expose_values=False, pipeline_candidates=None): + self.set_pipeline_candidates(input_data, pipeline_candidates) + self.init_search_space() + return super(TunableBase, self).search_fit(input_data, time_limit, expose_values=expose_values) + + def _search(self, time_left): + trials = self.create_trials(num_trials=self.num_eval_trials) + if len(trials) == 0: + logger.info('Oracle trigger exit') + return [] + results = self.run_trials(trials, input_data=self.input_data) + self.end_trials(trials) + return results + + def run_trials(self, trials, **fit_kwargs): + pipelines = [] + id_2_trials = {} + + for trial in trials: + hp = trial.hyperparameters + try: + pipeline = self.build_pipeline(hp) + id_2_trials[pipeline.id] = trial + pipelines.append(pipeline) + except Exception as e: + logger.error('Current trial is failed. Error: {}'.format(e)) + trial.status = trial_module.TrialStatus.INVALID + + input_data = fit_kwargs.pop('input_data') + + pipeline_results = self.backend.evaluate_pipelines( + problem_description=self.problem_description, + pipelines=pipelines, + input_data=input_data, + metrics=self.metrics, + data_preparation_pipeline=self.data_preparation_pipeline, + scoring_pipeline=self.scoring_pipeline, + data_preparation_params=self.data_preparation_params, + ) + + results = [] + for result in pipeline_results: + trial = id_2_trials[result.pipeline.id] + if result.status == 'ERRORED': + logger.error('Current trial is failed. 
Error: {}'.format(result.error)) + trial.status = trial_module.TrialStatus.INVALID + else: + scores = result.scores + # scores = runtime_module.combine_folds(scores) + summarize_performance = schemas_utils.summarize_performance_metrics(scores) + metrics = self._get_pipeline_metrics(summarize_performance) + self.oracle.update_trial( + trial.trial_id, metrics=metrics + ) + trial.status = trial_module.TrialStatus.COMPLETED + results.append(self.ranking_function(result)) + return results + + def build_pipeline(self, hyperparameters): + """ + hyperparameters example: + { + 'STEP5/d3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization/max_percent_null: 0, + 'STEP7/d3m.primitives.data_preprocessing.robust_scaler.SKlearn/quantile_range: (2.798121390864261, 14.852664215409096), + } + """ + values = hyperparameters.values + pipeline_id = hyperparameters.get_pipeline_id() + pipeline = copy.deepcopy(self.pipeline_candidates[pipeline_id]) + pipeline.id = str(uuid.uuid4()) + # update time + pipeline.created = Pipeline().created + + skip_hps = set() + # for key in sorted(values.keys()): + for hp in hyperparameters.space: + if hyperparameters.is_active(hp) and hp.name not in skip_hps and hp.name != PIPELINE_CHOICE: + key = hp.name + step, primitive_name, hp_name = hyperparameters.get_name_parts(key) + value = values[key] + step_idx = self.__get_step_idx_by_name(step) + if step_idx is None: + raise KeyError('{} not in the pipeline'.format(primitive_name)) + primitive_step = pipeline.steps[step_idx] + arg_type = ArgumentType.VALUE + # In order to avoid the following error + # Value '0' for hyper-parameter \ + # 'STEP8/d3m.primitives.classification.xgboost_gbtree.DataFrameCommon/max_delta_step' \ + # is not an instance of the structural type: typing.Union[int, NoneType] + # Here is workaround + if isinstance(value, np.int64): + value = int(value) + elif isinstance(value, np.str_): + value = str(value) + elif isinstance(value, np.bool_): + value = bool(value) + if hp_name in primitive_step.hyperparams: + del primitive_step.hyperparams[hp_name] + # Handle Choice + if isinstance(hp, hyperparams.Choice): + choice_cls = hp.choices[value] + _vals = {} + for name in choice_cls.configuration: + if name == 'choice': + _vals[name] = value + else: + _key = os.path.join(step, primitive_name, name) + _vals[name] = values[_key] + skip_hps.add(_key) + value = choice_cls(_vals) + primitive_step.add_hyperparameter(name=hp_name, argument_type=arg_type, + data=value) + return pipeline + + def __get_step_idx_by_name(self, prefix_primitive_name): + regex = r"(?<=STEP)\d+" + match = re.search(regex, prefix_primitive_name, re.IGNORECASE) + if match: + return int(match.group(0)) + return None + + def _get_pipeline_metrics(self, summarize_performance): + metrics = {} + for name, info in summarize_performance.items(): + metrics[name] = info['mean'] + return metrics + + def end_trials(self, trials): + """A hook called after each trial is run. + + # Arguments: + trial: A `Trial` instance. 
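+            Each trial in ``trials`` has its final status reported back to the oracle.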
+ """ + [self.oracle.end_trial(trial.trial_id, trial.status) for trial in trials] + # self.oracle.update_space(trial.hyperparameters) + + def create_trials(self, num_trials): + trials = [] + for i in range(num_trials): + try: + trial = self.oracle.create_trial('{}_{}'.format(self.tuner_id, i)) + except: + break + + if trial.status == trial_module.TrialStatus.STOPPED: + break + else: + trials.append(trial) + return trials diff --git a/axolotl/axolotl/backend/__init__.py b/axolotl/axolotl/backend/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/axolotl/axolotl/backend/base.py b/axolotl/axolotl/backend/base.py new file mode 100644 index 0000000..e0471d9 --- /dev/null +++ b/axolotl/axolotl/backend/base.py @@ -0,0 +1,313 @@ +import abc +import typing + +from d3m.metadata.problem import Problem, PerformanceMetric +from d3m.metadata.pipeline import Pipeline + +from axolotl.utils.pipeline import PipelineResult +from axolotl.utils.schemas import ContainerType + + +class RunnerBase: + """ + A base class for the pipeline runner backend. + This child from this class must implement ``request_status`` and ``request_results`` which should keep + track of all requests. + + Parameters + ---------- + random_seed : int + Random seed passed to the constructor. + volumes_dir : str + Path to a directory with static files required by primitives. + In the standard directory structure (as obtained running ``python3 -m d3m index download``). + scratch_dir : str + Path to a directory to store any temporary files needed during execution. + + Attributes + ---------- + random_seed : int + Random seed passed to the constructor. + volumes_dir : str + Path to a directory with static files required by primitives. + In the standard directory structure (as obtained running ``python3 -m d3m index download``). + scratch_dir : str + Path to a directory to store any temporary files needed during execution. + """ + def __init__(self, *, random_seed: int = 0, volumes_dir: str = None, scratch_dir: str = None) -> None: + self.random_seed = random_seed + self.volumes_dir = volumes_dir + self.scratch_dir = scratch_dir + + def add_metric(self, name: str, *, best_value: float, worst_value: float, score_class: type, + requires_confidence: bool = False, requires_rank: bool = False): + """ + Method to register a new metric. + + Parameters + ---------- + name : str + Metric name, e.g. ACCURACY. + best_value : float + Value that represents the best e.g. in accuracy 1.0 + worst_value: float + Value that represent the worst e.g. in accuracy 0 + score_class : type + A class that helps computing the score. + requires_confidence : bool + A flag that tells if the scoring function requires a confidence value. + requires_rank : bool + A flag that tell if the scoring function requires the rank of the predictions. + """ + + PerformanceMetric.register_metric(name=name, best_value=best_value, worst_value=worst_value, score_class=score_class, + requires_confidence=requires_confidence, requires_rank=requires_rank) + + @abc.abstractmethod + def get_request(self, request_id: str) -> PipelineResult: + """ + A method that returns the result from the requests + + Parameters + ---------- + request_id : str + Request id of data to retrieve + + Returns + ------- + PipelineResult + A PipelineResult instance that contains the information. 
+ """ + + @abc.abstractmethod + def fit_pipeline_request(self, problem_description: Problem, pipeline: Pipeline, + input_data: typing.Sequence[ContainerType], *, timeout: float = None, + expose_outputs: bool = False) -> str: + """ + A method that submit a fit_pipeline job. + + Parameters + ---------- + problem_description : Problem + A problem description. + pipeline : Pipeline + The pipeline that is going to be fitted. + input_data : typing.Sequence[ContainerType] + A list of D3M containers. + timeout : float + A maximum amount of time that pipelines are going to be executed in seconds. + expose_outputs : bool + A variable that enable exposing every intermediate results based on the input_data + + Returns + ------- + str + A request id. + """ + + def fit_pipeline(self, problem_description: Problem, pipeline: Pipeline, input_data: typing.Sequence[ContainerType], + *, timeout: float = None, expose_outputs: bool = False) -> PipelineResult: + """ + A method that fit a pipeline, save the state and returns a PipelineResult. + + Parameters + ---------- + problem_description : Problem + A problem description. + pipeline : Pipeline + A pipeline that are going to be fitted. + input_data : typing.Sequence[ContainerType] + A list of D3M containers. + timeout : float + A maximum amount of time that pipelines are going to be executed in seconds. + expose_outputs : bool + A variable that enable exposing every intermediate results based on the input_data + + Returns + ------- + PipelineResult + A pipeline result containg the result of fitting the pipeline. + """ + request_id = self.fit_pipeline_request(problem_description=problem_description, pipeline=pipeline, + input_data=input_data, timeout=timeout, + expose_outputs=expose_outputs) + return self.get_request(request_id) + + @abc.abstractmethod + def produce_pipeline_request(self, fitted_pipeline_id: str, input_data: typing.Sequence[ContainerType], *, + timeout: float = None, expose_outputs: bool = False) -> str: + """ + A method that submit a produce pipeline request. + + Parameters + ---------- + fitted_pipeline_id : str + The fitted pipeline if of the fitted pipeline to be use to produce results. + input_data : typing.Sequence[ContainerType] + A list of D3M containers. + timeout : float + A maximum amount of time that pipelines are going to be executed in seconds. + expose_outputs : bool + A variable that enable exposing every intermediate results based on the input_data + + Returns + ------- + str + A request id. + """ + + # @abc.abstractmethod + def produce_pipeline(self, fitted_pipeline_id: str, input_data: typing.Sequence[ContainerType], *, + timeout: float = None, expose_outputs: bool = False) -> PipelineResult: + """ + A method that produce multiple fitted pipelines, save their state and returns a list of PipelineResult + that contain the information of every pipeline run. + + Parameters + ---------- + fitted_pipeline_id : str + A list of fitted pipelines to run with the input_data + input_data : typing.Sequence[ContainerType] + A list of D3M containers. + timeout : float + A maximum amount of time that pipelines are going to be executed in seconds. + expose_outputs : bool + A variable that enable exposing every intermediate results based on the input_data + + Returns + ------- + PipelineResult + A PipelineResult intance containing the information about the produced pipeline. 
+ """ + request_id = self.produce_pipeline_request(fitted_pipeline_id, input_data, timeout=timeout, + expose_outputs=expose_outputs) + return self.get_request(request_id) + + @abc.abstractmethod + def evaluate_pipeline_request( + self, problem_description: Problem, pipeline: Pipeline, + input_data: typing.Sequence[ContainerType], *, metrics: typing.Sequence[typing.Dict], + data_preparation_pipeline: Pipeline = None, scoring_pipeline: Pipeline = None, + data_preparation_params: typing.Dict[str, str] = None, scoring_params: typing.Dict[str, str] = None, + timeout: float = None + ) -> str: + """ + A method that evaluate multiple pipelines, and provides returns the scores and information of the pipelines. + + Parameters + ---------- + problem_description : Problem + A problem description. + pipeline : Pipeline + A list of pipelines that are going to be run. + input_data : typing.Sequence[ContainerType] + A list of D3M containers. + metrics : typing.Sequence[typing.Dict] + A dictionary containing the metrics and their arguments. + data_preparation_pipeline : Pipeline + A pipeline that prepares the data for the pipelines to be evaluated in, e.g. Cross-fold validation + scoring_pipeline : Pipeline + A pipeline that is used to compute the scores of the pipelines. + data_preparation_params : typing.Dict[str, str] + Parameters for the data preparation pipeline + scoring_params: typing.Dict[str, str] + Parameters for the scoring pipeline + timeout : float + A maximum amount of time that pipelines are going to be executed in seconds. + + Returns + ------- + str + A request id + """ + + def evaluate_pipeline( + self, problem_description: Problem, pipeline: Pipeline, + input_data: typing.Sequence[ContainerType], *, metrics: typing.Sequence[typing.Dict], + data_preparation_pipeline: Pipeline = None, scoring_pipeline: Pipeline = None, + data_preparation_params: typing.Dict[str, str] = None, scoring_params: typing.Dict[str, str] = None, + timeout: float = None + ) -> PipelineResult: + """ + A method that evaluate multiple pipelines, and provides returns the scores and information of the pipelines. + + Parameters + ---------- + problem_description : Problem + A problem description. + pipeline : Pipeline + A pipeline that is going to be evaluated. + input_data : typing.Sequence[ContainerType] + A list of D3M containers. + metrics : typing.Sequence[typing.Dict] + A dictionary containing the metrics and their arguments. + data_preparation_pipeline : Pipeline + A pipeline that prepares the data for the pipelines to be evaluated in, e.g. Cross-fold validation + scoring_pipeline : Pipeline + A pipeline that is used to compute the scores of the pipelines. + data_preparation_params : typing.Dict[str, str] + Parameters for the data preparation pipeline + scoring_params: typing.Dict[str, str] + Parameters for the scoring pipeline + timeout : float + A maximum amount of time that pipelines are going to be executed in seconds. + + Returns + ------- + PipelineResult + Result of the evaluation of the pipeline. 
+ """ + request_id = self.evaluate_pipeline_request( + problem_description, pipeline, input_data, metrics=metrics, + data_preparation_pipeline=data_preparation_pipeline, scoring_pipeline=scoring_pipeline, + data_preparation_params=data_preparation_params, scoring_params=scoring_params, timeout=timeout + ) + return self.get_request(request_id) + + def evaluate_pipelines( + self, problem_description: Problem, pipelines: typing.Sequence[Pipeline], + input_data: typing.Sequence[ContainerType], *, metrics: typing.Sequence[typing.Dict], + data_preparation_pipeline: Pipeline = None, scoring_pipeline: Pipeline = None, + data_preparation_params: typing.Dict[str, str] = None, scoring_params: typing.Dict[str, str] = None, + timeout: float = None + ) -> typing.Sequence[PipelineResult]: + """ + A method that evaluate multiple pipelines, and provides returns the scores and information of the pipelines. + + Parameters + ---------- + problem_description : Problem + A problem description. + pipelines : typing.Sequence[str] + A list of pipelines that are going to be run. + input_data : typing.Sequence[ContainerType] + A list of D3M containers. + metrics : typing.Sequence[typing.Dict] + A dictionary containing the metrics and their arguments. + data_preparation_pipeline : Pipeline + A pipeline that prepares the data for the pipelines to be evaluated in, e.g. Cross-fold validation + scoring_pipeline : Pipeline + A pipeline that is used to compute the scores of the pipelines. + data_preparation_params : typing.Dict[str, str] + Parameters for the data preparation pipeline + scoring_params: typing.Dict[str, str] + Parameters for the scoring pipeline + timeout : float + A maximum amount of time that pipelines are going to be executed in seconds. + + Returns + ------- + typing.Sequence[PipelineResult] + A sequence of PipelineResults. 
+ """ + request_ids = [] + for pipeline in pipelines: + request_ids.append( + self.evaluate_pipeline_request( + problem_description, pipeline, input_data, metrics=metrics, + data_preparation_pipeline=data_preparation_pipeline, scoring_pipeline=scoring_pipeline, + data_preparation_params=data_preparation_params, scoring_params=scoring_params, timeout=timeout + ) + ) + + return [self.get_request(request_id) for request_id in request_ids] diff --git a/axolotl/axolotl/backend/ray.py b/axolotl/axolotl/backend/ray.py new file mode 100644 index 0000000..f93e3dd --- /dev/null +++ b/axolotl/axolotl/backend/ray.py @@ -0,0 +1,269 @@ +import ray +import typing +import uuid +import binascii +import hashlib +import time +from ray.util import ActorPool + +from d3m import index as d3m_index +from d3m import utils as d3m_utils +from d3m import runtime as runtime_module +from d3m.metadata.problem import Problem +from d3m.metadata.pipeline import Pipeline +from d3m.metadata.base import Context +from d3m.metadata import pipeline_run as pipeline_run_module +from d3m import container as container_module + +from axolotl.backend.base import RunnerBase +from axolotl.utils.pipeline import PipelineResult, save_pipeline_run, save_exposed_values +from axolotl.utils.schemas import ContainerType +import multiprocessing + + +@ray.remote +class DataHandler: + def __init__(self): + self.data = {} + + def add_data(self, input_data): + if isinstance(input_data, list): + values = [] + for _data in input_data: + if isinstance(_data, container_module.Dataset): + values.append(_data.metadata.query(())['id']) + + data_id = str(hashlib.sha256(str(values).encode('utf8')).hexdigest()) + if data_id not in self.data: + self.data[data_id] = input_data + return data_id + + def get_data(self, data_id): + if data_id in self.data: + return self.data[data_id] + + +@ray.remote +class RayExecutor: + def __init__(self, *, random_seed: int = 0, volumes_dir: str = None, scratch_dir: str = None, store_results=False, + blocklist=()) -> None: + self.random_seed = random_seed + self.volumes_dir = volumes_dir + self.scratch_dir = scratch_dir + self.fitted_pipelines = {} + with d3m_utils.silence(): + d3m_index.load_all(blocklist=blocklist) + self.runtime_environment = pipeline_run_module.RuntimeEnvironment() + self.store_results = store_results + + def fit_pipeline( + self, data_handler, problem_description: Problem, pipeline: Pipeline, + input_data_id: str, *, timeout: float = None, expose_outputs: bool = False + ) -> PipelineResult: + pipeline_result = PipelineResult(pipeline=pipeline) + pipeline_result.status = "RUNNING" + pipeline_result.method_called = "fit" + + request_id = data_handler.get_data.remote(input_data_id) + input_data = ray.get(request_id) + + is_standard_pipeline = False + if len(input_data) == 1 and len(pipeline.outputs) == 1: + is_standard_pipeline = True + + with d3m_utils.silence(): + runtime, output, result = runtime_module.fit( + pipeline=pipeline, inputs=input_data, problem_description=problem_description, context=Context.TESTING, + hyperparams=None, random_seed=self.random_seed, volumes_dir=self.volumes_dir, + scratch_dir=self.scratch_dir, + runtime_environment=self.runtime_environment, is_standard_pipeline=is_standard_pipeline, + expose_produced_outputs=expose_outputs + ) + + if result.has_error(): + pipeline_result.status = "ERRORED" + pipeline_result.error = result.error + else: + pipeline_result.status = "COMPLETED" + fitted_pipeline_id = str(uuid.uuid4()) + + if self.store_results: + pipeline_result.exposed_outputs = 
save_exposed_values(result.values, pipeline.id, self.scratch_dir) + pipeline_result.output = save_exposed_values(output, pipeline.id, self.scratch_dir) + else: + pipeline_result.exposed_outputs = result.values + pipeline_result.output = output + + pipeline_result.fitted_pipeline_id = fitted_pipeline_id + self.fitted_pipelines[fitted_pipeline_id] = runtime + + if self.store_results: + pipeline_result.pipeline_run = save_pipeline_run(result.pipeline_run, self.scratch_dir) + + return pipeline_result + + def produce_pipeline( + self, data_handler, fitted_pipeline_id: str, input_data_id: str, *, + timeout: float = None, expose_outputs: bool = False + ) -> PipelineResult: + + pipeline_result = PipelineResult(fitted_pipeline_id=fitted_pipeline_id) + pipeline_result.status = "RUNNING" + pipeline_result.method_called = "produce" + pipeline_result.fitted_pipeline_id = fitted_pipeline_id + + request_id = data_handler.get_data.remote(input_data_id) + input_data = ray.get(request_id) + + with d3m_utils.silence(): + output, result = runtime_module.produce( + fitted_pipeline=self.fitted_pipelines[fitted_pipeline_id], test_inputs=input_data, + expose_produced_outputs=expose_outputs + ) + + if result.has_error(): + pipeline_result.status = "ERRORED" + pipeline_result.error = result.error + else: + pipeline_result.status = "COMPLETED" + if self.store_results: + pipeline_result.exposed_outputs = save_exposed_values(result.values, fitted_pipeline_id, self.scratch_dir) + pipeline_result.output = save_exposed_values(output, fitted_pipeline_id, self.scratch_dir) + else: + pipeline_result.exposed_outputs = result.values + pipeline_result.output = output + + if self.store_results: + pipeline_result.pipeline_run = save_pipeline_run(result.pipeline_run, self.scratch_dir) + + return pipeline_result + + def evaluate_pipeline( + self, data_handler, problem_description: Problem, pipeline: Pipeline, + input_data_id: str, *, metrics: typing.Sequence[typing.Dict], + data_preparation_pipeline: Pipeline = None, scoring_pipeline: Pipeline = None, + data_preparation_params: typing.Dict[str, str] = None, scoring_params: typing.Dict[str, str] = None, + timeout: float = None + ) -> PipelineResult: + + with d3m_utils.silence(): + pipeline_result = PipelineResult(pipeline=pipeline) + pipeline_result.status = "RUNNING" + pipeline_result.method_called = "evaluate" + + request_id = data_handler.get_data.remote(input_data_id) + input_data = ray.get(request_id) + + with d3m_utils.silence(): + scores, results = runtime_module.evaluate( + pipeline=pipeline, inputs=input_data, data_pipeline=data_preparation_pipeline, + scoring_pipeline=scoring_pipeline, problem_description=problem_description, + data_params=data_preparation_params, metrics=metrics, context=Context.TESTING, + scoring_params=scoring_params, hyperparams=None, random_seed=self.random_seed, + data_random_seed=self.random_seed, scoring_random_seed=self.random_seed, + volumes_dir=self.volumes_dir, scratch_dir=self.scratch_dir, runtime_environment=self.runtime_environment + ) + + if results.has_error(): + pipeline_result.status = "ERRORED" + pipeline_result.error = [result.error for result in results] + else: + pipeline_result.status = "COMPLETED" + pipeline_result.scores = runtime_module.combine_folds(scores) + + if self.store_results: + pipeline_result.pipeline_run = save_pipeline_run(results.pipeline_runs, self.scratch_dir) + return pipeline_result + + def fitted_pipeline_id_exists(self, fitted_pipeline_id): + return fitted_pipeline_id in self.fitted_pipelines + + +class 
RayRunner(RunnerBase): + def __init__(self, *, random_seed: int = 0, volumes_dir: str = None, scratch_dir: str = None, + store_results=False, n_workers=None, blocklist=()) -> None: + if not ray.is_initialized(): + ray.init() + + super().__init__(random_seed=random_seed, volumes_dir=volumes_dir, scratch_dir=scratch_dir) + self.data_handler = DataHandler.remote() + self.ray_executor = RayExecutor.remote(random_seed=random_seed, + volumes_dir=volumes_dir, scratch_dir=scratch_dir, + store_results=store_results,blocklist=blocklist) + + if n_workers is None: + n_workers = multiprocessing.cpu_count() + self.actor_pool = ActorPool([ + RayExecutor.remote(random_seed=random_seed, volumes_dir=volumes_dir, + scratch_dir=scratch_dir, store_results=store_results, + blocklist=blocklist) for _ in range(n_workers)] + ) + + # Wait for primitives to be load on the workers + # time.sleep(len(d3m_index.search()) * 0.15) + + def stop_ray(self): + ray.shutdown() + + def get_request(self, request_id: str): + return ray.get(ray.ObjectID(binascii.unhexlify(request_id))) + + def fit_pipeline_request(self, problem_description: Problem, pipeline: Pipeline, + input_data: typing.Sequence[ContainerType], *, timeout: float = None, + expose_outputs: bool = False) -> str: + + request_id = self.data_handler.add_data.remote(input_data) + input_data_id = ray.get(request_id) + request_id = self.ray_executor.fit_pipeline.remote(self.data_handler, problem_description, pipeline, input_data_id, + timeout=timeout, expose_outputs=expose_outputs) + return request_id.hex() + + def produce_pipeline_request(self, fitted_pipeline_id: str, input_data: typing.Sequence[ContainerType], *, + timeout: float = None, expose_outputs: bool = False) -> str: + request_id = self.data_handler.add_data.remote(input_data) + input_data_id = ray.get(request_id) + request_id = self.ray_executor.produce_pipeline.remote(self.data_handler, fitted_pipeline_id, input_data_id, timeout=timeout, + expose_outputs=expose_outputs) + return request_id.hex() + + def evaluate_pipeline_request( + self, problem_description: Problem, pipeline: Pipeline, + input_data: typing.Sequence[ContainerType], *, metrics: typing.Sequence[typing.Dict], + data_preparation_pipeline: Pipeline = None, scoring_pipeline: Pipeline = None, + data_preparation_params: typing.Dict[str, str] = None, scoring_params: typing.Dict[str, str] = None, + timeout: float = None + ) -> str: + request_id = self.data_handler.add_data.remote(input_data) + input_data_id = ray.get(request_id) + + request_id = self.ray_executor.evaluate_pipeline.remote( + self.data_handler, problem_description, pipeline, input_data_id, metrics=metrics, + data_preparation_pipeline=data_preparation_pipeline, scoring_pipeline=scoring_pipeline, + data_preparation_params=data_preparation_params, scoring_params=scoring_params, timeout=timeout + ) + return request_id.hex() + + def fitted_pipeline_id_exists(self, fitted_pipeline_id): + request_id = self.ray_executor.fitted_pipeline_id_exists.remote(fitted_pipeline_id) + return ray.get(request_id) + + def evaluate_pipelines( + self, problem_description: Problem, pipelines: typing.Sequence[Pipeline], + input_data: typing.Sequence[ContainerType], *, metrics: typing.Sequence[typing.Dict], + data_preparation_pipeline: Pipeline = None, scoring_pipeline: Pipeline = None, + data_preparation_params: typing.Dict[str, str] = None, scoring_params: typing.Dict[str, str] = None, + timeout: float = None + ) -> typing.Sequence[PipelineResult]: + request_id = self.data_handler.add_data.remote(input_data) + 
input_data_id = ray.get(request_id) + + args = [] + for pipeline in pipelines: + args.append({ + 'data_handler': self.data_handler, 'problem_description': problem_description, 'pipeline': pipeline, + 'input_data_id': input_data_id, 'metrics': metrics, 'data_preparation_pipeline': data_preparation_pipeline, + 'scoring_pipeline': scoring_pipeline,'data_preparation_params': data_preparation_params, + 'scoring_params': scoring_params,'timeout': timeout + }) + + return self.actor_pool.map(lambda actor, arg: actor.evaluate_pipeline.remote(**arg), args) diff --git a/axolotl/axolotl/backend/simple.py b/axolotl/axolotl/backend/simple.py new file mode 100644 index 0000000..2d6b9ad --- /dev/null +++ b/axolotl/axolotl/backend/simple.py @@ -0,0 +1,178 @@ +import typing +import uuid + +from d3m import utils as d3m_utils +from d3m import runtime as runtime_module +from d3m.metadata.problem import Problem +from d3m.metadata.pipeline import Pipeline +from d3m.metadata.base import Context +from d3m.metadata import pipeline_run as pipeline_run_module + +from axolotl.backend.base import RunnerBase +from axolotl.utils.pipeline import PipelineResult +from axolotl.utils.schemas import ContainerType + + +class SimpleRunner(RunnerBase): + def __init__(self, *, random_seed: int = 0, volumes_dir: str = None, scratch_dir: str = None) -> None: + super().__init__(random_seed=random_seed, volumes_dir=volumes_dir, scratch_dir=scratch_dir) + self.fitted_pipelines = {} + self.request_results = {} + + with d3m_utils.silence(): + self.runtime_environment = pipeline_run_module.RuntimeEnvironment() + + def get_request(self, request_id: str) -> PipelineResult: + """ + A method that returns the result from the requests + + Parameters + ---------- + request_id : str + Request id of data to retrieve + + Returns + ------- + PipelineResult + A PipelineResult instance that contains the information. + """ + if request_id in self.request_results: + return self.request_results[request_id] + else: + return PipelineResult(fitted_pipeline_id='') + + def fit_pipeline_request(self, problem_description: Problem, pipeline: Pipeline, + input_data: typing.Sequence[ContainerType], *, timeout: float = None, + expose_outputs: bool = False) -> str: + """ + A method that submit a fit_pipeline job. + + Parameters + ---------- + problem_description : Problem + A problem description. + pipeline : Pipeline + The pipeline that is going to be fitted. + input_data : typing.Sequence[ContainerType] + A list of D3M containers. + timeout : float + A maximum amount of time that pipelines are going to be executed in seconds. + expose_outputs : bool + A variable that enable exposing every intermediate results based on the input_data + + Returns + ------- + str + A request id. 
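+
+        Examples
+        --------
+        Illustrative sketch of the request/result pattern, assuming ``problem``, ``pipeline``
+        and ``dataset`` are already loaded::
+
+            runner = SimpleRunner(random_seed=42)
+            request_id = runner.fit_pipeline_request(
+                problem_description=problem, pipeline=pipeline, input_data=[dataset])
+            fit_result = runner.get_request(request_id)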
+ """ + request_id = str(uuid.uuid4()) + pipeline_result = PipelineResult(pipeline=pipeline) + pipeline_result.status = "RUNNING" + pipeline_result.method_called = "fit" + + is_standard_pipeline = False + if len(input_data) == 1 and len(pipeline.outputs) == 1: + is_standard_pipeline = True + + runtime, output, result = runtime_module.fit( + pipeline=pipeline, inputs=input_data, problem_description=problem_description, context=Context.TESTING, + hyperparams=None, random_seed=self.random_seed, volumes_dir=self.volumes_dir, + scratch_dir=self.scratch_dir, + runtime_environment=self.runtime_environment, is_standard_pipeline=is_standard_pipeline, + expose_produced_outputs=expose_outputs + ) + + if result.has_error(): + pipeline_result.status = "ERRORED" + pipeline_result.error = result.error + else: + pipeline_result.status = "COMPLETED" + pipeline_result.exposed_outputs = result.values + pipeline_result.output = output + fitted_pipeline_id = str(uuid.uuid4()) + pipeline_result.fitted_pipeline_id = fitted_pipeline_id + self.fitted_pipelines[fitted_pipeline_id] = runtime + + pipeline_result.pipeline_run = result.pipeline_run + self.request_results[request_id] = pipeline_result + + return request_id + + def produce_pipeline_request(self, fitted_pipeline_id: str, input_data: typing.Sequence[ContainerType], *, + timeout: float = None, expose_outputs: bool = False) -> str: + """ + A method that submit a produce pipeline request. + + Parameters + ---------- + fitted_pipeline_id : str + The fitted pipeline if of the fitted pipeline to be use to produce results. + input_data : typing.Sequence[ContainerType] + A list of D3M containers. + timeout : float + A maximum amount of time that pipelines are going to be executed in seconds. + expose_outputs : bool + A variable that enable exposing every intermediate results based on the input_data + + Returns + ------- + str + A request id. 
+ """ + request_id = str(uuid.uuid4()) + + pipeline_result = PipelineResult(fitted_pipeline_id=fitted_pipeline_id) + pipeline_result.status = "RUNNING" + pipeline_result.method_called = "produce" + pipeline_result.fitted_pipeline_id = fitted_pipeline_id + + output, result = runtime_module.produce( + fitted_pipeline=self.fitted_pipelines[fitted_pipeline_id], test_inputs=input_data, + expose_produced_outputs=expose_outputs + ) + + if result.has_error(): + pipeline_result.status = "ERRORED" + pipeline_result.error = result.error + else: + pipeline_result.status = "COMPLETED" + pipeline_result.output = output + pipeline_result.exposed_outputs = result.values + + pipeline_result.pipeline_run = result.pipeline_run + self.request_results[request_id] = pipeline_result + + return request_id + + def evaluate_pipeline_request( + self, problem_description: Problem, pipeline: Pipeline, + input_data: typing.Sequence[ContainerType], *, metrics: typing.Sequence[typing.Dict], + data_preparation_pipeline: Pipeline = None, scoring_pipeline: Pipeline = None, + data_preparation_params: typing.Dict[str, str] = None, scoring_params: typing.Dict[str, str] = None, + timeout: float = None + ) -> str: + request_id = str(uuid.uuid4()) + + pipeline_result = PipelineResult(pipeline=pipeline) + pipeline_result.status = "RUNNING" + pipeline_result.method_called = "evaluate" + + scores, results = runtime_module.evaluate( + pipeline=pipeline, inputs=input_data, data_pipeline=data_preparation_pipeline, + scoring_pipeline=scoring_pipeline, problem_description=problem_description, + data_params=data_preparation_params, metrics=metrics, context=Context.TESTING, + scoring_params=scoring_params, hyperparams=None, random_seed=self.random_seed, + data_random_seed=self.random_seed, scoring_random_seed=self.random_seed, + volumes_dir=self.volumes_dir, scratch_dir=self.scratch_dir, runtime_environment=self.runtime_environment + ) + + if results.has_error(): + pipeline_result.status = "ERRORED" + pipeline_result.error = [result.error for result in results] + else: + pipeline_result.status = "COMPLETED" + pipeline_result.scores = runtime_module.combine_folds(scores) + + self.request_results[request_id] = pipeline_result + return request_id + diff --git a/axolotl/axolotl/d3m_grpc/__init__.py b/axolotl/axolotl/d3m_grpc/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/axolotl/axolotl/d3m_grpc/constants.py b/axolotl/axolotl/d3m_grpc/constants.py new file mode 100644 index 0000000..e1f0f4b --- /dev/null +++ b/axolotl/axolotl/d3m_grpc/constants.py @@ -0,0 +1,127 @@ +import os +import json +import re + +from axolotl.utils.resources import check_directory + + +# A class to wrap envrioment variables under d3m scope. 
+class EnvVars: + # A label what is the setting under which the pod is being run; possible + # values: ta2, ta2ta3; this variable is available only for informative + # purposes but it is not used anymore to change an overall mode of operation + # of TA2 system because now TA2 evaluation will happen through TA2-TA3 API + # as well + D3MRUN = 'run' + PROJECT_ROOT = os.path.join(os.path.dirname(__file__), '../..') + # A location of dataset(s), can contain multiple datasets in arbitrary + # directory structure, read-only + D3MINPUTDIR = '/input_dir' + # A location to problem description to use (should be under D3MINPUTDIR), + # datasets are linked from the problem description using IDs, those datasets + # should exist inside D3MINPUTDIR + D3MPROBLEMPATH = 'problem_path' + # A location of output files, shared by TA2 and TA3 pods (and probably data + # mart) + D3MOUTPUTDIR = os.path.join(PROJECT_ROOT, 'output_dir') + # A local-to-host directory provided; used by memory sharing mechanisms + D3MLOCALDIR = os.path.join(D3MOUTPUTDIR, 'temp', 'plasma') + # A path to the volume with primitives' static files + D3MSTATICDIR = None + # Available CPU units in Kubernetes specification + D3MCPU = 0 + # Available CPU units in Kubernetes specification + D3MRAM = 0 + # Time limit for the search phase (available to the pod), in seconds + D3MTIMEOUT = -1 + + # Plasma socket + PLASMA_SOCKET = '/tmp/plasma' + + # datamart uri DATAMART_URL_NYU + DATAMART_URL_NYU = 'https://datamart.d3m.vida-nyu.org' + + if 'D3MRUN' in os.environ: + D3MRUN = os.environ['D3MRUN'] + if 'D3MINPUTDIR' in os.environ: + D3MINPUTDIR = os.environ['D3MINPUTDIR'] + if 'D3MPROBLEMPATH' in os.environ: + D3MPROBLEMPATH = os.environ['D3MPROBLEMPATH'] + if 'D3MOUTPUTDIR' in os.environ: + D3MOUTPUTDIR = os.environ['D3MOUTPUTDIR'] + if 'D3MLOCALDIR' in os.environ: + D3MLOCALDIR = os.environ['D3MLOCALDIR'] + if 'D3MSTATICDIR' in os.environ: + D3MSTATICDIR = os.environ['D3MSTATICDIR'] + if 'D3MCPU' in os.environ: + D3MCPU = int(float(os.environ['D3MCPU'])) + # if we don't set it or its to low set to 4 + # if D3MCPU < 4: + # D3MCPU = 4 + if 'D3MRAM' in os.environ: + D3MRAM = int(re.search(r'\d+', os.environ['D3MRAM']).group()) + if 'D3MTIMEOUT' in os.environ: + D3MTIMEOUT = os.environ['D3MTIMEOUT'] + if 'PLASMA_SOCKET' in os.environ: + PLASMA_SOCKET = os.environ['PLASMA_SOCKET'] + if 'DATAMART_URL_NYU' in os.environ: + DATAMART_URL_NYU = os.environ['DATAMART_URL_NYU'] + + +# # +class Path: + # Temporary directories. + # A temporary directory for other things. + TEMP_STORAGE_ROOT = os.path.join(EnvVars.D3MOUTPUTDIR, 'temp/') + # A temporary directory to store other stuff between ta2-ta3 + OTHER_OUTPUTS = os.path.join(TEMP_STORAGE_ROOT, 'other_outputs') + # To deprecate after figure out what to do with executables. 
+ TEMP_PROBLEM_DESC = os.path.join(TEMP_STORAGE_ROOT, 'problem_description') + + check_directory(TEMP_STORAGE_ROOT) + check_directory(OTHER_OUTPUTS) + check_directory(TEMP_PROBLEM_DESC) + + +class SearchPath: + + def __init__(self, search_id): + self.base_path = os.path.join(EnvVars.D3MOUTPUTDIR, search_id) + + # A directory with ranked pipelines to be evaluated, named + # .json; these files should have additional field pipeline_rank + self.pipelines_ranked = os.path.join(self.base_path, 'pipelines_ranked') + check_directory(self.pipelines_ranked) + + # A directory with successfully scored pipelines during the search, + # named .json + self.pipelines_scored = os.path.join(self.base_path, 'pipelines_scored') + check_directory(self.pipelines_scored) + # A directory of full pipelines which have not been scored or ranked for any + # reason, named .json + self.pipelines_searched = os.path.join(self.base_path, 'pipelines_searched') + check_directory(self.pipelines_searched) + # A directory with any subpipelines referenced from pipelines in + # pipelines_* directories, named .json + self.subpipelines = os.path.join(self.base_path, 'subpipelines') + check_directory(self.subpipelines) + # A directory with pipeline run records in YAML format, multiple can be + # stored in the same file, named .yml + self.pipeline_runs = os.path.join(self.base_path, 'pipeline_runs') + check_directory(self.pipeline_runs) + # A directory where TA2 system can store any additional datasets to be + # provided during training and testing to their pipelines; each dataset + # should be provided in a sub-directory in a D3M dataset format; all + # datasets here should have an unique ID; in the case that additional + # datasets are provided, TA2 should output also pipeline run documents for + # their ranked pipelines because those pipeline run documents contain + # information how to map these additional inputs to pipeline inputs + self.additional_inputs = os.path.join(self.base_path, 'additional_inputs') + check_directory(self.additional_inputs) + + +# A class that wraps a block list of primitives +# To generate this list is necessary to run modules.utils.primitive_selection +class PrimitivesList: + with open(os.path.join(os.path.dirname(__file__), '..', 'utils', 'resources', 'blocklist.json'), 'r') as file: + BlockList = json.load(file) diff --git a/axolotl/axolotl/d3m_grpc/server.py b/axolotl/axolotl/d3m_grpc/server.py new file mode 100644 index 0000000..7964470 --- /dev/null +++ b/axolotl/axolotl/d3m_grpc/server.py @@ -0,0 +1,854 @@ +import argparse +import json +import logging +import os +import pathlib +import time +import warnings +from concurrent import futures +import ray +import os +import uuid + +import google.protobuf.timestamp_pb2 as p_timestamp +import grpc +from d3m import utils as d3m_utils, index as d3m_index +from d3m.metadata import problem as problem_module +from d3m.metadata.pipeline import Resolver +from d3m import container +from d3m import runtime as runtime_module +from d3m.metadata.base import Context +from ta3ta2_api import core_pb2, core_pb2_grpc, primitive_pb2, value_pb2, utils + +from axolotl.backend.ray import RayRunner +from axolotl.algorithms.dummy import DummySearch, dummy_ranking_function +from axolotl.algorithms.data_driven_search import DataDrivenSearch +from axolotl.utils.pipeline import load_pipeline, save_pipeline +from axolotl.d3m_grpc.constants import SearchPath, EnvVars, PrimitivesList, Path +from axolotl.utils import resources as resources_module, schemas as schemas_utils + +from 
pprint import pprint + + +__version__ = '2020.4.4_pre' +_ONE_DAY_IN_SECONDS = 60 * 60 * 24 + +logger = logging.getLogger(__name__) +AGENT = 'TAMU.10.0_pre' +ALLOWED_VALUE_TYPES = ['RAW', 'DATASET_URI', 'CSV_URI'] +SUPPORTED_EXTENSIONS = [] + + +def available_primitives(): + primitives_info = [] + + with d3m_utils.silence(): + for primitive_path in d3m_index.search(): + if primitive_path in PrimitivesList.BlockList: + continue + + try: + primitive = d3m_index.get_primitive(primitive_path) + primitive_id = primitive.metadata.query()['id'] + version = primitive.metadata.query()['version'] + python_path = primitive.metadata.query()['python_path'] + name = primitive.metadata.query()['name'] + digest = primitive.metadata.query().get('digest', None) + primitive_info = { + 'id': primitive_id, + 'version': version, + 'python_path': python_path, + 'name': name, + 'digest': digest + } + primitives_info.append(primitive_info) + except: + continue + return primitives_info + + +PRIMITIVES_LIST = available_primitives() + + +@ray.remote +class SearchWrappers: + def __init__(self, search_class, problem_description, backend, primitives_blocklist=None, ranking_function=None, n_workers=2): + self.search_algorithm = search_class(problem_description=problem_description, backend=backend, + primitives_blocklist=primitives_blocklist, ranking_function=ranking_function, + n_workers=n_workers) + self._seen_index = 0 + self.has_input_data = False + self.time_left = None + self.active_search = True + self.save_path = SearchPath(self.search_algorithm.search_id) + + def search_request(self, time_left, input_data=None): + time_start = time.time() + if not self.has_input_data: + self.search_algorithm.input_data = input_data + self.time_left = time_left + self.has_input_data = True + + results = self.search_algorithm._search(time_left) + self.search_algorithm.history += results + succeed_pipelines = [] + for result in results: + print('pipeline', result.pipeline.id, result.status) + # save all results in pipelines searched + save_pipeline(result.pipeline, self.save_path.pipelines_searched) + + # save all pipelines_runs + resources_module.copy_file(result.pipeline_run, self.save_path.pipeline_runs) + + # we filter the ones that were completed + if result.status == 'COMPLETED': + # since we were able to score it, we put a copy into the pipelines_scored directory + save_pipeline(result.pipeline, self.save_path.pipelines_scored) + succeed_pipelines.append(result) + + self.time_left -= time.time() - time_start + return succeed_pipelines + + def end_search(self): + self.active_search = False + + def is_search_active(self): + return self.active_search + + def get_search_id(self): + return self.search_algorithm.search_id + + def get_time_left(self): + return self.time_left + + +class Core(core_pb2_grpc.CoreServicer): + """ + A class that works as a server that provides support for the pipeline searches, and provides the interfaces + defined on the TA3-2 API. + + Attributes + ---------- + version: str + A str that represents the version of the Ta3-2 api that is supporting. + user_agents: dict() + A simple dictionary that keep the relation of the different users. + manager: ExecutionManger + Schedules the searches, and all resources related with the search. 
+ """ + + def __init__(self): + logger.info('########## Initializing Service ##########') + self.version = core_pb2.DESCRIPTOR.GetOptions().Extensions[core_pb2.protocol_version] + self.n_workers = EnvVars.D3MCPU + if self.n_workers > 7: + self.n_workers = int(self.n_workers/2) + 1 + print('Server n_workers', self.n_workers) + self.backend = RayRunner(random_seed=0, volumes_dir=EnvVars.D3MSTATICDIR, scratch_dir=Path.TEMP_STORAGE_ROOT, + blocklist=PrimitivesList.BlockList, store_results=True, n_workers=self.n_workers) + self.searches = {} + self.request_mapping = {} + self.solutions = {} + self.problem_descriptions = {} + + # TODO add support for templates + def SearchSolutions(self, request, context): + user_agent = request.user_agent + logger.info('method=SearchSolution, agent=%s', user_agent) + + # Checking version of protocol. + if request.version != self.version: + logger.info(' method=SearchSolution, info=Different api version%s', self.version) + + # Types allowed by client + allowed_value_types = list(request.allowed_value_types) + + if not allowed_value_types: + allowed_value_types = ALLOWED_VALUE_TYPES + + problem_description = utils.decode_problem_description(request.problem) + + # Parsing and storing Pipeline Template (store this to a file instead of passing it) + with d3m_utils.silence(): + template = utils.decode_pipeline_description(pipeline_description=request.template, + resolver=Resolver(primitives_blocklist=PrimitivesList.BlockList)) + + time_bound_search = request.time_bound_search + time_bound_search = time_bound_search * 60 + + input_data = [load_data(utils.decode_value(x)) for x in request.inputs] + + search = SearchWrappers.remote(search_class=DataDrivenSearch, problem_description=problem_description, + backend=self.backend, primitives_blocklist=PrimitivesList.BlockList, + ranking_function=dummy_ranking_function, n_workers=self.n_workers) + + request_id = search.get_search_id.remote() + search_id = ray.get(request_id) + + # print('got search_id') + self.searches[search_id] = search + request_id = self.searches[search_id].search_request.remote(time_left=time_bound_search, input_data=input_data) + + self.request_mapping[search_id] = request_id + self.solutions[search_id] = [] + self.problem_descriptions[search_id] = problem_description + response = core_pb2.SearchSolutionsResponse(search_id=search_id) + return response + + def GetSearchSolutionsResults(self, request, context): + search_id = request.search_id + logger.info('method=GetSearchSolutionsResults, search_id=%s', search_id) + request_id = self.request_mapping[search_id] + + progress_start = p_timestamp.Timestamp() + progress_end = p_timestamp.Timestamp() + + all_ticks = 0 + done_ticks = 0 + + # Yield running so the client know the search is running. 
+ progress = core_pb2.Progress(state='RUNNING', status='Running Search', start=progress_start) + response = core_pb2.GetSearchSolutionsResultsResponse(progress=progress) + yield response + + has_solution = False + + succeed_pipelines = ray.get(request_id) + time_left_id = self.searches[search_id].get_time_left.remote() + time_left = ray.get(time_left_id) + + while True: + start_time = time.time() + + # if no time left we stop + if time_left < 5: + break + + # case if a signal from EndSolution is sent to stop the search + is_active_id = self.searches[search_id].is_search_active.remote() + is_active = ray.get(is_active_id) + + if not is_active: + logger.info('method=GetSearchSolutionsResults, search_id={} message=SearchStopped'.format(search_id)) + break + + for succeed_pipeline in succeed_pipelines: + has_solution = True + logger.info('method=GetSearchSolutionsResults, search_id={} solution_id={}'.format( + search_id,succeed_pipeline.pipeline.id)) + response = core_pb2.GetSearchSolutionsResultsResponse( + progress=progress, + done_ticks=done_ticks, + all_ticks=all_ticks, + solution_id=succeed_pipeline.pipeline.id, + internal_score=1-succeed_pipeline.rank, + scores=[core_pb2.SolutionSearchScore(scores=encode_scores(succeed_pipeline))] + ) + self.solutions[search_id].append(succeed_pipeline.pipeline.id) + yield response + + finished, running = ray.wait([request_id], timeout=1) + + if finished: + succeed_pipelines = ray.get(request_id) + request_id = self.searches[search_id].search_request.remote(time_left=time_left) + else: + succeed_pipelines = [] + + time.sleep(1) + + time_left -= time.time() - start_time + + if has_solution: + progress_state = 'COMPLETED' + progress_status = 'Search completed' + else: + progress_state = 'ERRORED' + progress_status = 'No solution founded' + + logger.info('method=GetSearchSolutionsResults, search_id={}, status={}, message={}'.format( + search_id, progress_state, progress_status) + ) + progress_end.GetCurrentTime() + progress = core_pb2.Progress(state=progress_state, status=progress_status, + start=progress_start, end=progress_end) + response = core_pb2.GetSearchSolutionsResultsResponse(progress=progress, done_ticks=done_ticks, + all_ticks=all_ticks,) + yield response + + def EndSearchSolutions(self, request, context): + search_id = request.search_id + logger.info('method=EndSearchSolutions search_id=%s', search_id) + ray.kill(self.searches[search_id]) + del self.searches[search_id] + response = core_pb2.EndSearchSolutionsResponse() + return response + + def StopSearchSolutions(self, request, context): + search_id = request.search_id + self.searches[search_id].end_search.remote() + logger.info('method=StopSearchSolutions search_id=%s', search_id) + response = core_pb2.StopSearchSolutionsResponse() + return response + + def DescribeSolution(self, request, context): + solution_id = request.solution_id + logger.info('method=DescribeSolution, solution_id=%s', solution_id) + + pipeline, _, _ = self.get_solution_problem(solution_id) + if pipeline is None: + logger.info('method=DescribeSolution, solution_id=%s, error=Solution_id not found', solution_id) + response = core_pb2.DescribeSolutionResponse() + return response + + with d3m_utils.silence(): + pipeline = utils.encode_pipeline_description(pipeline, ALLOWED_VALUE_TYPES, Path.TEMP_STORAGE_ROOT) + + response = core_pb2.DescribeSolutionResponse(pipeline=pipeline) + return response + + def ScoreSolution(self, request, context): + solution_id = request.solution_id + logger.info('method=SocreSolution, 
solution_id=%s', solution_id) + + pipeline, problem_description, _ = self.get_solution_problem(solution_id) + if pipeline is None: + logger.info('method=FitSolution, solution_id=%s, status=ERRORED, error=Solution_id not found', solution_id) + response = core_pb2.ScoreSolutionResponse() + return response + + input_data = [load_data(utils.decode_value(x)) for x in request.inputs] + metrics = [utils.decode_performance_metric(metric) for metric in request.performance_metrics] + scoring_pipeline = schemas_utils.get_scoring_pipeline() + data_preparation_params = decode_scoring_configuration(request.configuration) + data_preparation_pipeline = schemas_utils.get_splitting_pipeline(data_preparation_params['method']) + + request_id = self.backend.evaluate_pipeline_request( + problem_description=problem_description, pipeline=pipeline, input_data=input_data, + metrics=metrics, data_preparation_pipeline=data_preparation_pipeline, + scoring_pipeline=scoring_pipeline, data_preparation_params=data_preparation_params) + + response = core_pb2.ScoreSolutionResponse(request_id=request_id) + return response + + def GetScoreSolutionResults(self, request, context): + request_id = request.request_id + logger.info('method=GetScoreSolutionResults, request_id=%s', request_id) + + progress_start = p_timestamp.Timestamp() + progress_end = p_timestamp.Timestamp() + progress_start.GetCurrentTime() + + progress = core_pb2.Progress(state='RUNNING', status='Running score job', start=progress_start) + response = core_pb2.GetScoreSolutionResultsResponse(progress=progress) + yield response + + pipeline_result = self.backend.get_request(request_id) + progress_end.GetCurrentTime() + + if pipeline_result.error is None: + progress = core_pb2.Progress( + state='COMPLETED', + status='Score job COMPLETED', + start=progress_start, + end=progress_end + ) + + response = core_pb2.GetScoreSolutionResultsResponse( + progress=progress, scores=encode_scores(pipeline_result)) + else: + progress = core_pb2.Progress( + state='ERRORED', + status=str(pipeline_result.error), + start=progress_start, + end=progress_end + ) + + response = core_pb2.GetScoreSolutionResultsResponse(progress=progress) + yield response + return + + def FitSolution(self, request, context): + solution_id = request.solution_id + logger.info('method=FitSolution solution_id=%s', solution_id) + + pipeline, problem_description, _ = self.get_solution_problem(solution_id) + if pipeline is None: + logger.info('method=FitSolution, solution_id=%s, status=ERRORED, error=Solution_id not found', solution_id) + response = core_pb2.FitSolutionResponse() + return response + + input_data = [load_data(utils.decode_value(x)) for x in request.inputs] + + expose_outputs = [expose_output for expose_output in request.expose_outputs] + if expose_outputs: + expose_outputs = True + else: + expose_outputs = False + + request_id = self.backend.fit_pipeline_request( + problem_description=problem_description, pipeline=pipeline, + input_data=input_data, expose_outputs=expose_outputs + ) + + response = core_pb2.FitSolutionResponse(request_id=request_id) + return response + + def GetFitSolutionResults(self, request, context): + request_id = request.request_id + logger.info('method=GetFitSolutionResults request_id=%s', request_id) + + progress_start = p_timestamp.Timestamp() + progress_end = p_timestamp.Timestamp() + progress_start.GetCurrentTime() + + progress = core_pb2.Progress(state='RUNNING', status='Running fit job', start=progress_start) + response = 
core_pb2.GetFitSolutionResultsResponse(progress=progress) + yield response + + pipeline_result = self.backend.get_request(request_id) + progress_end.GetCurrentTime() + + if pipeline_result.error is None: + progress = core_pb2.Progress( + state='COMPLETED', + status='Fit job COMPLETED', + start=progress_start, + end=progress_end + ) + response = core_pb2.GetFitSolutionResultsResponse( + progress=progress, steps=[], exposed_outputs=encode_exposed_values(pipeline_result.exposed_outputs), + fitted_solution_id=pipeline_result.fitted_pipeline_id + ) + else: + progress = core_pb2.Progress( + state='ERRORED', + status=str(pipeline_result.error), + start=progress_start, + end=progress_end + ) + + response = core_pb2.GetFitSolutionResultsResponse(progress=progress) + yield response + return + + def ProduceSolution(self, request, context): + fitted_solution_id = request.fitted_solution_id + logger.info('method=ProduceSolution, fitted_solution_id=%s', fitted_solution_id) + + if not self.backend.fitted_pipeline_id_exists(fitted_solution_id): + logger.info( + 'method=ProduceSolution, fitted_solution_id=%s, status=ERRORED info=No fitted_solution_id found', fitted_solution_id) + response = core_pb2.ProduceSolutionResponse() + return response + + input_data = [load_data(utils.decode_value(x)) for x in request.inputs] + + expose_outputs = [expose_output for expose_output in request.expose_outputs] + if expose_outputs: + expose_outputs = True + else: + expose_outputs = False + + request_id = self.backend.produce_pipeline_request(fitted_pipeline_id=fitted_solution_id, + input_data=input_data, expose_outputs=expose_outputs) + response = core_pb2.ProduceSolutionResponse(request_id=request_id) + return response + + # TODO add expose_outputs to files + def GetProduceSolutionResults(self, request, context): + request_id = request.request_id + logger.info('method=GetProduceSolutionResults, request_id=%s', request_id) + + progress_start = p_timestamp.Timestamp() + progress_end = p_timestamp.Timestamp() + progress_start.GetCurrentTime() + + progress = core_pb2.Progress(state='RUNNING', status='Running produce job', start=progress_start) + response = core_pb2.GetProduceSolutionResultsResponse(progress=progress) + yield response + + pipeline_result = self.backend.get_request(request_id) + progress_end.GetCurrentTime() + + if pipeline_result.error is None: + progress = core_pb2.Progress( + state='COMPLETED', + status='Produce job COMPLETED', + start=progress_start, + end=progress_end + ) + step_progress = [] + + response = core_pb2.GetProduceSolutionResultsResponse( + progress=progress, steps=step_progress, exposed_outputs=encode_exposed_values(pipeline_result.exposed_outputs)) + else: + progress = core_pb2.Progress( + state='ERRORED', + status=str(pipeline_result.error), + start=progress_start, + end=progress_end + ) + + response = core_pb2.GetProduceSolutionResultsResponse(progress=progress) + yield response + return + + def SolutionExport(self, request, context): + solution_id = request.solution_id + rank = request.rank + + try: + pipeline, _, search_id = self.get_solution_problem(solution_id) + except: + pipeline = None + + if pipeline is None: + logger.info('method=SolutionExport, solution_id=%s, status=ERRORED, error=No solution_id found', solution_id) + else: + logger.info('method=SolutionExport solution_id=%s', solution_id) + save_pipeline(pipeline, SearchPath(search_id).pipelines_ranked, rank=rank) + response = core_pb2.SolutionExportResponse() + return response + + # def SaveSolution(self, request, context): + 
# solution_id = request.solution_id + # logger.info('method=SaveSolution solution_id=%s', solution_id) + # + # if solution_id not in self.manager.solutions: + # logger.info('method=SaveSolution, solution_id=%s, error=Solution_id not found', solution_id) + # response = core_pb2.SaveSolutionResponse() + # else: + # solution_uri = self.manager.save_solution(solution_id) + # response = core_pb2.SaveSolutionResponse(solution_uri=solution_uri) + # return response + + # def LoadSolution(self, request, context): + # solution_uri = request.solution_uri + # logger.info('method=LoadSolution solution_uri=%s', solution_uri) + # + # if not os.path.exists(solution_uri): + # logger.info('method=LoadSolution, solution_uri=%s, error=solution_uri not found', solution_uri) + # response = core_pb2.LoadSolutionResponse() + # else: + # solution_id = self.manager.load_solution(solution_uri) + # response = core_pb2.LoadSolutionResponse(solution_id=solution_id) + # return response + + # def SaveFittedSolution(self, request, context): + # fitted_solution_id = request.fitted_solution_id + # logger.info('method=SaveFittedSolution, fitted_solution_id=%s', fitted_solution_id) + # + # if fitted_solution_id not in self.manager.fitted_solutions: + # logger.info('method=SaveFittedSolution, fitted_solution_id=%s, status=ERRORED, ' + # 'info=No fitted_solution_id found', fitted_solution_id) + # response = core_pb2.SaveFittedSolutionResponse() + # else: + # fitted_solution_uri = self.manager.save_fitted_solution(fitted_solution_id) + # response = core_pb2.SaveFittedSolutionResponse(fitted_solution_uri=fitted_solution_uri) + # return response + + # def LoadFittedSolution(self, request, context): + # fitted_solution_uri = request.fitted_solution_uri + # logger.info('method=LoadFittedSolution solution_uri=%s', fitted_solution_uri) + # + # if not os.path.exists(fitted_solution_uri): + # logger.info('method=LoadFittedSolution, solution_uri=%s, error=solution_uri not found', fitted_solution_uri) + # response = core_pb2.LoadFittedSolutionResponse() + # else: + # fitted_solution_id = self.manager.load_fitted_solution(fitted_solution_uri) + # response = core_pb2.LoadFittedSolutionResponse(fitted_solution_id=fitted_solution_id) + # return response + + # def ScorePredictions(self, request, context): + # logger.info('method=ScorePredictions') + # predictions = utils.decode_value(request.predictions) + # score_input = utils.decode_value(request.score_input) + # problem = utils.decode_problem_description(request.problem) + # metrics = [utils.decode_performance_metric(_metric) for _metric in request.metric] + # + # scores, score_result = self.manager.score_predictions(predictions, score_input, problem, metrics) + # if score_result.has_error(): + # logger.info('method=ScorePredictions, error={}', score_result.error) + # response = core_pb2.ScorePredictionsResponse() + # else: + # scores = self.encode_scores(scores) + # response = core_pb2.ScorePredictionsResponse(scores=scores) + # return response + + def DataAvailable(self, request, context): + user_agent = request.user_agent + version = request.version + time_bound = request.time_bound + + logger.info('method=DataAvailable, agent={}, version={}, time_bound={}'.format( + user_agent, version, time_bound)) + response = core_pb2.DataAvailableResponse() + return response + + def SplitData(self, request, context): + input_data = [load_data(utils.decode_value(x)) for x in request.inputs] + scoring_configuration = decode_scoring_configuration(request.scoring_configuration) + problem_description = 
utils.decode_problem_description(request.problem) + data_pipeline = schemas_utils.get_splitting_pipeline(scoring_configuration['method']) + + data_random_seed = 0 + outputs, data_result = runtime_module.prepare_data( + data_pipeline=data_pipeline, problem_description=problem_description, + inputs=input_data, data_params=scoring_configuration, context=Context.TESTING, random_seed=data_random_seed, + volumes_dir=EnvVars.D3MSTATICDIR, scratch_dir=Path.TEMP_STORAGE_ROOT, runtime_environment=None, + ) + + if data_result.has_error(): + logger.info('method=SplitData, error={}', data_result.error) + response = core_pb2.SplitDataResponse() + yield response + return + else: + for i, (train_output, test_output, score_output) in enumerate(zip(*outputs)): + uri_list = [] + for output, tag in ( + (train_output, 'train'), + (test_output, 'test'), + (score_output, 'score'), + ): + path = os.path.join( + Path.TEMP_STORAGE_ROOT, '{}_output_{}'.format(tag, i), 'datasetDoc.json') + uri = get_uri(path) + output.save(uri) + uri_list.append(uri) + # response + response = core_pb2.SplitDataResponse( + train_output=value_pb2.Value(dataset_uri=uri_list[0]), + test_output=value_pb2.Value(dataset_uri=uri_list[1]), + score_output=value_pb2.Value(dataset_uri=uri_list[2]), + ) + yield response + + def ListPrimitives(self, request, context): + logger.info('method=ListPrimitives') + primitives_list = [] + for primitive_info in PRIMITIVES_LIST: + primitives_list.append(primitive_pb2.Primitive(**primitive_info)) + response = core_pb2.ListPrimitivesResponse(primitives=primitives_list) + return response + + def Hello(self, request, context): + logger.info('method=Hello') + user_agent = AGENT + version = core_pb2.DESCRIPTOR.GetOptions().Extensions[core_pb2.protocol_version] + allowed_value_types = ALLOWED_VALUE_TYPES + supported_extensions = SUPPORTED_EXTENSIONS + + response = core_pb2.HelloResponse( + user_agent=user_agent, + version=version, + allowed_value_types=allowed_value_types, + supported_extensions=supported_extensions + ) + return response + + def get_solution_problem(self, solution_id): + describe_search_id = None + for search_id, solution_ids in self.solutions.items(): + if solution_id in solution_ids: + describe_search_id = search_id + break + + if describe_search_id is None: + return None, None, None + + solution_path = os.path.join(SearchPath(describe_search_id).pipelines_scored, '{}.json'.format(solution_id)) + + with d3m_utils.silence(): + pipeline = load_pipeline(solution_path) + + problem_description = self.problem_descriptions[describe_search_id] + return pipeline, problem_description, describe_search_id + + +def encode_exposed_values(exposed_values): + encoded_exposed_values = {} + for name, value in exposed_values.items(): + if '.csv' in value: + encoded_exposed_values[name] = utils.encode_value( + {'type': 'csv_uri', 'value': get_uri(value)}, ALLOWED_VALUE_TYPES, Path.TEMP_STORAGE_ROOT) + elif '.json' in value: + encoded_exposed_values[name] = utils.encode_value( + {'type': 'dataset_uri', 'value': get_uri(value)}, ALLOWED_VALUE_TYPES, Path.TEMP_STORAGE_ROOT) + return encoded_exposed_values + + +def decode_scoring_configuration(scoring_configuration): + """ + Decode a scoring configuration from grpc + + Parameters + ---------- + scoring_configuration: core_pb2.ScoringConfiguration + A grpc ScoringConfiguration message. + + Returns + ------- + configuration: dict + A dictionary with the scoring configuration. 
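+
+    Examples
+    --------
+    Illustrative sketch of the decoded dictionary for a hypothetical K-fold configuration
+    with 5 folds, shuffling enabled and random seed 0::
+
+        {'method': 'K_FOLD', 'train_score_ratio': '0.8', 'stratified': 'false',
+         'shuffle': 'true', 'randomSeed': '0', 'number_of_folds': '5'}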
+ """ + method = scoring_configuration.method + configuration = { + 'method': method, + 'train_score_ratio': str(scoring_configuration.train_test_ratio), + 'stratified': str(scoring_configuration.stratified).lower(), + 'shuffle': str(scoring_configuration.shuffle).lower(), + 'randomSeed': str(scoring_configuration.random_seed), + } + if method == 'K_FOLD': + configuration['number_of_folds'] = str(scoring_configuration.folds) + return configuration + + +def load_data(data): + if data['type'] == 'dataset_uri': + return container.dataset.get_dataset(data['value']) + + +def get_uri(path): + return pathlib.Path(os.path.abspath(path)).as_uri() + + +def encode_scores(pipeline_result): + """ + Encode a dict of scores to a GRPC message + + Parameters + ---------- + pipeline_result + A pipeline_result instance that contains the scores and rank to be encoded. + + Returns + ------- + score_message: GRPC + A GRPC message + """ + ranking = { + 'metric': 'RANK', + 'value': pipeline_result.rank, + 'randomSeed': 0, + 'fold': 0, + } + + all_scores = pipeline_result.scores.append(ranking, ignore_index=True) + + scores = list() + for score in all_scores.to_dict('index').values(): + score['random_seed'] = score['randomSeed'] + try: + score['metric'] = {'metric': score['metric']} + except: + score['metric'] = {'metric': problem_module.PerformanceMetric[score['metric']]} + + scores.append(utils.encode_score(score, ALLOWED_VALUE_TYPES, Path.TEMP_STORAGE_ROOT)) + return scores + + +def encode_scoring_configuration(scoring_configuration): + """ + Decode a scoring configuration from grpc + + Parameters + ---------- + scoring_configuration: dict + A dictionary with the scoring configuration. + + Returns + ------- + scoring_configuration: core_pb2.ScoringConfiguration + A grpc ScoringConfiguration message. 
+ """ + if scoring_configuration is None: + return core_pb2.ScoringConfiguration() + else: + method = scoring_configuration['method'] + folds = scoring_configuration.get('number_of_folds', None) + if folds is not None: + folds = int(folds) + train_test_ratio = scoring_configuration.get('train_score_ratio', None) + if train_test_ratio is not None: + train_test_ratio = float(train_test_ratio) + shuffle = scoring_configuration.get('shuffle', None) + if shuffle is not None: + shuffle = json.loads(shuffle.lower()) + random_seed = scoring_configuration.get('randomSeed', None) + if random_seed is not None: + random_seed = int(random_seed) + stratified = scoring_configuration.get('stratified', None) + if stratified is not None: + stratified = json.loads(stratified.lower()) + return core_pb2.ScoringConfiguration(method=method, folds=folds, train_test_ratio=train_test_ratio, + shuffle=shuffle, random_seed=random_seed, stratified=stratified) + + +class Server: + def __init__(self, arguments): + self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) + self.core = Core() + + core_pb2_grpc.add_CoreServicer_to_server(self.core, self.server) + self.server.add_insecure_port('[::]:45042') + + def start(self): + self.server.start() + + def stop(self): + self.server.stop(0) + + +def configure_parser(parser, *, skip_arguments=()): + parser.add_argument( + '-o', '--output-path', type=str, default=os.path.join(os.getcwd(), "output/"), + help="path where the outputs would be stored" + ) + parser.add_argument( + '-v', '--verbose', type=bool, default=True, + help="Display detailed log" + ) + + +def main(): + ray.init(webui_host='127.0.0.1') + # Creating parser + parser = argparse.ArgumentParser(description="Starts server from command line") + configure_parser(parser) + arguments = parser.parse_args() + + # Setup logger + verbose_format = '%(asctime)s %(levelname)-8s %(processName)-15s [%(filename)s:%(lineno)d] %(message)s' + concise_format = '%(asctime)s %(levelname)-8s %(message)s' + log_format = verbose_format if arguments.verbose else concise_format + logging.basicConfig(format=log_format, + handlers=[logging.StreamHandler(), + logging.FileHandler('{}/d3m.log'.format(Path.TEMP_STORAGE_ROOT), 'w', 'utf-8')], + datefmt='%m/%d %H:%M:%S') + root_logger = logging.getLogger() + root_logger.setLevel(logging.INFO) + warnings.filterwarnings('ignore') + + server = Server(arguments) + + try: + load_time = time.time() + server.start() + with d3m_utils.silence(): + d3m_index.load_all(blocklist=PrimitivesList.BlockList) + print('Wait for loading workers for', len(d3m_index.search())*0.3) + time.sleep(len(d3m_index.search())*0.3) + # time.sleep(5) + logger.info('---------- Waiting for Requests ----------') + while True: + time.sleep(_ONE_DAY_IN_SECONDS) + except KeyboardInterrupt: + logger.info('############ STOPPING SERVICE ############') + server.stop() + + +if __name__ == '__main__': + main() diff --git a/axolotl/axolotl/predefined_pipelines/__init__.py b/axolotl/axolotl/predefined_pipelines/__init__.py new file mode 100644 index 0000000..d6eba82 --- /dev/null +++ b/axolotl/axolotl/predefined_pipelines/__init__.py @@ -0,0 +1,133 @@ +import json +import os +import uuid + +import copy +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import PrimitiveStep +from d3m.container import DataFrame +from d3m import utils as d3m_utils + +from axolotl.predefined_pipelines import preprocessor +from axolotl.utils import pipeline as pipeline_utils, schemas as schemas_utils + 
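fetch_from_file below hands each entry of the predefined-pipelines JSON to pipeline_utils.load_pipeline, so the file is expected to map a task-type name to a list of entries that load_pipeline can resolve: either inline pipeline-description dicts or paths/URIs to pipeline JSON files. A minimal sketch of that layout, with invented keys and paths, expressed as a Python literal:

# Hypothetical shape of the predefined-pipelines JSON (keys and paths are invented
# for illustration); every list entry must be resolvable by pipeline_utils.load_pipeline.
PREDEFINED_PIPELINES = {
    'CLASSIFICATION': [
        '/opt/pipelines/gradient_boosting_baseline.json',   # a pipeline file path or URI
        {'id': '...', 'schema': '...', 'steps': []},         # or an inline pipeline description dict
    ],
    'REGRESSION': [
        '/opt/pipelines/random_forest_regression.json',
    ],
}
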
+__all__ = ['fetch', 'fetch_from_file'] + + +def fetch(input_data, problem_description, predefined_path=None): + if predefined_path is None: + root = os.path.join(os.path.dirname(__file__), '../..') + predefined_path = os.path.join(root, 'axolotl', 'utils', 'resources', 'default_pipelines.json') + # ToDo should use yield + pipelines = list() + pipelines_from_file = fetch_from_file(problem_description, path=predefined_path) + pipelines_from_preprocessors = _fetch_from_preprocessors(input_data, problem_description) + for candiate in ( + pipelines_from_file, + pipelines_from_preprocessors, + ): + pipelines.extend(candiate) + return pipelines + + +def fetch_from_file(problem_description, path): + # ToDo should use yield + task_type, task_subtype, data_types, semi = _get_task_description(problem_description) + + pipelines = [] + with open(path) as file: + possible_pipelines = json.load(file) + with d3m_utils.silence(): + for task_type_in_file, pipeline_infos in possible_pipelines.items(): + if task_type_in_file == task_type: + for pipeline_info in pipeline_infos: + pipeline = pipeline_utils.load_pipeline(pipeline_info) + pipelines.append(pipeline) + return pipelines + + +def _fetch_from_preprocessors(input_data, problem_description): + task_type, task_subtype, data_types, semi = _get_task_description(problem_description) + primitive_candidates = pipeline_utils.get_primitive_candidates(task_type, data_types, semi) + + mapped_task_type = schemas_utils.get_task_mapping(task_type) + if mapped_task_type != task_type: + primitive_candidates += pipeline_utils.get_primitive_candidates(mapped_task_type, data_types, semi) + + pipelines = [] + for primitive_info in primitive_candidates: + if not check_primitive_dataframe_input(primitive_info): + continue + pps = preprocessor.get_preprocessor( + input_data=input_data, problem=problem_description, treatment=primitive_info[1] + ) + for pp in pps: + pipeline_description = copy.deepcopy(pp.pipeline_description) + pipeline_description.id = str(uuid.uuid4()) + pipeline = _complete_pipeline( + pipeline_description=pipeline_description, + dataframe_step=pp.dataset_to_dataframe_step, + primitive_info=primitive_info, + attributes=pp.attributes, + targets=pp.targets, + resolver=pp.resolver + ) + pipelines.append(pipeline) + return pipelines + + +def check_primitive_dataframe_input(primitive_info): + primitive, _ = primitive_info + primitive_arguments = primitive.metadata.query()['primitive_code']['arguments'] + if 'inputs' in primitive_arguments and primitive_arguments['inputs']['type'] == DataFrame: + return True + else: + return False + + +def get_primitive(name): + primitive = index.get_primitive(name) + return primitive + + +def _complete_pipeline(pipeline_description, dataframe_step, attributes, targets, resolver, primitive_info): + primitive, specific_primitive = primitive_info + construct_prediction = 'd3m.primitives.data_transformation.construct_predictions.Common' + construct_prediction_primitive = get_primitive(construct_prediction) + + _add_primitive_to_pipeline(pipeline_description, primitive, resolver, attributes, targets) + _add_primitive_to_pipeline(pipeline_description, construct_prediction_primitive, resolver, + dataframe_step=dataframe_step) + # Get the last step for the output + last_step_idx = len(pipeline_description.steps) - 1 + output = pipeline_utils.int_to_step(last_step_idx) + + # Adding output step to the pieline + pipeline_description.add_output(name='Predictions from the input dataset', data_reference=output) + return pipeline_description 
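Taken together, fetch only needs a d3m Dataset and a Problem: it concatenates the pipelines read from the JSON file with the ones assembled via preprocessor.get_preprocessor. A rough usage sketch, assuming the referenced d3m primitives are installed and using the generate_dataset_problem helper added later in this patch (axolotl/utils/data_problem.py); the column names and values below are invented:

# Sketch only: build a toy dataset/problem and list the predefined candidate pipelines.
import pandas as pd

from axolotl.predefined_pipelines import fetch
from axolotl.utils.data_problem import generate_dataset_problem

df = pd.DataFrame({
    'feature_a': [1.0, 2.0, 3.0, 4.0],   # invented toy features
    'feature_b': [0.2, 0.4, 0.6, 0.8],
    'label':     [0, 1, 0, 1],           # invented binary target
})

# target_index points at the 'label' column; 'binary_classification' is one of the
# default task definitions mentioned in generate_problem_description's docstring.
dataset, problem = generate_dataset_problem(df, task='binary_classification', target_index=2)

for pipeline in fetch(dataset, problem):
    print(pipeline.id)

Note that fetch simply extends one candidate list with the other; it does not de-duplicate or rank the pipelines it returns.
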
+ + +def _add_primitive_to_pipeline(pipeline_description, primitive, resolver, attributes=None, targets=None, + dataframe_step=None): + step_model = PrimitiveStep(primitive=primitive, resolver=resolver) + + if dataframe_step is None: + step_model.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) + step_model.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets) + else: + last_step_idx = len(pipeline_description.steps) - 1 + step_model.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, + data_reference=pipeline_utils.int_to_step(last_step_idx)) + step_model.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference=dataframe_step) + step_model.add_output('produce') + pipeline_description.add_step(step_model) + + +def _get_task_description(problem_description): + task_description = schemas_utils.get_task_description(problem_description['problem']['task_keywords']) + task_type = task_description['task_type'] + task_subtype = task_description['task_subtype'] + data_types = task_description['data_types'] + semi = task_description['semi'] + return task_type, task_subtype, data_types, semi diff --git a/axolotl/axolotl/predefined_pipelines/base_preprocessor.py b/axolotl/axolotl/predefined_pipelines/base_preprocessor.py new file mode 100644 index 0000000..60130b2 --- /dev/null +++ b/axolotl/axolotl/predefined_pipelines/base_preprocessor.py @@ -0,0 +1,278 @@ +import typing + +import abc +from d3m import index +from d3m.metadata.base import Context, ArgumentType +from d3m.metadata.pipeline import Pipeline, Resolver, PrimitiveStep + +from axolotl.utils import pipeline as pipeline_utils + +DEFAULT_OUTPUT = '.' + + +class Preprocessor(abc.ABC): + task: str + treatment: str + expected_data_types: set + unsupported_data_types: set + semi: bool + + def __init__(self, metadata, main_resource, data_types, loaded_primitives, problem=None, start_resource='inputs.0'): + self.metadata = metadata + self.main_resource = main_resource + self.data_types = data_types + self.loaded_primitives = loaded_primitives + self.start_resource = start_resource + self.problem = problem + # Creating pipeline + pipeline_description = Pipeline(context=Context.TESTING) + pipeline_description.add_input(name='inputs') + self.pipeline = pipeline_description + self.d2d_step = None + self.attr_step = None + self.targ_step = None + self._generate_pipeline() + + def __init_subclass__(cls, task: str, treatment: str, expected_data_types: set, **kargs): + cls.task = task + cls.treatment = treatment + cls.expected_data_types = expected_data_types + cls.unsupported_data_types = kargs['unsupported_data_types'] if 'unsupported_data_types' in kargs else None + cls.semi = kargs['semi'] if 'semi' in kargs else False + + @classmethod + def check_task_treatment(cls, task, treatment): + if not cls.task: + return True + if not cls.treatment: + return cls.task == task + return cls.task == task and cls.treatment == treatment + + @classmethod + def check_expected_data_types(cls, data_types): + if not cls.expected_data_types: + return True + return any(data_type in cls.expected_data_types for data_type in data_types) + + @classmethod + def check_unsupported_data_types(cls, data_types): + if not cls.unsupported_data_types: + return True + return not any(data_type in cls.unsupported_data_types for data_type in data_types) + + @property + def pipeline_description(self) -> Pipeline: + return self.pipeline + + @property + def 
dataset_to_dataframe_step(self) -> typing.Optional[str]: + return self.get_output_str(self.d2d_step) if self.d2d_step else None + + @property + def attributes(self) -> typing.Optional[str]: + return self.get_output_str(self.attr_step) if self.attr_step else None + + @property + def targets(self) -> typing.Optional[str]: + return self.get_output_str(self.targ_step) if self.targ_step else None + + @property + def resolver(self) -> Resolver: + return pipeline_utils.BlackListResolver() + + @abc.abstractmethod + def _generate_pipeline(self): + raise NotImplementedError() + + @property + def gpu_budget(self) -> float: + return 0 + + def get_primitive(self, name): + primitive = index.get_primitive(name) + self.download_static_files(primitive) + return primitive + + def common_boilerplate(self): + """ + This boilerplate provides the basic init pipline that contains denormalize and dataset_to_dataframe. + + Arguments + --------- + include_dataset_to_dataframe: bool + Whether to include dataset_to_dataframe step. + include_simple_profiler: bool + whether or not to include simple profiler + """ + metadata = self.metadata + main_resource_id = self.main_resource + start_resource = self.start_resource + + # if there is more that one resource we denormalize + if len(metadata.get_elements(())) > 1: + start_resource = self.add_denormalize_step(start_resource, main_resource_id) + + # Finally we transfer to a dataframe. + dtd_step = self.add_dataset_to_dataframe_step(start_resource) + + simple_profiler_step = self.add_primitive_to_pipeline( + primitive=self.loaded_primitives['SimpleProfiler'], + attributes=dtd_step, + hyperparameters=[ + ('categorical_max_ratio_distinct_values', ArgumentType.VALUE, 1), + ('categorical_max_absolute_distinct_values', ArgumentType.VALUE, None) + ] + ) + self.set_d2d_step(simple_profiler_step) + + def tabular_common(self, target_at_column_parser=False): + self.common_boilerplate() + + # Simple preprocessor + attributes, targets = self.base(target_at_column_parser=target_at_column_parser) + + # Adding Imputer + imputer = self.add_imputer(attributes=attributes) + + attributes = self.add_simple_text_handler(imputer, targets) + self.set_attribute_step(attributes) + self.set_target_step(targets) + + def base(self, target_at_column_parser=False, exclude_attr_columns=None): + dataset_dataframe_step_pos = self.d2d_step + + # Step 2: ColumnParser + column_parser_step = self.add_column_parser_step(data_reference=dataset_dataframe_step_pos) + + # Step 3: ExtractAttributes + attributes_step = self.add_extract_col_by_semantic_types_step( + column_parser_step, + ['https://metadata.datadrivendiscovery.org/types/Attribute'], + exclude_attr_columns + ) + target_source = column_parser_step if target_at_column_parser else dataset_dataframe_step_pos + + # Step 4: ExtractTargets + targets_step = self.add_extract_col_by_semantic_types_step( + target_source, + ['https://metadata.datadrivendiscovery.org/types/TrueTarget'] + ) + return attributes_step, targets_step + + def add_imputer(self, attributes): + # SklearnImputer + primitive = self.loaded_primitives['Imputer'] + configuration = \ + primitive.metadata.query()['primitive_code']['class_type_arguments']['Hyperparams'].configuration + hyperparameters = [] + if 'return_result' in configuration: + hyperparameters.append( + ('return_result', ArgumentType.VALUE, 'replace') + ) + if 'use_semantic_types' in configuration: + hyperparameters.append( + ('use_semantic_types', ArgumentType.VALUE, True) + ) + hyperparameters.append( + ('error_on_no_input', 
ArgumentType.VALUE, False) + ) + imputer = self.add_primitive_to_pipeline( + primitive=primitive, + attributes=attributes, + hyperparameters=hyperparameters + ) + return imputer + + def add_extract_col_by_semantic_types_step(self, data_reference, target_semantic_types, exclude_columns=None): + if exclude_columns: + hyperparameters = [ + ('exclude_columns', ArgumentType.VALUE, exclude_columns), + ('semantic_types', ArgumentType.VALUE, target_semantic_types) + ] + else: + hyperparameters = [ + ('semantic_types', ArgumentType.VALUE, target_semantic_types) + ] + step = self.add_primitive_to_pipeline( + primitive=self.loaded_primitives['ExtractColumnsBySemanticTypes'], + attributes=data_reference, + hyperparameters=hyperparameters + ) + return step + + def add_denormalize_step(self, start_resource, data): + denormalize_step = self.add_primitive_to_pipeline( + primitive=self.loaded_primitives['Denormalize'], + attributes=start_resource, + hyperparameters=[ + ('starting_resource', ArgumentType.VALUE, data) + ] + ) + return denormalize_step + + def add_dataset_to_dataframe_step(self, start_resource): + d2d_step = self.add_primitive_to_pipeline( + primitive=self.loaded_primitives['DatasetToDataFrame'], + attributes=start_resource + ) + return d2d_step + + def add_column_parser_step(self, data_reference, to_parse=None): + if to_parse: + hyperparameters = [ + ('parse_semantic_types', ArgumentType.VALUE, to_parse) + ] + else: + hyperparameters = [] + column_parser = self.add_primitive_to_pipeline( + primitive=self.loaded_primitives['ColumnParser'], + attributes=data_reference, + hyperparameters=hyperparameters + ) + return column_parser + + def add_simple_text_handler(self, attributes, targets): + text_encoder = self.add_primitive_to_pipeline( + primitive=self.loaded_primitives['TextEncoder'], + attributes=attributes, + hyperparameters=[ + ('encoder_type', ArgumentType.VALUE, 'tfidf') + ], + targets=targets + ) + return text_encoder + + def download_static_files(self, primitive): + primitive_metadata = primitive.metadata.query() + output = DEFAULT_OUTPUT + redownload = False + index.download_files(primitive_metadata, output, redownload) + + def add_primitive_to_pipeline(self, primitive, attributes, hyperparameters=[], targets=None, + produce_collection=False): + inputs_ref = attributes if isinstance(attributes, str) else self.get_output_str(attributes) + step = PrimitiveStep(primitive=primitive, resolver=self.resolver) + step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=inputs_ref) + for hyperparam in hyperparameters: + name, argument_type, data = hyperparam + step.add_hyperparameter(name=name, argument_type=argument_type, data=data) + if targets: + outputs_ref = targets if isinstance(targets, str) else self.get_output_str(targets) + step.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=outputs_ref) + step.add_output('produce') + if produce_collection: + step.add_output('produce_collection') + self.pipeline.add_step(step) + return step + + def get_output_str(self, step): + return pipeline_utils.int_to_step(step.index) + + def set_attribute_step(self, attributes): + self.attr_step = attributes + + def set_target_step(self, targets): + self.targ_step = targets + + def set_d2d_step(self, dataset_2_dataframe): + self.d2d_step = dataset_2_dataframe \ No newline at end of file diff --git a/axolotl/axolotl/predefined_pipelines/preprocessor.py b/axolotl/axolotl/predefined_pipelines/preprocessor.py new file mode 100644 index 0000000..907f5d3 
--- /dev/null +++ b/axolotl/axolotl/predefined_pipelines/preprocessor.py @@ -0,0 +1,350 @@ +from d3m import index +from d3m.metadata import base as metadata_base +from d3m.metadata.base import ArgumentType +from d3m.metadata.problem import TaskKeyword + +from axolotl.predefined_pipelines.base_preprocessor import Preprocessor +from axolotl.utils import pipeline as pipeline_utils, schemas as schemas_utils + + +def get_preprocessor(input_data, problem, treatment): + metadata = input_data.metadata + task_description = schemas_utils.get_task_description(problem['problem']['task_keywords']) + task_type = task_description['task_type'] + semi = task_description['semi'] + data_types = task_description['data_types'] + task = pipeline_utils.infer_primitive_family(task_type=task_type, data_types=data_types, is_semi=semi) + main_resource = pipeline_utils.get_tabular_resource_id(dataset=input_data) + + # Loading primitives + primitives = { + 'DatasetToDataFrame': 'd3m.primitives.data_transformation.dataset_to_dataframe.Common', + 'ColumnParser': 'd3m.primitives.data_transformation.column_parser.Common', + 'ExtractColumnsBySemanticTypes': 'd3m.primitives.data_transformation.extract_columns_by_semantic_types.Common', + 'Denormalize': 'd3m.primitives.data_transformation.denormalize.Common', + 'Imputer': 'd3m.primitives.data_cleaning.imputer.SKlearn', + 'SimpleProfiler': 'd3m.primitives.schema_discovery.profiler.Common', + 'TextEncoder': 'd3m.primitives.data_transformation.encoder.DistilTextEncoder', + } + loaded_primitives = dict() + + try: + for primitive_name in primitives.keys(): + loaded_primitives[primitive_name] = index.get_primitive(primitives[primitive_name]) + except Exception as e: + print("Cannot load primitive {}".format(e)) + + candidates = [] + for preprocessor in preprocessors: + if preprocessor.check_task_treatment(task, treatment) \ + and preprocessor.check_expected_data_types(data_types) \ + and preprocessor.check_unsupported_data_types(data_types): + candidates.append(preprocessor(metadata, main_resource, data_types, loaded_primitives, problem)) + if not candidates: + candidates.append(TabularPreprocessor(metadata, main_resource, data_types, loaded_primitives)) + return candidates + + +class TimeSeriesTabularPreprocessor(Preprocessor, task=metadata_base.PrimitiveFamily.TIME_SERIES_CLASSIFICATION.name, + treatment=metadata_base.PrimitiveFamily.CLASSIFICATION.name, + expected_data_types=None, + unsupported_data_types={TaskKeyword.TABULAR, TaskKeyword.RELATIONAL}): + def _generate_pipeline(self): + time_series_featurization_primitive = self.get_primitive( + 'd3m.primitives.feature_extraction.random_projection_timeseries_featurization.DSBOX' + ) + time_series_to_list_primitive = self.get_primitive( + 'd3m.primitives.data_preprocessing.time_series_to_list.DSBOX' + ) + + # denormalize -> dataset_to_df + self.common_boilerplate() + dataset_to_dataframe_step = self.d2d_step + + # timeseries_to_list + timeseries_tolist_step = self.add_primitive_to_pipeline( + primitive=time_series_to_list_primitive, + attributes=dataset_to_dataframe_step, + ) + # timeseries_featurization + timeseries_featurization_step = self.add_primitive_to_pipeline( + primitive=time_series_featurization_primitive, + attributes=timeseries_tolist_step, + ) + # extract_col_by_semantic + attr_step = self.add_extract_col_by_semantic_types_step( + timeseries_featurization_step, + ['https://metadata.datadrivendiscovery.org/types/Attribute'] + ) + # extract_col_by_semantic + targ_step = self.add_extract_col_by_semantic_types_step( + 
dataset_to_dataframe_step, + ['https://metadata.datadrivendiscovery.org/types/TrueTarget'] + ) + self.set_attribute_step(attr_step) + self.set_target_step(targ_step) + + +class TimeSeriesPreprocessor(Preprocessor, task=metadata_base.PrimitiveFamily.TIME_SERIES_CLASSIFICATION.name, + treatment=metadata_base.PrimitiveFamily.TIME_SERIES_CLASSIFICATION.name, + expected_data_types=None, + unsupported_data_types={TaskKeyword.TABULAR, TaskKeyword.RELATIONAL}): + def _generate_pipeline(self): + time_series_formatter_primitive = self.get_primitive( + 'd3m.primitives.data_preprocessing.data_cleaning.DistilTimeSeriesFormatter' + ) + ts_formatter = self.add_primitive_to_pipeline( + primitive=time_series_formatter_primitive, + attributes=self.start_resource + ) + + dtd_step = self.add_dataset_to_dataframe_step(ts_formatter) + dtd_without_ts_format = self.add_dataset_to_dataframe_step(self.start_resource) + + extract_target_step = self.add_extract_col_by_semantic_types_step( + dtd_without_ts_format, + ['https://metadata.datadrivendiscovery.org/types/TrueTarget'] + ) + target_column_parser_step = self.add_column_parser_step( + extract_target_step, + to_parse=[ + "http://schema.org/Boolean", + "http://schema.org/Integer", + "http://schema.org/Float", + "https://metadata.datadrivendiscovery.org/types/FloatVector" + ] + ) + self.set_d2d_step(dtd_without_ts_format) + self.set_attribute_step(dtd_step) + self.set_target_step(target_column_parser_step) + + +class TimeSeriesForecastingTabularPreprocessor(Preprocessor, + task=metadata_base.PrimitiveFamily.TIME_SERIES_FORECASTING.name, + treatment=metadata_base.PrimitiveFamily.TIME_SERIES_FORECASTING.name, + expected_data_types={TaskKeyword.GROUPED.name}): + # TODO: Pipeline will fail for integer target because simple_profiler profiles it as Categorical data, + # not Float or Integer. + def _generate_pipeline(self): + grouping_compose_primitive = self.get_primitive( + 'd3m.primitives.data_transformation.grouping_field_compose.Common' + ) + + self.common_boilerplate() + + # Do not parse categorical data or GroupingCompose will fail. 
+ column_parser = self.add_column_parser_step( + self.d2d_step, [ + "http://schema.org/DateTime", + "http://schema.org/Boolean", + "http://schema.org/Integer", + "http://schema.org/Float", + "https://metadata.datadrivendiscovery.org/types/FloatVector" + ] + ) + + attribute_step = self.add_extract_col_by_semantic_types_step( + column_parser, ['https://metadata.datadrivendiscovery.org/types/Attribute'] + ) + + grouping = self.add_primitive_to_pipeline( + primitive=grouping_compose_primitive, + attributes=attribute_step + ) + + target_step = self.add_extract_col_by_semantic_types_step(column_parser, [ + 'https://metadata.datadrivendiscovery.org/types/TrueTarget' + ]) + self.set_attribute_step(grouping) + self.set_target_step(target_step) + + +class AudioPreprocessor(Preprocessor, task=metadata_base.PrimitiveFamily.DIGITAL_SIGNAL_PROCESSING.name, + treatment=None, + expected_data_types=None): + + def _generate_pipeline(self): + audio_reader_primitive = self.get_primitive( + 'd3m.primitives.data_preprocessing.audio_reader.DistilAudioDatasetLoader' + ) + audio_feature_extraction_primitive = self.get_primitive( + 'd3m.primitives.feature_extraction.audio_transfer.DistilAudioTransfer' + ) + audio_reader = self.add_primitive_to_pipeline( + primitive=audio_reader_primitive, + attributes=self.start_resource, + produce_collection=True + ) + column_parser = self.add_column_parser_step( + data_reference=audio_reader, + to_parse=[ + 'http://schema.org/Boolean', + 'http://schema.org/Integer', + 'http://schema.org/Float', + 'https://metadata.datadrivendiscovery.org/types/FloatVector' + ] + ) + audio_feature = self.add_primitive_to_pipeline( + primitive=audio_feature_extraction_primitive, + attributes='steps.{}.produce_collection'.format(audio_reader.index), + ) + target_step = self.add_extract_col_by_semantic_types_step( + column_parser, + [ + 'https://metadata.datadrivendiscovery.org/types/TrueTarget', + 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget' + ] + ) + self.set_d2d_step(audio_reader) + self.set_attribute_step(audio_feature) + self.set_target_step(target_step) + + +class ImageDataFramePreprocessor(Preprocessor, task=metadata_base.PrimitiveFamily.DIGITAL_IMAGE_PROCESSING.name, + treatment=None, + expected_data_types={TaskKeyword.IMAGE.name}): + def _generate_pipeline(self): + image_reader_primitive = self.get_primitive('d3m.primitives.data_preprocessing.image_reader.Common') + image_feature_extraction_primitive = self.get_primitive( + 'd3m.primitives.feature_extraction.image_transfer.DistilImageTransfer') + + self.common_boilerplate() + dataset_to_dataframe_step = self.d2d_step + + image_reader = self.add_primitive_to_pipeline( + primitive=image_reader_primitive, + attributes=dataset_to_dataframe_step, + hyperparameters=[('return_result', ArgumentType.VALUE, 'replace')] + ) + column_parser = self.add_column_parser_step( + data_reference=image_reader, + to_parse=[ + 'http://schema.org/Boolean', + 'http://schema.org/Integer', + 'http://schema.org/Float', + 'https://metadata.datadrivendiscovery.org/types/FloatVector' + ] + ) + image_feature_extraction = self.add_primitive_to_pipeline( + primitive=image_feature_extraction_primitive, + attributes=column_parser + ) + target_step = self.add_extract_col_by_semantic_types_step( + data_reference=dataset_to_dataframe_step, + target_semantic_types=['https://metadata.datadrivendiscovery.org/types/TrueTarget'], + ) + self.set_attribute_step(image_feature_extraction) + self.set_target_step(target_step) + + +class 
ImageTensorPreprocessor(Preprocessor, task=metadata_base.PrimitiveFamily.DIGITAL_IMAGE_PROCESSING.name, + treatment=None, + expected_data_types={TaskKeyword.IMAGE.name}): + def _generate_pipeline(self): + dataframe_to_tensor_primitive = self.get_primitive( + 'd3m.primitives.data_preprocessing.dataframe_to_tensor.DSBOX' + ) + resnet50_featurizer_primitive = self.get_primitive( + 'd3m.primitives.feature_extraction.resnet50_image_feature.DSBOX' + ) + + self.common_boilerplate() + dataset_to_dataframe_step = self.d2d_step + + dataframe_to_tensor = self.add_primitive_to_pipeline( + primitive=dataframe_to_tensor_primitive, + attributes=dataset_to_dataframe_step, + hyperparameters=[('return_result', ArgumentType.VALUE, 'replace')] + ) + resnet50_featurizer = self.add_primitive_to_pipeline( + primitive=resnet50_featurizer_primitive, + attributes=dataframe_to_tensor, + hyperparameters=[('return_result', ArgumentType.VALUE, 'replace')] + ) + target_step = self.add_extract_col_by_semantic_types_step( + dataset_to_dataframe_step, + ['https://metadata.datadrivendiscovery.org/types/TrueTarget'] + ) + self.set_attribute_step(resnet50_featurizer) + self.set_target_step(target_step) + + +class TabularPreprocessor(Preprocessor, task=None, treatment=None, expected_data_types={TaskKeyword.TABULAR.name}): + def _generate_pipeline(self): + return self.tabular_common() + + +class CollaborativeFilteringPreprocessor(Preprocessor, task=metadata_base.PrimitiveFamily.COLLABORATIVE_FILTERING.name, + treatment=None, + expected_data_types=None): + def _generate_pipeline(self): + return self.tabular_common(target_at_column_parser=True) + + +class TextPreprocessor(Preprocessor, task=None, treatment=None, + expected_data_types={TaskKeyword.TEXT}): + def _generate_pipeline(self): + text_reader_primitive = self.get_primitive('d3m.primitives.data_preprocessing.text_reader.Common') + + self.common_boilerplate() + + # Simple preprocessor + attributes, targets = self.base() + + text_reader_step = self.add_primitive_to_pipeline( + primitive=text_reader_primitive, + attributes=attributes, + hyperparameters=[('return_result', ArgumentType.VALUE, 'replace')] + ) + imputer = self.add_imputer(text_reader_step) + attributes = self.add_simple_text_handler(imputer, targets) + self.set_attribute_step(attributes) + self.set_target_step(targets) + + +class TextSent2VecPreprocessor(Preprocessor, task=None, treatment=None, expected_data_types={TaskKeyword.TEXT.name}): + def _generate_pipeline(self): + sent2_vec_primitive =self.get_primitive('d3m.primitives.feature_extraction.nk_sent2vec.Sent2Vec') + + self.common_boilerplate() + + # Simple preprocessor + attributes, targets = self.base() + + sent2vec = self.add_primitive_to_pipeline( + primitive=sent2_vec_primitive, + attributes=attributes, + ) + + imputer = self.add_imputer(sent2vec) + self.set_attribute_step(imputer) + self.set_target_step(targets) + + +class LupiPreprocessor(Preprocessor, task=None, treatment=None, + expected_data_types={TaskKeyword.LUPI.name}): + def _generate_pipeline(self): + self.common_boilerplate() + + privileged_column_indices = [info['column_index'] for info in self.problem['inputs'][0]['privileged_data']] + attributes, targets = self.base(exclude_attr_columns=privileged_column_indices) + + imputer = self.add_imputer(attributes) + self.set_attribute_step(imputer) + self.set_target_step(targets) + + +preprocessors = [ + # TODO DSBOX installation has error + # TimeSeriesTabularPreprocessor, + TimeSeriesPreprocessor, + TimeSeriesForecastingTabularPreprocessor, + 
AudioPreprocessor, + ImageDataFramePreprocessor, + # TODO DSBOX installation has error + # ImageTensorPreprocessor, + CollaborativeFilteringPreprocessor, + TextSent2VecPreprocessor, + TextPreprocessor, + LupiPreprocessor +] diff --git a/axolotl/axolotl/utils/__init__.py b/axolotl/axolotl/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/axolotl/axolotl/utils/data_problem.py b/axolotl/axolotl/utils/data_problem.py new file mode 100644 index 0000000..96b4f34 --- /dev/null +++ b/axolotl/axolotl/utils/data_problem.py @@ -0,0 +1,340 @@ +import uuid +import numpy +import pandas as pd +from d3m.container import pandas as container_pandas +from d3m.container.dataset import Dataset +from d3m.metadata import base as metadata_base +from d3m.metadata.problem import Problem + +from axolotl.utils.schemas import PROBLEM_DEFINITION + + +def make_unique_columns(data): + """ + Parameters + ---------- + data : pd.DataFrame + A dataframe to fix the column names. + + Returns + ------- + The original dataframe where the columns are strings and has a unique name/ + """ + seen_columns_name = {} + column_names = [] + for column in data.columns: + if column in seen_columns_name: + column_name = str(column) + '_' + str(seen_columns_name[column]) + seen_columns_name[column] += 1 + else: + seen_columns_name[column] = 0 + column_name = str(column) + column_names.append(column_name) + data.columns = column_names + return data + + +def get_dataset(input_data, target_index=-2, index_column=-1, semantic_types=None, parse=False): + """ + A function that has as input a dataframe, and generates a D3M dataset. + + Parameters + ---------- + input_data : pd.DataFrame + The dataframe to be converted to d3m Dataset. + target_index : int + The index of the target, if index is not present, it will be ignored. + index_column : int + The index of the index target, if not provided it will look for d3m index, if not generate one. + semantic_types : Sequence[Sequence[str]] + A list of semantic types to be applied. The sequence must be of the same length of + the dataframe columns. + parse : + A flag to determine if the dataset will contain parsed columns. By default is set to fault + to make it compatible with most of D3M current infrastructure. + + Returns + ------- + A D3M dataset. 
+ """ + data = make_unique_columns(input_data.copy(deep=True)) + if semantic_types is None: + semantic_types = [[] for i in range(len(data.columns))] + for i, _type in enumerate(input_data.dtypes): + if _type == float: + semantic_types[i].append('http://schema.org/Float') + elif _type == int: + semantic_types[i].append('http://schema.org/Integer') + + resources = {} + + if 'd3mIndex' in data.columns: + index_column = list(data.columns).index("d3mIndex") + else: + if index_column == -1: + data.insert(0, 'd3mIndex', range(len(data))) + semantic_types.insert(0, []) + target_index += 1 + index_column = 0 + + data = container_pandas.DataFrame(data) + + # remove this + if not parse: + data = data.astype(str) + metadata = metadata_base.DataMetadata() + + resources['learningData'] = data + + metadata = metadata.update(('learningData',), { + 'structural_type': type(data), + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/Table', + 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint', + ], + 'dimension': { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': len(data), + }, + }) + + metadata = metadata.update(('learningData', metadata_base.ALL_ELEMENTS), { + 'dimension': { + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], + 'length': len(data.columns), + }, + }) + + for i, column_name in enumerate(data.columns): + if i == index_column: + metadata = metadata.update(('learningData', metadata_base.ALL_ELEMENTS, i), { + 'name': column_name, + 'structural_type': numpy.int64, + 'semantic_types': [ + 'http://schema.org/Integer', + 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', + ], + }) + else: + _structural_type = str + if semantic_types[i]: + _semantic_types = semantic_types[i] + if 'http://schema.org/Float' in _semantic_types: + _structural_type = numpy.float64 + elif 'http://schema.org/Integer' in _semantic_types: + _structural_type = numpy.int64 + else: + _semantic_types = ['https://metadata.datadrivendiscovery.org/types/UnknownType'] + + if not parse: + _structural_type = str + if i == target_index: + _semantic_types += ['https://metadata.datadrivendiscovery.org/types/SuggestedTarget'] + else: + _semantic_types += ['https://metadata.datadrivendiscovery.org/types/Attribute'] + + metadata = metadata.update(('learningData', metadata_base.ALL_ELEMENTS, i), { + 'name': column_name, + 'structural_type': _structural_type, + 'semantic_types': _semantic_types + }) + + dataset_id = str(uuid.uuid4()) + dataset_metadata = { + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': Dataset, + 'id': dataset_id, + 'name': dataset_id, + 'digest': str(uuid.uuid4()), + 'dimension': { + 'name': 'resources', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'], + 'length': len(resources), + }, + } + + metadata = metadata.update((), dataset_metadata) + + dataset = Dataset(resources, metadata) + return dataset + + +def import_dataframe(data_frame, *, index_column=-1, semantic_types=None): + """ + Function that transforms a dataframe into a dataset. + + data_frame : pd.DataFrame + The input dataframe to be converted to d3m Dataset. + index_column : int + The index of the index column. + semantic_types : Sequence[Sequence[str]] + A list of semantic types to be applied. The sequence must be of the same length of + the dataframe columns. + + Returns + ------- + A D3M dataset. 
+ """ + data = get_dataset(input_data=data_frame, index_column=index_column, semantic_types=semantic_types) + return data + + +def import_input_data(x, y=None, *, target_index=None, index_column=-1, semantic_types=None, parse=False): + """ + Function that takes an np.array or a dataframe and convert them to a D3M dataset. + + x : Union[pd.DataFrame, np.array] + Input features or the features with targets if target index is specified. + y : Union[pd.DataFrame, np.array] + input features or the features with targets if target index is specified. + target_index : int + The index of the target, if index is not present, it will be ignored. + index_column : int + The index of the index target, if not provided it will look for d3m index, if not generate one. + semantic_types : Sequence[Sequence[str]] + A list of semantic types to be applied. The sequence must be of the same length of + the dataframe columns. + parse : + A flag to determine if the dataset will contain parsed columns. By default is set to fault + to make it compatible with most of D3M current infrastructure. + + Returns + ------- + A D3M dataset. + """ + + if y is not None and target_index is not None: + print('Ignoring target index, using y as target') + + _target_index = -1 + if y is not None: + _x = pd.DataFrame(x) + _y = pd.DataFrame(y) + input_data = pd.concat((_x, _y), axis=1) + _target_index = len(_x.columns) + elif target_index is not None: + input_data = x + else: + raise ValueError('Targets (y) or target index should be provide') + + if _target_index != -1: + target_index = _target_index + data = get_dataset(input_data=input_data, target_index=target_index, + index_column=index_column, semantic_types=semantic_types, parse=parse) + + return data + + +def generate_problem_description(dataset, task=None, *, task_keywords=None, performance_metrics=None): + """ + A function that simplifies the generation of a problem description. + + Parameters + ---------- + dataset : Dataset + Dataset to be use for pipeline search. + task : str + A string that represent the problem type, currently only supported: ``binary_classification`` and + ``regression``. + task_keywords : List[TaskKeyword] + A list of TaskKeyword. + performance_metrics: List[PerformanceMetric] + A list of PerformanceMetric. 
+ + Returns + ------- + A Problem + """ + dataset_id = dataset.metadata.query(())['id'] + problem_id = dataset_id + '_problem' + schema = 'https://metadata.datadrivendiscovery.org/schemas/v0/problem.json' + version = '4.0.0' + + target_column_index = None + + for i in range(dataset.metadata.query(('learningData', metadata_base.ALL_ELEMENTS,))['dimension']['length']): + if 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget' in \ + dataset.metadata.query(('learningData', metadata_base.ALL_ELEMENTS, i,))['semantic_types']: + target_column_index = i + break + + if target_column_index is None: + raise ValueError('Input dataframe does not contains targets') + + inputs = { + 'dataset_id': dataset_id, + 'targets': [{ + 'column_index': target_column_index, + 'column_name': dataset.metadata.query(('learningData', metadata_base.ALL_ELEMENTS, i,))['name'], + 'resource_id': 'learningData', + 'target_index': 0 + }] + } + + problem = None + if task is None: + if performance_metrics is not None and task_keywords is not None: + problem = { + 'performance_metrics': performance_metrics, + 'task_keywords': task_keywords + } + else: + if task in PROBLEM_DEFINITION: + problem = PROBLEM_DEFINITION[task] + else: + raise ValueError(task + """ task is not supported in default definitions. + You can define your own task by specifying the task_keywords and performance metrics.""") + + problem_description = { + 'id': problem_id, + 'schema': schema, + 'version': version, + 'inputs': [inputs], + 'problem': problem + } + + return Problem(problem_description) + + +def generate_dataset_problem(x, y=None, task=None, *, target_index=None, index_column=-1, + semantic_types=None, parse=False, task_keywords=None, performance_metrics=None): + """ + Function that takes an np.array or a dataframe and convert them to a D3M dataset. + + x : Union[pd.DataFrame, np.array] + Input features or the features with targets if target index is specified. + y : Union[pd.DataFrame, np.array] + input features or the features with targets if target index is specified. + task : str + A string that represent the problem type, currently only supported: ``binary_classification`` and + ``regression``. + target_index : int + The index of the target, if index is not present, it will be ignored. + index_column : int + The index of the index target, if not provided it will look for d3m index, if not generate one. + semantic_types : Sequence[Sequence[str]] + A list of semantic types to be applied. The sequence must be of the same length of + the dataframe columns. + parse : + A flag to determine if the dataset will contain parsed columns. By default is set to fault + to make it compatible with most of D3M current infrastructure. + task_keywords : List[TaskKeyword] + A list of TaskKeyword. + performance_metrics: List[PerformanceMetric] + A list of PerformanceMetric. + + Returns + ------- + dataset : Dataset + A D3M dataset. + problem_description : Problem + A D3M problem. 
+ """ + dataset = import_input_data(x, y=y, target_index=target_index, index_column=index_column, + semantic_types=semantic_types, parse=parse) + problem_description = generate_problem_description(dataset=dataset, task=task, task_keywords=task_keywords, + performance_metrics=performance_metrics) + + return dataset, problem_description diff --git a/axolotl/axolotl/utils/pipeline.py b/axolotl/axolotl/utils/pipeline.py new file mode 100644 index 0000000..5a180dd --- /dev/null +++ b/axolotl/axolotl/utils/pipeline.py @@ -0,0 +1,542 @@ +import os +import pprint +import typing +import uuid +import json + +import matplotlib.pyplot as plt +from matplotlib.pyplot import figure +import networkx as nx +import pandas + +import d3m +from d3m import container +from d3m import utils as d3m_utils +from d3m.container import utils as container_utils +from d3m.metadata import base as metadata_base +from d3m.metadata.pipeline import Pipeline, PlaceholderStep, PrimitiveStep, SubpipelineStep, get_pipeline, Resolver +from d3m.metadata.pipeline_run import PipelineRun +from d3m.metadata import problem as problem_module +from d3m.primitive_interfaces import base +from d3m.container.pandas import DataFrame + + +class PipelineResult: + """ + A class that captures the output of multiple operations around the system. + + Parameters + ---------- + pipeline: Pipeline + The pipeline used for the run (fit/score) + fitted_pipeline_id: str + The id of the fitted pipeline used to produce the result. + + Attributes + ---------- + pipeline: Pipeline + Pipeline used for the run (fit/score) + fitted_pipeline_id: str + The id of the fitted pipeline used to produce the result. + status: str + A string representing the status of the run (PENDING, RUNNING, COMPLETED, ERRORED) + error: typing.Union[Exception, typing.List[Exception]] + An error of list of errors occured during the execution of the pipeline or fitted pipeline. + exposed_outputs: typing.Dict[str, typing.Any] + A dictionary containing the name of te exposed output and the value, this could be a string + of the path of the stored output or the object itself. + output: container.DataFrame + A dataframe of the pipeline output, this could be a string if the output is stored. + pipeline_run + A pipeline run, or the path where is stored. + method_called: str + The method that it was called while generating this result. (fit, produce) + scores: pandas.DataFrame + A dataframe containing the scores of the evaluated pipeline. + rank: float + The rank of the pipeline from 0 to 1, where 0 is the best. 
+ """ + def __init__(self, *, pipeline: Pipeline = None, fitted_pipeline_id: str = None): + self.pipeline = pipeline + self.fitted_pipeline_id: str = fitted_pipeline_id + self.status: str = None + self.error: typing.Union[Exception, typing.List[Exception]] = None + self.exposed_outputs: typing.Dict[str, typing.Any] = None + self.output: container.DataFrame = None + self.pipeline_run = None + self.method_called: str = None + self.scores: pandas.DataFrame = None + self.rank: float = None + + def __str__(self): + string_representation = {} + + for name, value in self.__dict__.items(): + if not name.startswith('__') and not callable(name): + if value is not None: + string_representation[name] = str(value) + + return pprint.pformat(string_representation).replace("\\n", "") + + def __repr__(self): + base_string = 'PipelineResult' + if self.pipeline is not None: + base_string += ' pipeline_id:{}'.format(self.pipeline.id) + + if self.fitted_pipeline_id is not None: + base_string += ' fitted_pipeline_id:{}'.format(self.fitted_pipeline_id) + + return base_string + + +class PrimitivesList: + # root = os.path.dirname(__file__) + # black_list = os.path.join(root, 'axolotl', 'utils', 'resources', 'blacklist.json') + with open(os.path.join(os.path.dirname(__file__), 'resources', 'blocklist.json'), 'r') as file: + BlockList = json.load(file) + + +class BlackListResolver(Resolver): + """ + A resolver to resolve primitives and pipelines. + + It resolves primitives from available primitives on the system, + and resolves pipelines from files in pipeline search paths. + + Attributes + ---------- + strict_resolving : bool + If resolved primitive does not fully match specified primitive reference, raise an exception? + pipeline_search_paths : Sequence[str] + A list of paths to directories with pipelines to resolve from. + Their files should be named ``.json`` or ``.yml``. + + Parameters + ---------- + strict_resolving : bool + If resolved primitive does not fully match specified primitive reference, raise an exception? + pipeline_search_paths : Sequence[str] + A list of paths to directories with pipelines to resolve from. + Their files should be named ``.json`` or ``.yml``. + respect_environment_variable : bool + Use also (colon separated) pipeline search paths from ``PIPELINES_PATH`` environment variable? 
+ """ + + def __init__(self, black_list=PrimitivesList.BlockList, *, strict_resolving: bool = False, strict_digest: bool = False, + pipeline_search_paths: typing.Sequence[str] = None, + respect_environment_variable: bool = True, load_all_primitives: bool = True, + primitives_blocklist: typing.Collection[str] = None) -> None: + super().__init__(strict_resolving=strict_resolving, strict_digest=strict_digest, + pipeline_search_paths=pipeline_search_paths, + respect_environment_variable=respect_environment_variable, + load_all_primitives=load_all_primitives, primitives_blocklist=primitives_blocklist) + self.black_list = black_list + if len(black_list) == 0: + self.black_list = None + + def _get_primitive(self, primitive_description: typing.Dict) -> typing.Optional[typing.Type[base.PrimitiveBase]]: + if not self._primitives_loaded: + self._primitives_loaded = True + + d3m.index.load_all(blacklist=self.black_list) + + return d3m.index.get_primitive_by_id(primitive_description['id']) + + +def load_pipeline(pipeline_file: typing.Union[str, typing.Dict]): + """ + Load pipeline from a pipeline URI + + Parameters + ---------- + pipeline_file: Union[str, dict] + The URI pointing to a json file of pipeline or dict of string that is a pipeline + + Returns + ------- + pipeline: Pipeline + An object of Pipeline + + """ + if isinstance(pipeline_file, dict): + try: + with d3m_utils.silence(): + pipeline = Pipeline.from_json_structure(pipeline_file) + except: + pipeline = None + else: + with d3m_utils.silence(): + pipeline = get_pipeline(pipeline_path=pipeline_file, load_all_primitives=False) + return pipeline + + +def save_pipeline(pipeline, path, *, rank=None): + """ + A function that make a copy of an already scored pipeline to scored directory according with specifications. + + Parameters + ---------- + pipeline : Pipeline + A pipeline to be save into the path + path: str + Path where the pipeline will be stored + rank : float + A float that represents the rank of the pipeline. + """ + + pipeline_path = os.path.join(path, '{}.json'.format(pipeline.id)) + + with open(pipeline_path, 'w') as file: + pipeline.to_json(file, indent=2, sort_keys=True, ensure_ascii=False) + + if rank is not None: + rank_path = os.path.join(path, '{}.rank'.format(pipeline.id)) + with open(rank_path, 'w') as file: + file.write('{rank}'.format(rank=rank)) + + +def save_pipeline_run(pipeline_run, path): + """ + A function that make a copy of an already scored pipeline to scored directory according with specifications. + + Parameters + ---------- + pipeline_run : PipelineRun + A pipeline_run to be save into the path + path: str + Path where the pipeline_run will be stored + + Returns + ------- + pipeline_run_path : str + Path where the pipeline_run is stored. + """ + + if pipeline_run is None: + return + + if isinstance(pipeline_run, list): + first = True + pipeline_run_path = os.path.join(path, '{}.yml'.format(pipeline_run[0].pipeline['id'])) + with d3m_utils.silence(): + with open(pipeline_run_path, 'w') as file: + for run in pipeline_run: + run.to_yaml(file, appending=not first) + first = False + else: + pipeline_run_path = os.path.join(path, '{}.yml'.format(pipeline_run.pipeline['id'])) + with d3m_utils.silence(): + with open(pipeline_run_path, 'w') as file: + pipeline_run.to_yaml(file) + + return pipeline_run_path + + +def save_exposed_values(values, output_id, output_dir): + """ + A function to save the exposed values of a PipelineResult. 
+ + Parameters + ---------- + values : Union[dict[str, container], container] + A container to be stored into the path + output_id : str + An id that identify the values. + output_dir : str + The path where the values are going to be store. + + Returns + ------- + A dict of names and stored paths. + + """ + output_paths = {} + output_path = os.path.join(output_dir, output_id) + unique_id = str(uuid.uuid4()) + + def get_file_path(path): + files = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))] + file_path = "" + if 'data.csv' in files: + file_path = os.path.join(path, 'data.csv') + elif 'datasetDoc.json' in files: + file_path = os.path.join(path, 'datasetDoc.json') + return file_path + + if isinstance(values, dict): + for name, value in values.items(): + _output_path = os.path.join(output_path, output_id, unique_id, name) + container_utils.save_container(value, _output_path) + output_paths[name] = get_file_path(_output_path) + else: + _output_path = os.path.join(output_path, output_id, unique_id, 'output') + container_utils.save_container(values, _output_path) + output_paths['output'] = get_file_path(_output_path) + + return output_paths + + +def plot_pipeline(pipeline): + figure(num=None, figsize=(10, 12), dpi=80, facecolor='w', edgecolor='k') + graph, nodes_info = get_pipeline_graph(pipeline) + + the_table = plt.table(cellText=nodes_info, colWidths=[0.05, 0.5], colLabels=['Step', 'Primitive'], loc='right') + the_table.set_fontsize(25) + the_table.scale(2, 1) + pos = nx.kamada_kawai_layout(graph, scale=3) + grafo_labels = nx.get_edge_attributes(graph, 'label') + edges_label = nx.draw_networkx_edge_labels(graph, pos, edge_labels=grafo_labels, font_size=7) + nx.draw(graph, pos=pos, node_size=900, alpha=0.5, font_size=16, edges_label=edges_label, with_labels=True, scale=5) + + +def __get_header(index, step): + if isinstance(step, PrimitiveStep): + header = 'steps.' + str(index) + ' - ' + step.primitive.metadata.query()['python_path'] + elif isinstance(step, PlaceholderStep): + header = 'steps.' + str(index) + ' - ' + 'PlaceHolderStep' + elif isinstance(step, SubpipelineStep): + header = 'steps.' 
+ str(index) + ' - ' + 'SubPipeline' + return header + + +def get_pipeline_graph(pipeline): + graph = nx.DiGraph() + nodes_info = [] + + for i in range(0, len(pipeline.steps)): + nodes_info.append([str(i), pipeline.steps[i].primitive.metadata.query()['python_path']]) + + if isinstance(pipeline.steps[i], PrimitiveStep) or isinstance(pipeline.steps[i], PlaceholderStep): + target = i + graph.add_node(target) + for argument in pipeline.steps[i].arguments.keys(): + data = pipeline.steps[i].arguments[argument]['data'] + if 'input' in data: + source = 'inputs' + else: + index = int(data.split('.')[1]) + source = index + label = argument + '-' + data + graph.add_edge(source, target, label=label) + + for hp in pipeline.steps[i].hyperparams.keys(): + if pipeline.steps[i].hyperparams[hp]['type'] == metadata_base.ArgumentType.PRIMITIVE: + index = pipeline.steps[i].hyperparams[hp]['data'] + source = index + label = 'Step {} hyperparam - {}'.format(i, hp) + graph.add_edge(source, target, label=label) + else: + # TODO add support here for subpipelines + continue + + for i in range(0, len(pipeline.outputs)): + index = int(pipeline.outputs[i]['data'].split('.')[1]) + source = index + label = 'outputs.{}'.format(i) + graph.add_edge(source, 'output', label=label) + + return graph, nodes_info + + +def infer_primitive_family(task_type: str, data_types: typing.Iterable, is_semi: bool = False) -> typing.Optional[str]: + """ + Infer target primitive family by task and data_types + + Parameters + ---------- + task_type: str + The task type + data_types: typing.Iterable + The data types + is_semi: bool + Is semi supervised probelm + + Returns + ------- + str + The primitive family + """ + + #TODO temp solution + if problem_module.TaskKeyword.CLASSIFICATION == task_type and \ + problem_module.TaskKeyword.TIME_SERIES in data_types and \ + problem_module.TaskKeyword.GROUPED in data_types: + return metadata_base.PrimitiveFamily.CLASSIFICATION + if problem_module.TaskKeyword.CLASSIFICATION == task_type and \ + problem_module.TaskKeyword.TIME_SERIES in data_types: + return metadata_base.PrimitiveFamily.TIME_SERIES_CLASSIFICATION.name + if problem_module.TaskKeyword.FORECASTING and problem_module.TaskKeyword.TIME_SERIES in data_types: + return metadata_base.PrimitiveFamily.TIME_SERIES_FORECASTING.name + if problem_module.TaskKeyword.CLASSIFICATION == task_type and is_semi: + return metadata_base.PrimitiveFamily.SEMISUPERVISED_CLASSIFICATION.name + if problem_module.TaskKeyword.IMAGE in data_types: + return metadata_base.PrimitiveFamily.DIGITAL_IMAGE_PROCESSING.name + if problem_module.TaskKeyword.VIDEO in data_types: + return metadata_base.PrimitiveFamily.DIGITAL_SIGNAL_PROCESSING.name + + return task_type + + +def check_black_list(primitive_name: str, extra_block: typing.List=[]) -> bool: + """ + Check if the primitive is in the black list, which is from `LIST.BlACK_LIST` + + Parameters + ---------- + primitive_name: str + The name of the primitive + + Returns + ------- + bool + + """ + banned_terms = PrimitivesList.BlockList + extra_block + for banned_element in banned_terms: + if banned_element in primitive_name: + return True + return False + + +def get_primitive_candidates(task_type: str, data_types: typing.Iterable, semi: bool, + extra_block: typing.List=[]) -> typing.List: + """ + Get a list of primitive candidates related to the task type except those primitives in `BLACK_LIST` + + Parameters + ---------- + task_type: str + The task type + data_types: typing.Iterable + The data types + semi: bool + Is it 
+        Whether it is a semi-supervised problem
+    extra_block: typing.List
+        Extra primitive names or terms to block
+
+    Returns
+    -------
+    list
+        A list of primitives
+    """
+    specific_task = infer_primitive_family(task_type, data_types, semi)
+    primitives_path = d3m.index.search()
+    primitives = list()
+    for primitive_path in primitives_path:
+        if check_black_list(primitive_path, extra_block):
+            continue
+        try:
+            with d3m_utils.silence():
+                primitive = d3m.index.get_primitive(primitive_path)
+            primitive_family = primitive.metadata.query()['primitive_family'].name
+            if primitive_family == task_type:
+                primitives.append((primitive, task_type))
+            elif primitive_family == specific_task:
+                primitives.append((primitive, specific_task))
+        # TODO what exception?
+        except Exception:
+            continue
+    return primitives
+
+
+def int_to_step(n_step: int) -> str:
+    """
+    Convert the step number to the standard step reference format
+
+    Parameters
+    ----------
+    n_step: int
+        The step number
+
+    Returns
+    -------
+    str
+        A string of the form "steps.<n_step>.produce"
+    """
+    return 'steps.' + str(n_step) + '.produce'
+
+
+def get_primitives(primitives_dict):
+    """
+    A function that loads and returns a dictionary of primitives
+
+    Parameters
+    ----------
+    primitives_dict: dict[str, str]
+        A dictionary that maps aliases to the python paths of the primitives to load.
+
+    Returns
+    -------
+    loaded_primitives_dict: dict
+        A dictionary containing the aliases and the loaded primitives.
+    """
+    loaded_primitives_dict = {}
+    for primitive_name in primitives_dict.keys():
+        loaded_primitives_dict[primitive_name] = d3m.index.get_primitive(primitives_dict[primitive_name])
+    return loaded_primitives_dict
+
+
+def get_tabular_resource_id(dataset):
+    """
+    A function that retrieves the main resource id
+
+    Parameters
+    ----------
+    dataset: Dataset
+        A dataset.
+
+    Returns
+    -------
+    resource_id: str
+        The id of the main resource.
+    """
+
+    resource_id = None
+    for dataset_resource_id in dataset.keys():
+        if dataset.metadata.has_semantic_type((dataset_resource_id,),
+                                              'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint'):
+            resource_id = dataset_resource_id
+            break
+
+    if resource_id is None:
+        tabular_resource_ids = [dataset_resource_id for dataset_resource_id, dataset_resource in dataset.items() if
+                                isinstance(dataset_resource, container.DataFrame)]
+        if len(tabular_resource_ids) == 1:
+            resource_id = tabular_resource_ids[0]
+
+    if resource_id is None:
+        resource_id = 'learningData'
+
+    return resource_id
+
+
+def query_multiple_terms(metadata, list_queries):
+    data = metadata.query()
+    valid_queries = []
+    for query in list_queries:
+        if query in data:
+            valid_queries.append(query)
+            data = data[query]
+        else:
+            break
+    if len(valid_queries) == len(list_queries):
+        return data
+
+
+def filter_primitives_by_dataframe_input(primitive_info):
+    primitives_dataframe_input = []
+    for info in primitive_info:
+        primitive, task = info
+        arguments = query_multiple_terms(
+            primitive.metadata, ['primitive_code', 'class_type_arguments'])
+
+        has_dataframe_arguments = True
+        for argument, value in arguments.items():
+            if argument == 'Params' or argument == 'Hyperparams':
+                continue
+            else:
+                if value != DataFrame:
+                    has_dataframe_arguments = False
+                    break
+        if has_dataframe_arguments:
+            primitives_dataframe_input.append(info)
+
+    return primitives_dataframe_input
+
diff --git a/axolotl/axolotl/utils/resources.py b/axolotl/axolotl/utils/resources.py
new file mode 100644
index 0000000..9fba49e
--- /dev/null
+++ b/axolotl/axolotl/utils/resources.py
@@ -0,0 +1,31 @@
+import os
+import shutil
+import signal
+from contextlib import contextmanager
+
+
+class TimeoutException(Exception):
+    pass
+
+
+@contextmanager
+def time_limit(seconds):
+    def signal_handler(signum, frame):
+        raise TimeoutException("Timed out!")
+    signal.signal(signal.SIGALRM, signal_handler)
+    signal.alarm(seconds)
+    try:
+        yield
+    finally:
+        signal.alarm(0)
+
+
+def check_directory(dir_name):
+    dir_name = os.path.abspath(dir_name)
+    if not os.path.exists(dir_name):
+        os.makedirs(dir_name)
+
+
+def copy_file(source_path, target_path):
+    path = os.path.join(target_path, os.path.basename(source_path))
+    shutil.copyfile(source_path, path)
diff --git a/axolotl/axolotl/utils/resources/blocklist.json b/axolotl/axolotl/utils/resources/blocklist.json
new file mode 100644
index 0000000..da6a81e
--- /dev/null
+++ b/axolotl/axolotl/utils/resources/blocklist.json
@@ -0,0 +1,31 @@
+[
+    "d3m.primitives.classification.xgboost_dart.Common",
+    "d3m.primitives.classification.canonical_correlation_forests.UBC",
+    "d3m.primitives.classification.logistic_regression.UBC",
+    "d3m.primitives.classification.multilayer_perceptron.UBC",
+    "d3m.primitives.classification.simple_cnaps.UBC",
+    "d3m.primitives.clustering.kmeans_clustering.UBC",
+    "d3m.primitives.dimensionality_reduction.principal_component_analysis.UBC",
+    "d3m.primitives.feature_extraction.boc.UBC",
+    "d3m.primitives.feature_extraction.bow.UBC",
+    "d3m.primitives.feature_extraction.googlenet_cnn.UBC",
+    "d3m.primitives.feature_extraction.convolutional_neural_network.UBC",
+    "d3m.primitives.schema_discovery.semantic_type.UBC",
+    "d3m.primitives.regression.linear_regression.UBC",
+    "d3m.primitives.operator.diagonal_mvn.UBC",
+    "d3m.primitives.feature_extraction.resnet_cnn.UBC",
+    "d3m.primitives.feature_extraction.mobilenet_cnn.UBC",
+    "d3m.primitives.feature_extraction.vggnet_cnn.UBC",
"d3m.primitives.regression.canonical_correlation_forests.UBC", + "d3m.primitives.regression.multilayer_perceptron.UBC", + "d3m.primitives.schema_discovery.semantic_type.UBC", + "d3m.primitives.data_transformation.missing_indicator.DistilMissingIndicator", + "d3m.primitives.data_transformation.graph_to_edge_list.DSBOX", + "d3m.primitives.feature_construction.graph_transformer.GCN", + "d3m.primitives.feature_extraction.huber_pca.Cornell", + "d3m.primitives.natural_language_processing.glda.Fastlv", + "d3m.primitives.feature_construction.corex_continuous.DSBOX", + "d3m.primitives.natural_language_processing.glda.Fastlvm", + "d3m.primitives.classification.xgboost_dart.Common", + "d3m.primitives.classification.global_causal_discovery.ClassifierRPI" +] diff --git a/axolotl/axolotl/utils/resources/default_pipelines.json b/axolotl/axolotl/utils/resources/default_pipelines.json new file mode 100644 index 0000000..e20a499 --- /dev/null +++ b/axolotl/axolotl/utils/resources/default_pipelines.json @@ -0,0 +1,64 @@ +{ + "CLASSIFICATION": [ + {"id": "6a520746-108c-45bf-a6d8-c875b5a9d326","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json","created": "2020-01-16T20:40:25.541426Z","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.8.produce","name": "output predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","version": "0.3.0","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","name": "Extract a DataFrame from a Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"categorical_max_ratio_distinct_values": {"type": "VALUE","data": 1},"categorical_max_absolute_distinct_values": {"type": "VALUE","data": {"case": "unlimited","value": null}}},"outputs": [{"id": "produce"}],"primitive": {"digest": "8b12a9aececdc5b7a4d5ef47cd04cda75592fd24f49922776b614d4bbeeb97f1","id": "e193afa1-b45e-4d29-918f-5bb1fa3b88a7","name": "Determine missing semantic types for columns automatically","python_path": "d3m.primitives.schema_discovery.profiler.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"type": "PRIMITIVE","primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7","version": "0.5.0","python_path": "d3m.primitives.data_transformation.column_parser.Common","name": "Parses strings into their types"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}},{"type": "PRIMITIVE","primitive": {"id": 
"d016df89-de62-3c53-87ed-c06bb6a23cde","version": "2019.6.7","python_path": "d3m.primitives.data_cleaning.imputer.SKlearn","name": "sklearn.impute.SimpleImputer"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.3.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "f32dcb25-4cd0-4bb9-9408-ade1edfa2b53","version": "0.1.0","python_path": "d3m.primitives.feature_selection.skfeature.TAMU","name": "Feature Selection"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.5.produce"},"outputs": {"type": "CONTAINER","data": "steps.4.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "1dd82833-5692-39cb-84fb-2455683075f3","version": "2019.6.7","python_path": "d3m.primitives.classification.random_forest.SKlearn","name": "sklearn.ensemble.forest.RandomForestClassifier"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.6.produce"},"outputs": {"type": "CONTAINER","data": "steps.4.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","version": "0.3.0","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","name": "Construct pipeline predictions output"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.7.produce"},"reference": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}]}]}, + {"id": "a6b468a5-4d03-405e-a707-8e377f9ad1c3","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json","created": "2020-01-16T20:40:25.541426Z","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.8.produce","name": "output predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","version": "0.3.0","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","name": "Extract a DataFrame from a Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"categorical_max_ratio_distinct_values": {"type": "VALUE","data": 1},"categorical_max_absolute_distinct_values": {"type": "VALUE","data": {"case": "unlimited","value": null}}},"outputs": [{"id": "produce"}],"primitive": {"digest": "8b12a9aececdc5b7a4d5ef47cd04cda75592fd24f49922776b614d4bbeeb97f1","id": "e193afa1-b45e-4d29-918f-5bb1fa3b88a7","name": "Determine missing semantic types for columns automatically","python_path": "d3m.primitives.schema_discovery.profiler.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"type": "PRIMITIVE","primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7","version": "0.5.0","python_path": "d3m.primitives.data_transformation.column_parser.Common","name": "Parses strings into their types"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": 
"d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}},{"type": "PRIMITIVE","primitive": {"id": "d016df89-de62-3c53-87ed-c06bb6a23cde","version": "2019.6.7","python_path": "d3m.primitives.data_cleaning.imputer.SKlearn","name": "sklearn.impute.SimpleImputer"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.3.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "f32dcb25-4cd0-4bb9-9408-ade1edfa2b53","version": "0.1.0","python_path": "d3m.primitives.feature_selection.skfeature.TAMU","name": "Feature Selection"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.5.produce"},"outputs": {"type": "CONTAINER","data": "steps.4.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "01d2c086-91bf-3ca5-b023-5139cf239c77","version": "2019.6.7","python_path": "d3m.primitives.classification.gradient_boosting.SKlearn","name": "sklearn.ensemble.gradient_boosting.GradientBoostingClassifier"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.6.produce"},"outputs": {"type": "CONTAINER","data": "steps.4.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","version": "0.3.0","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","name": "Construct pipeline predictions output"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.7.produce"},"reference": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}]}]}, + {"id": "ef1c483a-34fc-4398-a6b3-063b33786972","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json","created": "2020-01-16T20:40:25.541426Z","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.8.produce","name": "output predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","version": "0.3.0","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","name": "Extract a DataFrame from a Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"categorical_max_ratio_distinct_values": {"type": "VALUE","data": 1},"categorical_max_absolute_distinct_values": {"type": "VALUE","data": {"case": "unlimited","value": null}}},"outputs": [{"id": "produce"}],"primitive": {"digest": "8b12a9aececdc5b7a4d5ef47cd04cda75592fd24f49922776b614d4bbeeb97f1","id": "e193afa1-b45e-4d29-918f-5bb1fa3b88a7","name": "Determine missing semantic types for columns automatically","python_path": "d3m.primitives.schema_discovery.profiler.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"type": "PRIMITIVE","primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7","version": "0.5.0","python_path": "d3m.primitives.data_transformation.column_parser.Common","name": "Parses strings into their types"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": 
"d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}},{"type": "PRIMITIVE","primitive": {"id": "d016df89-de62-3c53-87ed-c06bb6a23cde","version": "2019.6.7","python_path": "d3m.primitives.data_cleaning.imputer.SKlearn","name": "sklearn.impute.SimpleImputer"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.3.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "f32dcb25-4cd0-4bb9-9408-ade1edfa2b53","version": "0.1.0","python_path": "d3m.primitives.feature_selection.skfeature.TAMU","name": "Feature Selection"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.5.produce"},"outputs": {"type": "CONTAINER","data": "steps.4.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "c8a28f02-ef4a-35a8-87f1-cf79980f5c3e","version": "2019.6.7","python_path": "d3m.primitives.classification.extra_trees.SKlearn","name": "sklearn.ensemble.forest.ExtraTreesClassifier"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.6.produce"},"outputs": {"type": "CONTAINER","data": "steps.4.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","version": "0.3.0","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","name": "Construct pipeline predictions output"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.7.produce"},"reference": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}]}]} + ], + "REGRESSION": [ + {"id": "efab70e7-461a-42de-a5d7-9bdd98cc05d8","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json","created": "2020-01-16T20:40:25.541426Z","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.8.produce","name": "output predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","version": "0.3.0","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","name": "Extract a DataFrame from a Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"categorical_max_ratio_distinct_values": {"type": "VALUE","data": 1},"categorical_max_absolute_distinct_values": {"type": "VALUE","data": {"case": "unlimited","value": null}}},"outputs": [{"id": "produce"}],"primitive": {"digest": "8b12a9aececdc5b7a4d5ef47cd04cda75592fd24f49922776b614d4bbeeb97f1","id": "e193afa1-b45e-4d29-918f-5bb1fa3b88a7","name": "Determine missing semantic types for columns automatically","python_path": "d3m.primitives.schema_discovery.profiler.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"type": "PRIMITIVE","primitive": {"id": 
"d510cb7a-1782-4f51-b44c-58f0236e47c7","version": "0.5.0","python_path": "d3m.primitives.data_transformation.column_parser.Common","name": "Parses strings into their types"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}},{"type": "PRIMITIVE","primitive": {"id": "d016df89-de62-3c53-87ed-c06bb6a23cde","version": "2019.6.7","python_path": "d3m.primitives.data_cleaning.imputer.SKlearn","name": "sklearn.impute.SimpleImputer"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.3.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "f32dcb25-4cd0-4bb9-9408-ade1edfa2b53","version": "0.1.0","python_path": "d3m.primitives.feature_selection.skfeature.TAMU","name": "Feature Selection"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.5.produce"},"outputs": {"type": "CONTAINER","data": "steps.4.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "f0fd7a62-09b5-3abc-93bb-f5f999f7cc80","version": "2019.6.7","python_path": "d3m.primitives.regression.random_forest.SKlearn","name": "sklearn.ensemble.forest.RandomForestRegressor"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.6.produce"},"outputs": {"type": "CONTAINER","data": "steps.4.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","version": "0.3.0","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","name": "Construct pipeline predictions output"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.7.produce"},"reference": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}]}]}, + {"id": "a6b468a5-4d03-405e-a707-8e377f9ad1c3","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json","created": "2020-01-16T20:40:25.541426Z","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.8.produce","name": "output predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","version": "0.3.0","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","name": "Extract a DataFrame from a Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"categorical_max_ratio_distinct_values": {"type": "VALUE","data": 1},"categorical_max_absolute_distinct_values": {"type": "VALUE","data": {"case": "unlimited","value": null}}},"outputs": [{"id": 
"produce"}],"primitive": {"digest": "8b12a9aececdc5b7a4d5ef47cd04cda75592fd24f49922776b614d4bbeeb97f1","id": "e193afa1-b45e-4d29-918f-5bb1fa3b88a7","name": "Determine missing semantic types for columns automatically","python_path": "d3m.primitives.schema_discovery.profiler.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"type": "PRIMITIVE","primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7","version": "0.5.0","python_path": "d3m.primitives.data_transformation.column_parser.Common","name": "Parses strings into their types"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}},{"type": "PRIMITIVE","primitive": {"id": "d016df89-de62-3c53-87ed-c06bb6a23cde","version": "2019.6.7","python_path": "d3m.primitives.data_cleaning.imputer.SKlearn","name": "sklearn.impute.SimpleImputer"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.3.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "f32dcb25-4cd0-4bb9-9408-ade1edfa2b53","version": "0.1.0","python_path": "d3m.primitives.feature_selection.skfeature.TAMU","name": "Feature Selection"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.5.produce"},"outputs": {"type": "CONTAINER","data": "steps.4.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "2a031907-6b2c-3390-b365-921f89c8816a","version": "2019.6.7","python_path": "d3m.primitives.regression.gradient_boosting.SKlearn","name": "sklearn.ensemble.gradient_boosting.GradientBoostingRegressor"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.6.produce"},"outputs": {"type": "CONTAINER","data": "steps.4.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","version": "0.3.0","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","name": "Construct pipeline predictions output"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.7.produce"},"reference": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}]}]}, + {"id": "a6b468a5-4d03-405e-a707-8e377f9ad1c3","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json","created": "2020-01-16T20:40:25.541426Z","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.8.produce","name": "output predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","version": "0.3.0","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","name": "Extract a DataFrame from a 
Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"categorical_max_ratio_distinct_values": {"type": "VALUE","data": 1},"categorical_max_absolute_distinct_values": {"type": "VALUE","data": {"case": "unlimited","value": null}}},"outputs": [{"id": "produce"}],"primitive": {"digest": "8b12a9aececdc5b7a4d5ef47cd04cda75592fd24f49922776b614d4bbeeb97f1","id": "e193afa1-b45e-4d29-918f-5bb1fa3b88a7","name": "Determine missing semantic types for columns automatically","python_path": "d3m.primitives.schema_discovery.profiler.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"type": "PRIMITIVE","primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7","version": "0.5.0","python_path": "d3m.primitives.data_transformation.column_parser.Common","name": "Parses strings into their types"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}},{"type": "PRIMITIVE","primitive": {"id": "d016df89-de62-3c53-87ed-c06bb6a23cde","version": "2019.6.7","python_path": "d3m.primitives.data_cleaning.imputer.SKlearn","name": "sklearn.impute.SimpleImputer"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.3.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "f32dcb25-4cd0-4bb9-9408-ade1edfa2b53","version": "0.1.0","python_path": "d3m.primitives.feature_selection.skfeature.TAMU","name": "Feature Selection"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.5.produce"},"outputs": {"type": "CONTAINER","data": "steps.4.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "35321059-2a1a-31fd-9509-5494efc751c7","version": "2019.6.7","python_path": "d3m.primitives.regression.extra_trees.SKlearn","name": "sklearn.ensemble.forest.ExtraTreesRegressor"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.6.produce"},"outputs": {"type": "CONTAINER","data": "steps.4.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","version": "0.3.0","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","name": "Construct pipeline predictions output"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.7.produce"},"reference": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}]}]} + ], + "CLUSTERING": [], + "LINK_PREDICTION": [ + {"id": "ddc6c7e9-64b4-4f9c-af07-5f27461cb940","schema": 
"https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z", "inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.3.produce","name": "Predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "cb192a83-63e2-4075-bab9-e6ba1a8365b6","version": "0.1.0","python_path": "d3m.primitives.data_transformation.load_graphs.JHU","name": "Extract a list of Graphs from a Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "32fec24f-6861-4a4c-88f3-d4ec2bc1b486","version": "0.1.0","python_path": "d3m.primitives.data_preprocessing.largest_connected_component.JHU","name": "jhu.lcc"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "b940ccbd-9e9b-3166-af50-210bfd79251b","version": "0.1.0","python_path": "d3m.primitives.data_transformation.adjacency_spectral_embedding.JHU","name": "jhu.ase"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"max_dimension": {"type": "VALUE","data": 5},"use_attributes": {"type": "VALUE","data": true}}},{"type": "PRIMITIVE","primitive": {"id": "c9d5da5d-0520-468e-92df-bd3a85bb4fac","version": "0.1.0","python_path": "d3m.primitives.classification.gaussian_classification.JHU","name": "jhu.gclass"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}]}]}, + {"id": "12a4b6a8-b2e4-4604-afe5-8196bf55a925","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z", "inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.3.produce","name": "Predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "cb192a83-63e2-4075-bab9-e6ba1a8365b6","version": "0.1.0","python_path": "d3m.primitives.data_transformation.load_graphs.JHU","name": "Extract a list of Graphs from a Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "32fec24f-6861-4a4c-88f3-d4ec2bc1b486","version": "0.1.0","python_path": "d3m.primitives.data_preprocessing.largest_connected_component.JHU","name": "jhu.lcc"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "b940ccbd-9e9b-3166-af50-210bfd79251b","version": "0.1.0","python_path": "d3m.primitives.data_transformation.adjacency_spectral_embedding.JHU","name": "jhu.ase"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"max_dimension": {"type": "VALUE","data": 5},"use_attributes": {"type": "VALUE","data": true}}},{"type": "PRIMITIVE","primitive": {"id": "5194ef94-3683-319a-9d8d-5c3fdd09de24","version": "0.1.0","python_path": "d3m.primitives.graph_clustering.gaussian_clustering.JHU","name": "jhu.gclust"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"max_clusters": {"type": "VALUE","data": 10}}}]}, + {"id": "6216f2bd-2f23-4dbf-92d0-f3b40aeac150","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z", "inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.2.produce","name": "Predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": 
"09f2eea8-667c-44b8-a955-6a153ba9ccc3","version": "0.1.0","python_path": "d3m.primitives.link_prediction.data_conversion.JHU","name": "jhu.link_pred_graph_reader"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "b940ccbd-9e9b-3166-af50-210bfd79251b","version": "0.1.0","python_path": "d3m.primitives.data_transformation.adjacency_spectral_embedding.JHU","name": "jhu.ase"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"which_elbow": {"type": "VALUE","data": 1},"max_dimension": {"type": "VALUE","data": 2},"use_attributes": {"type": "VALUE","data": false}}},{"type": "PRIMITIVE","primitive": {"id": "25e97696-b96f-4f5c-8620-b340fe83414d","version": "0.1.0","python_path": "d3m.primitives.link_prediction.rank_classification.JHU","name": "jhu.link_pred_rc"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}]}]}, + {"id": "0f5d0c4a-2c7f-4a9b-9441-80449c460993","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z", "inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.1.produce","name": "output"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "79012210-2463-4f94-9da6-11bdc5a7e6c4","version": "0.1.2","python_path": "d3m.primitives.data_transformation.load_single_graph.DistilSingleGraphLoader","name": "Load single graph and dataframe into a parseable object"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"},{"id": "produce_target"}]},{"type": "PRIMITIVE","primitive": {"id": "fc138210-c317-4528-81ae-5eed3a1a0267","version": "0.1.1","python_path": "d3m.primitives.link_prediction.link_prediction.DistilLinkPrediction","name": "LinkPrediction"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"},"outputs": {"type": "CONTAINER","data": "steps.0.produce_target"}},"outputs": [{"id": "produce"}],"hyperparams": {"metric": {"type": "VALUE","data": "accuracy"}}}]} + + ], + "VERTEX_NOMINATION": [], + "COMMUNITY_DETECTION": [ + {"id": "bfe17a08-bc94-4f6d-8be1-4758e899a6c6","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z", "inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.1.produce","name": "output"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "79012210-2463-4f94-9da6-11bdc5a7e6c4","version": "0.1.2","python_path": "d3m.primitives.data_transformation.load_single_graph.DistilSingleGraphLoader","name": "Load single graph and dataframe into a parseable object"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"},{"id": "produce_target"}]},{"type": "PRIMITIVE","primitive": {"id": "064cec55-39dd-45b7-a663-50d3e17e0c42","version": "0.1.1","python_path": "d3m.primitives.community_detection.community_detection.DistilCommunityDetection","name": "CommunityDetection"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"},"outputs": {"type": "CONTAINER","data": "steps.0.produce_target"}},"outputs": [{"id": "produce"}],"hyperparams": {"metric": {"type": "VALUE","data": "normalizedMutualInformation"}}}]}, + {"id": "0f6cafc4-5628-47bc-bbf5-8cab3a7c0e95","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z", "inputs": [{"name": "inputs"}],"outputs": [{"data": 
"steps.3.produce","name": "Predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "cb192a83-63e2-4075-bab9-e6ba1a8365b6","version": "0.1.0","python_path": "d3m.primitives.data_transformation.load_graphs.JHU","name": "Extract a list of Graphs from a Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "32fec24f-6861-4a4c-88f3-d4ec2bc1b486","version": "0.1.0","python_path": "d3m.primitives.data_preprocessing.largest_connected_component.JHU","name": "jhu.lcc"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "8fa6178b-84f7-37d8-87e8-4d3a44c86569","version": "0.1.0","python_path": "d3m.primitives.data_transformation.laplacian_spectral_embedding.JHU","name": "jhu.lse"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"max_dimension": {"type": "VALUE","data": 5},"use_attributes": {"type": "VALUE","data": true}}},{"type": "PRIMITIVE","primitive": {"id": "5194ef94-3683-319a-9d8d-5c3fdd09de24","version": "0.1.0","python_path": "d3m.primitives.graph_clustering.gaussian_clustering.JHU","name": "jhu.gclust"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"max_clusters": {"type": "VALUE","data": 10}}}]}, + {"id": "ffc49730-eb73-423c-ab6c-acb47300fcfc","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z", "inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.3.produce","name": "Predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "cb192a83-63e2-4075-bab9-e6ba1a8365b6","version": "0.1.0","python_path": "d3m.primitives.data_transformation.load_graphs.JHU","name": "Extract a list of Graphs from a Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "32fec24f-6861-4a4c-88f3-d4ec2bc1b486","version": "0.1.0","python_path": "d3m.primitives.data_preprocessing.largest_connected_component.JHU","name": "jhu.lcc"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "8fa6178b-84f7-37d8-87e8-4d3a44c86569","version": "0.1.0","python_path": "d3m.primitives.data_transformation.laplacian_spectral_embedding.JHU","name": "jhu.lse"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"max_dimension": {"type": "VALUE","data": 5},"use_attributes": {"type": "VALUE","data": true}}},{"type": "PRIMITIVE","primitive": {"id": "c9d5da5d-0520-468e-92df-bd3a85bb4fac","version": "0.1.0","python_path": "d3m.primitives.classification.gaussian_classification.JHU","name": "jhu.gclass"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}]}]} + ], + "GRAPH_MATCHING": [ + {"id": "b5dd2766-da63-4526-a29b-e6322c1f9cc8","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z", "inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.0.produce","name": "Predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "ff22e721-e4f5-32c9-ab51-b90f32603a56","version": "0.1.0","python_path": "d3m.primitives.graph_matching.seeded_graph_matching.JHU","name": 
"jhu.sgm"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]}]}, + {"id": "2bf14cda-1edd-4abd-a499-422913c075e6","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z", "inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.1.produce","name": "output"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "ae0797506-ea7b-4a7f-a7e4-2f91e2082f05","version": "0.1.2","python_path": "d3m.primitives.data_transformation.load_graphs.DistilGraphLoader","name": "Load graphs into a parseable object"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"},{"id": "produce_target"}]},{"type": "PRIMITIVE","primitive": {"id": "8baea8e6-9d3a-46d7-acf1-04fd593dcd37","version": "0.2.0","python_path": "d3m.primitives.graph_matching.seeded_graph_matching.DistilSeededGraphMatcher","name": "SeededGraphMatcher"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"},"outputs": {"type": "CONTAINER","data": "steps.0.produce_target"}},"outputs": [{"id": "produce"}],"hyperparams": {"metric": {"type": "VALUE","data": "accuracy"}}}]} + ], + "COLLABORATIVE_FILTERING": [ + {"id": "8c3a2db6-4449-4a7a-9830-1b9cf2b993d6","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z", "inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.6.produce","name": "output"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","version": "0.3.0","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","name": "Extract a DataFrame from a Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "e193afa1-b45e-4d29-918f-5bb1fa3b88a7","version": "0.2.0","python_path": "d3m.primitives.schema_discovery.profiler.Common","name": "Determine missing semantic types for columns automatically"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7","version": "0.6.0","python_path": "d3m.primitives.data_transformation.column_parser.Common","name": "Parses strings into their types"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"parse_semantic_types": {"type": "VALUE","data": ["http://schema.org/Boolean","http://schema.org/Integer","http://schema.org/Float","https://metadata.datadrivendiscovery.org/types/FloatVector"]}}},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": 
["https://metadata.datadrivendiscovery.org/types/Target","https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}},{"type": "PRIMITIVE","primitive": {"id": "a242314d-7955-483f-aed6-c74cd2b880df","version": "0.1.4","python_path": "d3m.primitives.collaborative_filtering.collaborative_filtering_link_prediction.DistilCollaborativeFiltering","name": "Collaborative filtering"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.3.produce"},"outputs": {"type": "CONTAINER","data": "steps.4.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","version": "0.3.0","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","name": "Construct pipeline predictions output"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.5.produce"},"reference": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}]}]}, + {"id": "15cea2f3-9eef-4a37-8f04-eea2e30f8d68","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.9.produce","name": "output"}],"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z", "steps": [{"arguments": {"inputs": {"data": "inputs.0","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","name": "Extract a DataFrame from a Dataset","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": "d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.1.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.2.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "d639947e-ece0-3a39-a666-e974acf4521d","name": "sklearn.preprocessing.data.StandardScaler","python_path": "d3m.primitives.data_preprocessing.standard_scaler.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"semantic_types": {"data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","name": "Extracts columns by semantic type","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.4.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "26fc8fd3-f6b2-4c65-8afb-edb54ed2a3e4","name": "Label encoder with an unseen category","python_path": "d3m.primitives.data_preprocessing.label_encoder.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.3.produce","type": "CONTAINER"},"outputs": {"data": "steps.3.produce","type": 
"CONTAINER"}},"hyperparams": {"alpha": {"data": 0.01,"type": "VALUE"},"beta": {"data": 0.01,"type": "VALUE"},"d": {"data": 50,"type": "VALUE"},"maxiter": {"data": 500,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "e6ee30fa-af68-4bfe-9234-5ca7e7ac8e93","name": "Matrix Completion via Sparse Factorization","python_path": "d3m.primitives.collaborative_filtering.high_rank_imputer.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.6.produce","type": "CONTAINER"},"outputs": {"data": "steps.5.produce","type": "CONTAINER"}},"hyperparams": {"n_estimators": {"data": 50,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "1b2a32a6-0ec5-3ca0-9386-b8b1f1b831d1","name": "sklearn.ensemble.bagging.BaggingClassifier","python_path": "d3m.primitives.classification.bagging.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.7.produce","type": "CONTAINER"}},"hyperparams": {"encoder": {"data": 5,"type": "PRIMITIVE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "39ae30f7-39ed-40af-8679-5cf108499605","name": "Label decoder for UnseenLabelEncoderPrimitive","python_path": "d3m.primitives.data_preprocessing.label_decoder.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.8.produce","type": "CONTAINER"},"reference": {"data": "steps.0.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","name": "Construct pipeline predictions output","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","version": "0.3.0"},"type": "PRIMITIVE"}]}, + {"id": "164f4dfe-fcca-4769-aa10-d0d9f2a72cb3","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.9.produce","name": "output"}],"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z", "steps": [{"arguments": {"inputs": {"data": "inputs.0","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","name": "Extract a DataFrame from a Dataset","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": "d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.1.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.2.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "d639947e-ece0-3a39-a666-e974acf4521d","name": "sklearn.preprocessing.data.StandardScaler","python_path": 
"d3m.primitives.data_preprocessing.standard_scaler.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.3.produce","type": "CONTAINER"},"outputs": {"data": "steps.3.produce","type": "CONTAINER"}},"hyperparams": {"alpha": {"data": 0.1,"type": "VALUE"},"beta": {"data": 0.01,"type": "VALUE"},"d": {"data": 20,"type": "VALUE"},"maxiter": {"data": 1000,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "e6ee30fa-af68-4bfe-9234-5ca7e7ac8e93","name": "Matrix Completion via Sparse Factorization","python_path": "d3m.primitives.collaborative_filtering.high_rank_imputer.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"semantic_types": {"data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","name": "Extracts columns by semantic type","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.5.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "26fc8fd3-f6b2-4c65-8afb-edb54ed2a3e4","name": "Label encoder with an unseen category","python_path": "d3m.primitives.data_preprocessing.label_encoder.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.4.produce","type": "CONTAINER"},"outputs": {"data": "steps.6.produce","type": "CONTAINER"}},"hyperparams": {"C": {"data": 100,"type": "VALUE"},"kernel": {"data": {"choice": "rbf","gamma": {"case": "float","value": 0.02}},"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "0ae7d42d-f765-3348-a28c-57d94880aa6a","name": "sklearn.svm.classes.SVC","python_path": "d3m.primitives.classification.svc.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.7.produce","type": "CONTAINER"}},"hyperparams": {"encoder": {"data": 6,"type": "PRIMITIVE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "39ae30f7-39ed-40af-8679-5cf108499605","name": "Label decoder for UnseenLabelEncoderPrimitive","python_path": "d3m.primitives.data_preprocessing.label_decoder.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.8.produce","type": "CONTAINER"},"reference": {"data": "steps.0.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","name": "Construct pipeline predictions output","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","version": "0.3.0"},"type": "PRIMITIVE"}]}, + {"id": "9ea39abe-b164-4eff-918e-c364ce87d167","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.6.produce","name": "output predictions"}],"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z", "steps": [{"arguments": {"inputs": {"data": "inputs.0","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","name": "Extract a DataFrame from a Dataset","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [1,2],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": 
"81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": "d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.1.produce","type": "CONTAINER"}},"hyperparams": {"convert": {"data": true,"type": "VALUE"},"to_type": {"data": {"encoding": "pickle","value": "gANjYnVpbHRpbnMKaW50CnEALg=="},"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [3],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": "d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.3.produce","type": "CONTAINER"}},"hyperparams": {"convert": {"data": false,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.2.produce","type": "CONTAINER"},"outputs": {"data": "steps.4.produce","type": "CONTAINER"}},"hyperparams": {"alpha": {"data": 1,"type": "VALUE"},"beta": {"data": 1,"type": "VALUE"},"d": {"data": 100,"type": "VALUE"},"maxiter": {"data": 1000,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "e6ee30fa-af68-4bfe-9234-5ca7e7ac8e93","name": "Matrix Completion via Sparse Factorization","python_path": "d3m.primitives.collaborative_filtering.high_rank_imputer.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.5.produce","type": "CONTAINER"},"reference": {"data": "steps.0.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","name": "Construct pipeline predictions output","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","version": "0.3.0"},"type": "PRIMITIVE"}]}, + {"id": "c4019fda-d205-4f89-9acf-5741e45e601a","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.9.produce","name": "output"}],"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z", "steps": [{"arguments": {"inputs": {"data": "inputs.0","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","name": "Extract a DataFrame from a Dataset","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": 
"d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.1.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.2.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "d639947e-ece0-3a39-a666-e974acf4521d","name": "sklearn.preprocessing.data.StandardScaler","python_path": "d3m.primitives.data_preprocessing.standard_scaler.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"semantic_types": {"data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","name": "Extracts columns by semantic type","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.4.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "26fc8fd3-f6b2-4c65-8afb-edb54ed2a3e4","name": "Label encoder with an unseen category","python_path": "d3m.primitives.data_preprocessing.label_encoder.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.3.produce","type": "CONTAINER"},"outputs": {"data": "steps.3.produce","type": "CONTAINER"}},"hyperparams": {"alpha": {"data": 0.01,"type": "VALUE"},"beta": {"data": 0.001,"type": "VALUE"},"d": {"data": 90,"type": "VALUE"},"maxiter": {"data": 1000,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "e6ee30fa-af68-4bfe-9234-5ca7e7ac8e93","name": "Matrix Completion via Sparse Factorization","python_path": "d3m.primitives.collaborative_filtering.high_rank_imputer.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.6.produce","type": "CONTAINER"},"outputs": {"data": "steps.5.produce","type": "CONTAINER"}},"hyperparams": {"C": {"data": 100,"type": "VALUE"},"kernel": {"data": {"choice": "rbf","gamma": {"case": "float","value": 0.01}},"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "0ae7d42d-f765-3348-a28c-57d94880aa6a","name": "sklearn.svm.classes.SVC","python_path": "d3m.primitives.classification.svc.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.7.produce","type": "CONTAINER"}},"hyperparams": {"encoder": {"data": 5,"type": "PRIMITIVE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "39ae30f7-39ed-40af-8679-5cf108499605","name": "Label decoder for UnseenLabelEncoderPrimitive","python_path": "d3m.primitives.data_preprocessing.label_decoder.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.8.produce","type": "CONTAINER"},"reference": {"data": "steps.0.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","name": "Construct pipeline predictions output","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","version": "0.3.0"},"type": "PRIMITIVE"}]}, + {"id": "e1a156e9-0e34-4def-b960-5ad5f3a910a1","inputs": [{"name": "inputs"}],"outputs": [{"data": 
"steps.9.produce","name": "output"}],"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","steps": [{"arguments": {"inputs": {"data": "inputs.0","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","name": "Extract a DataFrame from a Dataset","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": "d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.1.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7","name": "Parses strings into their types","python_path": "d3m.primitives.data_transformation.column_parser.Common","version": "0.5.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.2.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"semantic_types": {"data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","name": "Extracts columns by semantic type","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.4.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "26fc8fd3-f6b2-4c65-8afb-edb54ed2a3e4","name": "Label encoder with an unseen category","python_path": "d3m.primitives.data_preprocessing.label_encoder.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.3.produce","type": "CONTAINER"},"outputs": {"data": "steps.3.produce","type": "CONTAINER"}},"hyperparams": {"alpha": {"data": 0.01,"type": "VALUE"},"beta": {"data": 0.001,"type": "VALUE"},"d": {"data": 100,"type": "VALUE"},"maxiter": {"data": 1000,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "e6ee30fa-af68-4bfe-9234-5ca7e7ac8e93","name": "Matrix Completion via Sparse Factorization","python_path": "d3m.primitives.collaborative_filtering.high_rank_imputer.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.6.produce","type": "CONTAINER"},"outputs": {"data": "steps.5.produce","type": "CONTAINER"}},"hyperparams": {"C": {"data": 100,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "0ae7d42d-f765-3348-a28c-57d94880aa6a","name": "sklearn.svm.classes.SVC","python_path": "d3m.primitives.classification.svc.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": 
"steps.7.produce","type": "CONTAINER"}},"hyperparams": {"encoder": {"data": 5,"type": "PRIMITIVE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "39ae30f7-39ed-40af-8679-5cf108499605","name": "Label decoder for UnseenLabelEncoderPrimitive","python_path": "d3m.primitives.data_preprocessing.label_decoder.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.8.produce","type": "CONTAINER"},"reference": {"data": "steps.0.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","name": "Construct pipeline predictions output","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","version": "0.3.0"},"type": "PRIMITIVE"}]} + ], + "OBJECT_DETECTION": [ + {"id": "f0aeacc2-3147-4a35-ac75-449e3f92f286", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.2.produce", "name": "output_predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e", "version": "0.2.0", "python_path": "d3m.primitives.data_transformation.denormalize.Common", "name": "Denormalize datasets"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", "name": "Extract a DataFrame from a Dataset"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"dataframe_resource": {"type": "VALUE", "data": "learningData"}}}, {"type": "PRIMITIVE", "primitive": {"id": "d921be1e-b158-4ab7-abb3-cb1b17f42639", "version": "0.1.0", "python_path": "d3m.primitives.object_detection.retinanet", "name": "retina_net"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}, "outputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}]}, + {"id": "dd2d98ed-5d94-4245-a0c9-0861ed7bc177","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","inputs": [{"name": "input dataset"}],"outputs": [{"data": "steps.4.produce","name": "predictions of input dataset"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e","version": "0.2.0","python_path": "d3m.primitives.data_transformation.denormalize.Common","name": "Denormalize datasets"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","version": "0.3.0","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","name": "Extract a DataFrame from a Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": 
["https://metadata.datadrivendiscovery.org/types/PrimaryMultiKey","https://metadata.datadrivendiscovery.org/types/FileName"]}}},{"type": "PRIMITIVE","primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version": "0.3.0","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name": "Extracts columns by semantic type"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"semantic_types": {"type": "VALUE","data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}},{"type": "PRIMITIVE","primitive": {"id": "dsbox-featurizer-object-detection-yolo","version": "1.5.3","python_path": "d3m.primitives.feature_extraction.yolo.DSBOX","name": "DSBox Object Detection YOLO"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"},"outputs": {"type": "CONTAINER","data": "steps.3.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"epochs": {"type": "VALUE","data": 200},"use_fitted_weight": {"type": "VALUE","data": false}}}]}, + {"id":"acdb068f-be85-48b1-81cc-e65d7b148d74","schema":"https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","inputs":[{"name":"input dataset"}],"outputs":[{"data":"steps.4.produce","name":"predictions of input dataset"}],"steps":[{"type":"PRIMITIVE","primitive":{"id":"f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e","version":"0.2.0","python_path":"d3m.primitives.data_transformation.denormalize.Common","name":"Denormalize datasets"},"arguments":{"inputs":{"type":"CONTAINER","data":"inputs.0"}},"outputs":[{"id":"produce"}]},{"type":"PRIMITIVE","primitive":{"id":"4b42ce1e-9b98-4a25-b68e-fad13311eb65","version":"0.3.0","python_path":"d3m.primitives.data_transformation.dataset_to_dataframe.Common","name":"Extract a DataFrame from a Dataset"},"arguments":{"inputs":{"type":"CONTAINER","data":"steps.0.produce"}},"outputs":[{"id":"produce"}]},{"type":"PRIMITIVE","primitive":{"id":"4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version":"0.4.0","python_path":"d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name":"Extracts columns by semantic type"},"arguments":{"inputs":{"type":"CONTAINER","data":"steps.1.produce"}},"outputs":[{"id":"produce"}],"hyperparams":{"semantic_types":{"type":"VALUE","data":["https://metadata.datadrivendiscovery.org/types/PrimaryMultiKey","https://metadata.datadrivendiscovery.org/types/FileName"]}}},{"type":"PRIMITIVE","primitive":{"id":"4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version":"0.4.0","python_path":"d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name":"Extracts columns by semantic type"},"arguments":{"inputs":{"type":"CONTAINER","data":"steps.1.produce"}},"outputs":[{"id":"produce"}],"hyperparams":{"semantic_types":{"type":"VALUE","data":["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}},{"type":"PRIMITIVE","primitive":{"id":"dsbox-featurizer-object-detection-yolo","version":"1.5.3","python_path":"d3m.primitives.feature_extraction.yolo.DSBOX","name":"DSBox Object Detection YOLO"},"arguments":{"inputs":{"type":"CONTAINER","data":"steps.2.produce"},"outputs":{"type":"CONTAINER","data":"steps.3.produce"}},"outputs":[{"id":"produce"}]}]} + ], + "VERTEX_CLASSIFICATION": [ + {"id": "704163cb-eb0d-4771-8258-5e057503a437","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","inputs": [{"name": "inputs"}],"outputs": [{"data": 
"steps.1.produce","name": "output"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "79012210-2463-4f94-9da6-11bdc5a7e6c4","version": "0.1.2","python_path": "d3m.primitives.data_transformation.load_single_graph.DistilSingleGraphLoader","name": "Load single graph and dataframe into a parseable object"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"},{"id": "produce_target"}]},{"type": "PRIMITIVE","primitive": {"id": "0130828c-1ac0-47a9-a167-f05bae5a3146","version": "0.1.1","python_path": "d3m.primitives.vertex_nomination.vertex_nomination.DistilVertexNomination","name": "VertexNomination"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"},"outputs": {"type": "CONTAINER","data": "steps.0.produce_target"}},"outputs": [{"id": "produce"}],"hyperparams": {"metric": {"type": "VALUE","data": "accuracy"}}}]}, + {"id": "15cea2f3-9eef-4a37-8f04-eea2e30f8d68","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.9.produce","name": "output"}],"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","steps": [{"arguments": {"inputs": {"data": "inputs.0","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","name": "Extract a DataFrame from a Dataset","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": "d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.1.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.2.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "d639947e-ece0-3a39-a666-e974acf4521d","name": "sklearn.preprocessing.data.StandardScaler","python_path": "d3m.primitives.data_preprocessing.standard_scaler.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"semantic_types": {"data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","name": "Extracts columns by semantic type","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.4.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "26fc8fd3-f6b2-4c65-8afb-edb54ed2a3e4","name": "Label encoder with an unseen category","python_path": "d3m.primitives.data_preprocessing.label_encoder.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.3.produce","type": "CONTAINER"},"outputs": {"data": "steps.3.produce","type": "CONTAINER"}},"hyperparams": {"alpha": {"data": 0.01,"type": 
"VALUE"},"beta": {"data": 0.01,"type": "VALUE"},"d": {"data": 50,"type": "VALUE"},"maxiter": {"data": 500,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "e6ee30fa-af68-4bfe-9234-5ca7e7ac8e93","name": "Matrix Completion via Sparse Factorization","python_path": "d3m.primitives.collaborative_filtering.high_rank_imputer.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.6.produce","type": "CONTAINER"},"outputs": {"data": "steps.5.produce","type": "CONTAINER"}},"hyperparams": {"n_estimators": {"data": 50,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "1b2a32a6-0ec5-3ca0-9386-b8b1f1b831d1","name": "sklearn.ensemble.bagging.BaggingClassifier","python_path": "d3m.primitives.classification.bagging.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.7.produce","type": "CONTAINER"}},"hyperparams": {"encoder": {"data": 5,"type": "PRIMITIVE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "39ae30f7-39ed-40af-8679-5cf108499605","name": "Label decoder for UnseenLabelEncoderPrimitive","python_path": "d3m.primitives.data_preprocessing.label_decoder.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.8.produce","type": "CONTAINER"},"reference": {"data": "steps.0.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","name": "Construct pipeline predictions output","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","version": "0.3.0"},"type": "PRIMITIVE"}]}, + {"id": "164f4dfe-fcca-4769-aa10-d0d9f2a72cb3","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.9.produce","name": "output"}],"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","steps": [{"arguments": {"inputs": {"data": "inputs.0","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","name": "Extract a DataFrame from a Dataset","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": "d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.1.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.2.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "d639947e-ece0-3a39-a666-e974acf4521d","name": "sklearn.preprocessing.data.StandardScaler","python_path": "d3m.primitives.data_preprocessing.standard_scaler.SKlearn","version": "2019.6.7"},"type": 
"PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.3.produce","type": "CONTAINER"},"outputs": {"data": "steps.3.produce","type": "CONTAINER"}},"hyperparams": {"alpha": {"data": 0.1,"type": "VALUE"},"beta": {"data": 0.01,"type": "VALUE"},"d": {"data": 20,"type": "VALUE"},"maxiter": {"data": 1000,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "e6ee30fa-af68-4bfe-9234-5ca7e7ac8e93","name": "Matrix Completion via Sparse Factorization","python_path": "d3m.primitives.collaborative_filtering.high_rank_imputer.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"semantic_types": {"data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","name": "Extracts columns by semantic type","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.5.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "26fc8fd3-f6b2-4c65-8afb-edb54ed2a3e4","name": "Label encoder with an unseen category","python_path": "d3m.primitives.data_preprocessing.label_encoder.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.4.produce","type": "CONTAINER"},"outputs": {"data": "steps.6.produce","type": "CONTAINER"}},"hyperparams": {"C": {"data": 100,"type": "VALUE"},"kernel": {"data": {"choice": "rbf","gamma": {"case": "float","value": 0.02}},"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "0ae7d42d-f765-3348-a28c-57d94880aa6a","name": "sklearn.svm.classes.SVC","python_path": "d3m.primitives.classification.svc.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.7.produce","type": "CONTAINER"}},"hyperparams": {"encoder": {"data": 6,"type": "PRIMITIVE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "39ae30f7-39ed-40af-8679-5cf108499605","name": "Label decoder for UnseenLabelEncoderPrimitive","python_path": "d3m.primitives.data_preprocessing.label_decoder.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.8.produce","type": "CONTAINER"},"reference": {"data": "steps.0.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","name": "Construct pipeline predictions output","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","version": "0.3.0"},"type": "PRIMITIVE"}]}, + {"id": "9ea39abe-b164-4eff-918e-c364ce87d167","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.6.produce","name": "output predictions"}],"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","steps": [{"arguments": {"inputs": {"data": "inputs.0","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","name": "Extract a DataFrame from a Dataset","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [1,2],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": 
"d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.1.produce","type": "CONTAINER"}},"hyperparams": {"convert": {"data": true,"type": "VALUE"},"to_type": {"data": {"encoding": "pickle","value": "gANjYnVpbHRpbnMKaW50CnEALg=="},"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [3],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": "d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.3.produce","type": "CONTAINER"}},"hyperparams": {"convert": {"data": false,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.2.produce","type": "CONTAINER"},"outputs": {"data": "steps.4.produce","type": "CONTAINER"}},"hyperparams": {"alpha": {"data": 1,"type": "VALUE"},"beta": {"data": 1,"type": "VALUE"},"d": {"data": 100,"type": "VALUE"},"maxiter": {"data": 1000,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "e6ee30fa-af68-4bfe-9234-5ca7e7ac8e93","name": "Matrix Completion via Sparse Factorization","python_path": "d3m.primitives.collaborative_filtering.high_rank_imputer.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.5.produce","type": "CONTAINER"},"reference": {"data": "steps.0.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","name": "Construct pipeline predictions output","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","version": "0.3.0"},"type": "PRIMITIVE"}]}, + {"id": "c4019fda-d205-4f89-9acf-5741e45e601a","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.9.produce","name": "output"}],"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","steps": [{"arguments": {"inputs": {"data": "inputs.0","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","name": "Extract a DataFrame from a Dataset","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": "d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": 
{"inputs": {"data": "steps.1.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.2.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "d639947e-ece0-3a39-a666-e974acf4521d","name": "sklearn.preprocessing.data.StandardScaler","python_path": "d3m.primitives.data_preprocessing.standard_scaler.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"semantic_types": {"data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","name": "Extracts columns by semantic type","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.4.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "26fc8fd3-f6b2-4c65-8afb-edb54ed2a3e4","name": "Label encoder with an unseen category","python_path": "d3m.primitives.data_preprocessing.label_encoder.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.3.produce","type": "CONTAINER"},"outputs": {"data": "steps.3.produce","type": "CONTAINER"}},"hyperparams": {"alpha": {"data": 0.01,"type": "VALUE"},"beta": {"data": 0.001,"type": "VALUE"},"d": {"data": 90,"type": "VALUE"},"maxiter": {"data": 1000,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "e6ee30fa-af68-4bfe-9234-5ca7e7ac8e93","name": "Matrix Completion via Sparse Factorization","python_path": "d3m.primitives.collaborative_filtering.high_rank_imputer.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.6.produce","type": "CONTAINER"},"outputs": {"data": "steps.5.produce","type": "CONTAINER"}},"hyperparams": {"C": {"data": 100,"type": "VALUE"},"kernel": {"data": {"choice": "rbf","gamma": {"case": "float","value": 0.01}},"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "0ae7d42d-f765-3348-a28c-57d94880aa6a","name": "sklearn.svm.classes.SVC","python_path": "d3m.primitives.classification.svc.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.7.produce","type": "CONTAINER"}},"hyperparams": {"encoder": {"data": 5,"type": "PRIMITIVE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "39ae30f7-39ed-40af-8679-5cf108499605","name": "Label decoder for UnseenLabelEncoderPrimitive","python_path": "d3m.primitives.data_preprocessing.label_decoder.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.8.produce","type": "CONTAINER"},"reference": {"data": "steps.0.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","name": "Construct pipeline predictions output","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","version": "0.3.0"},"type": "PRIMITIVE"}]}, + {"id": "e1a156e9-0e34-4def-b960-5ad5f3a910a1","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.9.produce","name": "output"}],"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": 
"2020-01-21T20:00:00.000000Z","steps": [{"arguments": {"inputs": {"data": "inputs.0","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","name": "Extract a DataFrame from a Dataset","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": "d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.1.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7","name": "Parses strings into their types","python_path": "d3m.primitives.data_transformation.column_parser.Common","version": "0.5.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.2.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"semantic_types": {"data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","name": "Extracts columns by semantic type","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.4.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "26fc8fd3-f6b2-4c65-8afb-edb54ed2a3e4","name": "Label encoder with an unseen category","python_path": "d3m.primitives.data_preprocessing.label_encoder.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.3.produce","type": "CONTAINER"},"outputs": {"data": "steps.3.produce","type": "CONTAINER"}},"hyperparams": {"alpha": {"data": 0.01,"type": "VALUE"},"beta": {"data": 0.001,"type": "VALUE"},"d": {"data": 100,"type": "VALUE"},"maxiter": {"data": 1000,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "e6ee30fa-af68-4bfe-9234-5ca7e7ac8e93","name": "Matrix Completion via Sparse Factorization","python_path": "d3m.primitives.collaborative_filtering.high_rank_imputer.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.6.produce","type": "CONTAINER"},"outputs": {"data": "steps.5.produce","type": "CONTAINER"}},"hyperparams": {"C": {"data": 100,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "0ae7d42d-f765-3348-a28c-57d94880aa6a","name": "sklearn.svm.classes.SVC","python_path": "d3m.primitives.classification.svc.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.7.produce","type": "CONTAINER"}},"hyperparams": {"encoder": {"data": 5,"type": "PRIMITIVE"}},"outputs": [{"id": 
"produce"}],"primitive": {"id": "39ae30f7-39ed-40af-8679-5cf108499605","name": "Label decoder for UnseenLabelEncoderPrimitive","python_path": "d3m.primitives.data_preprocessing.label_decoder.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.8.produce","type": "CONTAINER"},"reference": {"data": "steps.0.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","name": "Construct pipeline predictions output","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","version": "0.3.0"},"type": "PRIMITIVE"}]}, + {"id": "0f6cafc4-5628-47bc-bbf5-8cab3a7c0e95","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.3.produce","name": "Predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "cb192a83-63e2-4075-bab9-e6ba1a8365b6","version": "0.1.0","python_path": "d3m.primitives.data_transformation.load_graphs.JHU","name": "Extract a list of Graphs from a Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "32fec24f-6861-4a4c-88f3-d4ec2bc1b486","version": "0.1.0","python_path": "d3m.primitives.data_preprocessing.largest_connected_component.JHU","name": "jhu.lcc"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "8fa6178b-84f7-37d8-87e8-4d3a44c86569","version": "0.1.0","python_path": "d3m.primitives.data_transformation.laplacian_spectral_embedding.JHU","name": "jhu.lse"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"max_dimension": {"type": "VALUE","data": 5},"use_attributes": {"type": "VALUE","data": true}}},{"type": "PRIMITIVE","primitive": {"id": "5194ef94-3683-319a-9d8d-5c3fdd09de24","version": "0.1.0","python_path": "d3m.primitives.graph_clustering.gaussian_clustering.JHU","name": "jhu.gclust"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"max_clusters": {"type": "VALUE","data": 10}}}]}, + {"id": "ffc49730-eb73-423c-ab6c-acb47300fcfc","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.3.produce","name": "Predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "cb192a83-63e2-4075-bab9-e6ba1a8365b6","version": "0.1.0","python_path": "d3m.primitives.data_transformation.load_graphs.JHU","name": "Extract a list of Graphs from a Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "32fec24f-6861-4a4c-88f3-d4ec2bc1b486","version": "0.1.0","python_path": "d3m.primitives.data_preprocessing.largest_connected_component.JHU","name": "jhu.lcc"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "8fa6178b-84f7-37d8-87e8-4d3a44c86569","version": "0.1.0","python_path": "d3m.primitives.data_transformation.laplacian_spectral_embedding.JHU","name": "jhu.lse"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"max_dimension": 
{"type": "VALUE","data": 5},"use_attributes": {"type": "VALUE","data": true}}},{"type": "PRIMITIVE","primitive": {"id": "c9d5da5d-0520-468e-92df-bd3a85bb4fac","version": "0.1.0","python_path": "d3m.primitives.classification.gaussian_classification.JHU","name": "jhu.gclass"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}]}]}, + {"id": "4a2fb696-bf29-410d-934d-c4b17b273938","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.1.produce","name": "Results"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "a22f9bd3-818e-44e9-84a3-9592c5a85408","version": "1.7.8","python_path": "d3m.primitives.data_transformation.vertex_classification_parser.VertexClassificationParser","name": "Vertex Classification Parser"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "dca25a46-7a5f-48d9-ac9b-d14d4d671b0b","version": "1.7.8","python_path": "d3m.primitives.classification.vertex_nomination.VertexClassification","name": "Vertex Classification"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"}},"outputs": [{"id": "produce"}]}]}, + {"id": "2e216966-bd3b-4b53-9933-7ce9a88de6d1","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.9.produce","name": "output"}],"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","steps": [{"arguments": {"inputs": {"data": "inputs.0","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","name": "Extract a DataFrame from a Dataset","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": "d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.1.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.2.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "d639947e-ece0-3a39-a666-e974acf4521d","name": "sklearn.preprocessing.data.StandardScaler","python_path": "d3m.primitives.data_preprocessing.standard_scaler.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.3.produce","type": "CONTAINER"}},"hyperparams": {"alpha": {"data": 0.1,"type": "VALUE"},"d": {"data": 15,"type": "VALUE"},"epsilon": {"data": 0.1,"type": "VALUE"},"maxiter": {"data": 5000,"type": "VALUE"},"t": {"data": 0.001,"type": "VALUE"}},"outputs": [{"id": 
"produce"}],"primitive": {"id": "7c357e6e-7124-4f2a-8371-8021c8c95cc9","name": "Huber PCA","python_path": "d3m.primitives.feature_extraction.huber_pca.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"semantic_types": {"data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","name": "Extracts columns by semantic type","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.5.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "26fc8fd3-f6b2-4c65-8afb-edb54ed2a3e4","name": "Label encoder with an unseen category","python_path": "d3m.primitives.data_preprocessing.label_encoder.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.4.produce","type": "CONTAINER"},"outputs": {"data": "steps.6.produce","type": "CONTAINER"}},"hyperparams": {"C": {"data": 1000,"type": "VALUE"},"kernel": {"data": {"choice": "rbf","gamma": {"case": "float","value": 0.01}},"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "0ae7d42d-f765-3348-a28c-57d94880aa6a","name": "sklearn.svm.classes.SVC","python_path": "d3m.primitives.classification.svc.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.7.produce","type": "CONTAINER"}},"hyperparams": {"encoder": {"data": 6,"type": "PRIMITIVE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "39ae30f7-39ed-40af-8679-5cf108499605","name": "Label decoder for UnseenLabelEncoderPrimitive","python_path": "d3m.primitives.data_preprocessing.label_decoder.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.8.produce","type": "CONTAINER"},"reference": {"data": "steps.0.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","name": "Construct pipeline predictions output","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","version": "0.3.0"},"type": "PRIMITIVE"}]}, + {"id": "4f678918-1de5-4db4-8c1c-d7dd0e3b2bec","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.11.produce","name": "output"}],"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","steps": [{"arguments": {"inputs": {"data": "inputs.0","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","name": "Extract a DataFrame from a Dataset","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": "d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.1.produce","type": "CONTAINER"}},"outputs": 
[{"id": "produce"}],"primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7","name": "Parses strings into their types","python_path": "d3m.primitives.data_transformation.column_parser.Common","version": "0.5.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.2.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.3.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "d639947e-ece0-3a39-a666-e974acf4521d","name": "sklearn.preprocessing.data.StandardScaler","python_path": "d3m.primitives.data_preprocessing.standard_scaler.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.4.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "d016df89-de62-3c53-87ed-c06bb6a23cde","name": "sklearn.impute.SimpleImputer","python_path": "d3m.primitives.data_cleaning.imputer.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.5.produce","type": "CONTAINER"}},"hyperparams": {"alpha": {"data": 0.1,"type": "VALUE"},"d": {"data": 20,"type": "VALUE"},"epsilon": {"data": 1,"type": "VALUE"},"t": {"data": 0.001,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "7c357e6e-7124-4f2a-8371-8021c8c95cc9","name": "Huber PCA","python_path": "d3m.primitives.feature_extraction.huber_pca.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"semantic_types": {"data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","name": "Extracts columns by semantic type","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.7.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "26fc8fd3-f6b2-4c65-8afb-edb54ed2a3e4","name": "Label encoder with an unseen category","python_path": "d3m.primitives.data_preprocessing.label_encoder.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.6.produce","type": "CONTAINER"},"outputs": {"data": "steps.8.produce","type": "CONTAINER"}},"hyperparams": {"C": {"data": 1000,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "0ae7d42d-f765-3348-a28c-57d94880aa6a","name": "sklearn.svm.classes.SVC","python_path": "d3m.primitives.classification.svc.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.9.produce","type": "CONTAINER"}},"hyperparams": {"encoder": {"data": 8,"type": "PRIMITIVE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "39ae30f7-39ed-40af-8679-5cf108499605","name": "Label decoder for UnseenLabelEncoderPrimitive","python_path": "d3m.primitives.data_preprocessing.label_decoder.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.10.produce","type": "CONTAINER"},"reference": {"data": "steps.0.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","name": "Construct pipeline predictions 
output","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","version": "0.3.0"},"type": "PRIMITIVE"}]}, + {"id": "94db5247-7827-468a-81b6-6b709af86d5c","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.9.produce","name": "output"}],"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","steps": [{"arguments": {"inputs": {"data": "inputs.0","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","name": "Extract a DataFrame from a Dataset","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": "d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.1.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.2.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "d639947e-ece0-3a39-a666-e974acf4521d","name": "sklearn.preprocessing.data.StandardScaler","python_path": "d3m.primitives.data_preprocessing.standard_scaler.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.3.produce","type": "CONTAINER"}},"hyperparams": {"alpha": {"data": 0.1,"type": "VALUE"},"d": {"data": 50,"type": "VALUE"},"epsilon": {"data": 0.1,"type": "VALUE"},"maxiter": {"data": 2000,"type": "VALUE"},"t": {"data": 0.001,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "7c357e6e-7124-4f2a-8371-8021c8c95cc9","name": "Huber PCA","python_path": "d3m.primitives.feature_extraction.huber_pca.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"semantic_types": {"data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","name": "Extracts columns by semantic type","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.5.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "26fc8fd3-f6b2-4c65-8afb-edb54ed2a3e4","name": "Label encoder with an unseen category","python_path": "d3m.primitives.data_preprocessing.label_encoder.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.4.produce","type": "CONTAINER"},"outputs": {"data": "steps.6.produce","type": "CONTAINER"}},"hyperparams": {"C": {"data": 1000,"type": "VALUE"},"kernel": {"data": {"choice": "rbf","gamma": {"case": 
"float","value": 0.01}},"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "0ae7d42d-f765-3348-a28c-57d94880aa6a","name": "sklearn.svm.classes.SVC","python_path": "d3m.primitives.classification.svc.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.7.produce","type": "CONTAINER"}},"hyperparams": {"encoder": {"data": 6,"type": "PRIMITIVE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "39ae30f7-39ed-40af-8679-5cf108499605","name": "Label decoder for UnseenLabelEncoderPrimitive","python_path": "d3m.primitives.data_preprocessing.label_decoder.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.8.produce","type": "CONTAINER"},"reference": {"data": "steps.0.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","name": "Construct pipeline predictions output","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","version": "0.3.0"},"type": "PRIMITIVE"}]}, + {"id": "7cb3e0eb-2f3e-4756-9c4e-1cc2852c84b9","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.9.produce","name": "output"}],"schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","steps": [{"arguments": {"inputs": {"data": "inputs.0","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65","name": "Extract a DataFrame from a Dataset","python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"columns": {"data": [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "81d7e261-e25b-4721-b091-a31cd46e99ae","name": "Extracts columns","python_path": "d3m.primitives.data_transformation.extract_columns.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.1.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "196152a7-a873-4676-bbde-95627f4b5306","name": "Preprocessing for categorical columns","python_path": "d3m.primitives.column_parser.preprocess_categorical_columns.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.2.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "d639947e-ece0-3a39-a666-e974acf4521d","name": "sklearn.preprocessing.data.StandardScaler","python_path": "d3m.primitives.data_preprocessing.standard_scaler.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.3.produce","type": "CONTAINER"}},"hyperparams": {"alpha": {"data": 0.1,"type": "VALUE"},"d": {"data": 25,"type": "VALUE"},"epsilon": {"data": 0.01,"type": "VALUE"},"maxiter": {"data": 5000,"type": "VALUE"},"t": {"data": 0.0005,"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "7c357e6e-7124-4f2a-8371-8021c8c95cc9","name": "Huber PCA","python_path": "d3m.primitives.feature_extraction.huber_pca.Cornell","version": "v0.1.1"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.0.produce","type": "CONTAINER"}},"hyperparams": {"semantic_types": {"data": 
["https://metadata.datadrivendiscovery.org/types/TrueTarget"],"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","name": "Extracts columns by semantic type","python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","version": "0.3.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.5.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "26fc8fd3-f6b2-4c65-8afb-edb54ed2a3e4","name": "Label encoder with an unseen category","python_path": "d3m.primitives.data_preprocessing.label_encoder.Common","version": "0.2.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.4.produce","type": "CONTAINER"},"outputs": {"data": "steps.6.produce","type": "CONTAINER"}},"hyperparams": {"C": {"data": 5000,"type": "VALUE"},"kernel": {"data": {"choice": "rbf","gamma": {"case": "float","value": 0.1}},"type": "VALUE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "0ae7d42d-f765-3348-a28c-57d94880aa6a","name": "sklearn.svm.classes.SVC","python_path": "d3m.primitives.classification.svc.SKlearn","version": "2019.6.7"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.7.produce","type": "CONTAINER"}},"hyperparams": {"encoder": {"data": 6,"type": "PRIMITIVE"}},"outputs": [{"id": "produce"}],"primitive": {"id": "39ae30f7-39ed-40af-8679-5cf108499605","name": "Label decoder for UnseenLabelEncoderPrimitive","python_path": "d3m.primitives.data_preprocessing.label_decoder.Common","version": "0.1.0"},"type": "PRIMITIVE"},{"arguments": {"inputs": {"data": "steps.8.produce","type": "CONTAINER"},"reference": {"data": "steps.0.produce","type": "CONTAINER"}},"outputs": [{"id": "produce"}],"primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736","name": "Construct pipeline predictions output","python_path": "d3m.primitives.data_transformation.construct_predictions.Common","version": "0.3.0"},"type": "PRIMITIVE"}]}, + {"id":"c50643d6-9f82-44fb-ae6e-e40ee96b6899","schema":"https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","inputs":[{"name":"input dataset"}],"outputs":[{"data":"steps.5.produce","name":"predictions of input dataset"}],"steps":[{"type":"PRIMITIVE","primitive":{"id":"f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e","version":"0.2.0","python_path":"d3m.primitives.data_transformation.denormalize.Common","name":"Denormalize datasets"},"arguments":{"inputs":{"type":"CONTAINER","data":"inputs.0"}},"outputs":[{"id":"produce"}],"hyperparams":{"starting_resource":{"type":"VALUE","data":null},"recursive":{"type":"VALUE","data":true},"many_to_many":{"type":"VALUE","data":false},"discard_not_joined_tabular_resources":{"type":"VALUE","data":false}}},{"type":"PRIMITIVE","primitive":{"id":"4b42ce1e-9b98-4a25-b68e-fad13311eb65","version":"0.3.0","python_path":"d3m.primitives.data_transformation.dataset_to_dataframe.Common","name":"Extract a DataFrame from a Dataset"},"arguments":{"inputs":{"type":"CONTAINER","data":"steps.0.produce"}},"outputs":[{"id":"produce"}]},{"type":"PRIMITIVE","primitive":{"id":"4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version":"0.3.0","python_path":"d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name":"Extracts columns by semantic 
type"},"arguments":{"inputs":{"type":"CONTAINER","data":"steps.1.produce"}},"outputs":[{"id":"produce"}],"hyperparams":{"semantic_types":{"type":"VALUE","data":["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}},{"type":"PRIMITIVE","primitive":{"id":"7d61e488-b5bb-4c79-bad6-f1dc07292bf4","version":"1.0.0","python_path":"d3m.primitives.feature_construction.sdne.DSBOX","name":"SDNE"},"arguments":{"inputs":{"type":"CONTAINER","data":"steps.0.produce"}},"outputs":[{"id":"produce"}],"hyperparams":{"beta":{"type":"VALUE","data":4},"alpha":{"type":"VALUE","data":0.00001},"dimension":{"type":"VALUE","data":128},"epochs":{"type":"VALUE","data":200},"lr":{"type":"VALUE","data":0.0005}}},{"type": "PRIMITIVE","primitive": {"id":"7ddf2fd8-2f7f-4e53-96a7-0d9f5aeecf93","version":"1.5.3","python_path":"d3m.primitives.data_transformation.to_numeric.DSBOX","name":"ISI DSBox To Numeric DataFrame"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.3.produce"}},"outputs":[{"id":"produce"}]},{"type":"PRIMITIVE","primitive":{"id":"1dd82833-5692-39cb-84fb-2455683075f3","version":"2019.6.7","python_path":"d3m.primitives.classification.random_forest.SKlearn","name":"sklearn.ensemble.forest.RandomForestClassifier"},"arguments":{"inputs":{"type":"CONTAINER","data":"steps.4.produce"},"outputs":{"type":"CONTAINER","data":"steps.2.produce"}},"outputs":[{"id":"produce"}],"hyperparams":{"max_depth":{"type":"VALUE","data":{"case":"int","value":30}},"min_samples_leaf":{"type":"VALUE","data":{"case":"absolute","value":2}},"min_samples_split":{"type":"VALUE","data":{"case":"absolute","value":2}},"max_features":{"type":"VALUE","data":{"case":"calculated","value":"sqrt"}},"n_estimators":{"type":"VALUE","data":100},"add_index_columns":{"type":"VALUE","data":true},"use_semantic_types":{"type":"VALUE","data":false},"error_on_no_input":{"type":"VALUE","data":true}}}]}, + {"id":"fc1eee7f-6435-4001-9cf6-6d24330d9b1c","schema":"https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","inputs":[{"name":"input dataset"}],"outputs":[{"data":"steps.4.produce","name":"predictions of input dataset"}],"steps":[{"type":"PRIMITIVE","primitive":{"id":"f31f8c1f-d1c5-43e5-a4b2-2ae4a761ef2e","version":"0.2.0","python_path":"d3m.primitives.data_transformation.denormalize.Common","name":"Denormalize datasets"},"arguments":{"inputs":{"type":"CONTAINER","data":"inputs.0"}},"outputs":[{"id":"produce"}],"hyperparams":{"starting_resource":{"type":"VALUE","data":null},"recursive":{"type":"VALUE","data":true},"many_to_many":{"type":"VALUE","data":false},"discard_not_joined_tabular_resources":{"type":"VALUE","data":false}}},{"type":"PRIMITIVE","primitive":{"id":"4b42ce1e-9b98-4a25-b68e-fad13311eb65","version":"0.3.0","python_path":"d3m.primitives.data_transformation.dataset_to_dataframe.Common","name":"Extract a DataFrame from a Dataset"},"arguments":{"inputs":{"type":"CONTAINER","data":"steps.0.produce"}},"outputs":[{"id":"produce"}]},{"type":"PRIMITIVE","primitive":{"id":"4503a4c6-42f7-45a1-a1d4-ed69699cf5e1","version":"0.3.0","python_path":"d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common","name":"Extracts columns by semantic 
type"},"arguments":{"inputs":{"type":"CONTAINER","data":"steps.1.produce"}},"outputs":[{"id":"produce"}],"hyperparams":{"semantic_types":{"type":"VALUE","data":["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}},{"type":"PRIMITIVE","primitive":{"id":"48572851-b86b-4fda-961d-f3f466adb58e","version":"1.0.0","python_path":"d3m.primitives.feature_construction.gcn_mixhop.DSBOX","name":"GCN"},"arguments":{"inputs":{"type":"CONTAINER","data":"steps.0.produce"},"outputs":{"type":"CONTAINER","data":"steps.2.produce"}},"outputs":[{"id":"produce"}],"hyperparams":{"epochs":{"type":"VALUE","data":200},"adjacency_order":{"type":"VALUE","data":3}}},{"type":"PRIMITIVE","primitive":{"id":"1dd82833-5692-39cb-84fb-2455683075f3","version":"2019.6.7","python_path":"d3m.primitives.classification.random_forest.SKlearn","name":"sklearn.ensemble.forest.RandomForestClassifier"},"arguments":{"inputs":{"type":"CONTAINER","data":"steps.3.produce"},"outputs":{"type":"CONTAINER","data":"steps.2.produce"}},"outputs":[{"id":"produce"}],"hyperparams":{"max_depth":{"type":"VALUE","data":{"case":"int","value":30}},"min_samples_leaf":{"type":"VALUE","data":{"case":"absolute","value":2}},"min_samples_split":{"type":"VALUE","data":{"case":"absolute","value":2}},"max_features":{"type":"VALUE","data":{"case":"calculated","value":"sqrt"}},"n_estimators":{"type":"VALUE","data":100},"add_index_columns":{"type":"VALUE","data":true},"use_semantic_types":{"type":"VALUE","data":false},"error_on_no_input":{"type":"VALUE","data":true}}}]}, + {"id": "ddc6c7e9-64b4-4f9c-af07-5f27461cb940","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.3.produce","name": "Predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "cb192a83-63e2-4075-bab9-e6ba1a8365b6","version": "0.1.0","python_path": "d3m.primitives.data_transformation.load_graphs.JHU","name": "Extract a list of Graphs from a Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "32fec24f-6861-4a4c-88f3-d4ec2bc1b486","version": "0.1.0","python_path": "d3m.primitives.data_preprocessing.largest_connected_component.JHU","name": "jhu.lcc"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "b940ccbd-9e9b-3166-af50-210bfd79251b","version": "0.1.0","python_path": "d3m.primitives.data_transformation.adjacency_spectral_embedding.JHU","name": "jhu.ase"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"max_dimension": {"type": "VALUE","data": 5},"use_attributes": {"type": "VALUE","data": true}}},{"type": "PRIMITIVE","primitive": {"id": "c9d5da5d-0520-468e-92df-bd3a85bb4fac","version": "0.1.0","python_path": "d3m.primitives.classification.gaussian_classification.JHU","name": "jhu.gclass"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}]}]}, + {"id": "12a4b6a8-b2e4-4604-afe5-8196bf55a925","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.3.produce","name": "Predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "cb192a83-63e2-4075-bab9-e6ba1a8365b6","version": "0.1.0","python_path": 
"d3m.primitives.data_transformation.load_graphs.JHU","name": "Extract a list of Graphs from a Dataset"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "32fec24f-6861-4a4c-88f3-d4ec2bc1b486","version": "0.1.0","python_path": "d3m.primitives.data_preprocessing.largest_connected_component.JHU","name": "jhu.lcc"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "b940ccbd-9e9b-3166-af50-210bfd79251b","version": "0.1.0","python_path": "d3m.primitives.data_transformation.adjacency_spectral_embedding.JHU","name": "jhu.ase"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"max_dimension": {"type": "VALUE","data": 5},"use_attributes": {"type": "VALUE","data": true}}},{"type": "PRIMITIVE","primitive": {"id": "5194ef94-3683-319a-9d8d-5c3fdd09de24","version": "0.1.0","python_path": "d3m.primitives.graph_clustering.gaussian_clustering.JHU","name": "jhu.gclust"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.2.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"max_clusters": {"type": "VALUE","data": 10}}}]}, + {"id": "6216f2bd-2f23-4dbf-92d0-f3b40aeac150","schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-01-21T20:00:00.000000Z","inputs": [{"name": "inputs"}],"outputs": [{"data": "steps.2.produce","name": "Predictions"}],"steps": [{"type": "PRIMITIVE","primitive": {"id": "09f2eea8-667c-44b8-a955-6a153ba9ccc3","version": "0.1.0","python_path": "d3m.primitives.link_prediction.data_conversion.JHU","name": "jhu.link_pred_graph_reader"},"arguments": {"inputs": {"type": "CONTAINER","data": "inputs.0"}},"outputs": [{"id": "produce"}]},{"type": "PRIMITIVE","primitive": {"id": "b940ccbd-9e9b-3166-af50-210bfd79251b","version": "0.1.0","python_path": "d3m.primitives.data_transformation.adjacency_spectral_embedding.JHU","name": "jhu.ase"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.0.produce"}},"outputs": [{"id": "produce"}],"hyperparams": {"which_elbow": {"type": "VALUE","data": 1},"max_dimension": {"type": "VALUE","data": 2},"use_attributes": {"type": "VALUE","data": false}}},{"type": "PRIMITIVE","primitive": {"id": "25e97696-b96f-4f5c-8620-b340fe83414d","version": "0.1.0","python_path": "d3m.primitives.link_prediction.rank_classification.JHU","name": "jhu.link_pred_rc"},"arguments": {"inputs": {"type": "CONTAINER","data": "steps.1.produce"}},"outputs": [{"id": "produce"}]}]} + ], + "FORECASTING": [] +} diff --git a/axolotl/axolotl/utils/resources/scoring_pipeline.yml b/axolotl/axolotl/utils/resources/scoring_pipeline.yml new file mode 100644 index 0000000..e95ecd5 --- /dev/null +++ b/axolotl/axolotl/utils/resources/scoring_pipeline.yml @@ -0,0 +1,31 @@ +id: f596cd77-25f8-4d4c-a350-bb30ab1e58f6 +schema: https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json +source: + name: Mitar +created: "2020-04-18T11:42:44.138742Z" +name: Scoring pipeline +description: |- + A general scoring pipeline. +inputs: + - name: predictions + - name: score dataset +outputs: + - name: scores + data: steps.0.produce +steps: + # Step 0. 
+ - type: PRIMITIVE + primitive: + id: 799802fb-2e11-4ab7-9c5e-dda09eb52a70 + version: 0.5.0 + python_path: d3m.primitives.evaluation.compute_scores.Core + name: Compute scores given the metrics to use + arguments: + inputs: + type: CONTAINER + data: inputs.0 + score_dataset: + type: CONTAINER + data: inputs.1 + outputs: + - id: produce diff --git a/axolotl/axolotl/utils/resources/splitting_pipelines.json b/axolotl/axolotl/utils/resources/splitting_pipelines.json new file mode 100644 index 0000000..d7b0639 --- /dev/null +++ b/axolotl/axolotl/utils/resources/splitting_pipelines.json @@ -0,0 +1,7 @@ +{ + "HOLDOUT_FIXED": {"id": "9c18472e-fff7-4129-93f6-1ab996e82adb", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2018-10-27T01:30:10.245934Z", "inputs": [{"name": "folds"}, {"name": "full dataset"}], "outputs": [{"data": "steps.0.produce", "name": "train datasets"}, {"data": "steps.2.produce", "name": "test datasets"}, {"data": "steps.1.produce", "name": "score datasets"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "1654f000-2178-4520-be4c-a95bc26b8d3a", "version": "0.1.0", "python_path": "d3m.primitives.evaluation.fixed_split_dataset_split.Commmon", "name": "Fixed split tabular dataset splits", "digest": "4ebb8d32da071e84370aa978f0b455a592fb2cc88181d669bcf8081ecd98fa00"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}, "dataset": {"type": "CONTAINER", "data": "inputs.1"}}, "outputs": [{"id": "produce"}, {"id": "produce_score_data"}]}, {"type": "PRIMITIVE", "primitive": {"id": "744c4090-e2f6-489e-8efc-8b1e051bfad6", "version": "0.2.0", "python_path": "d3m.primitives.evaluation.redact_columns.Common", "name": "Redact columns for evaluation", "digest": "e59c835f0ec9e720525b11e8f1409fd3733b41802d75905851c6a35b43168310"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce_score_data"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/PrivilegedData"]}, "add_semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/RedactedPrivilegedData", "https://metadata.datadrivendiscovery.org/types/MissingData"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "744c4090-e2f6-489e-8efc-8b1e051bfad6", "version": "0.2.0", "python_path": "d3m.primitives.evaluation.redact_columns.Common", "name": "Redact columns for evaluation", "digest": "e59c835f0ec9e720525b11e8f1409fd3733b41802d75905851c6a35b43168310"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}, "add_semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/RedactedTarget", "https://metadata.datadrivendiscovery.org/types/MissingData"]}}}], "source": {"name": "Mitar"}, "name": "Fixed split of tabular datasets", "description": "A pipeline which splits a tabular dataset in a way that uses for the test\n(score) split a fixed list of primary index values or row indices of the main\nresource to be used.\n", "digest": "28193e7483794e5bd164c352e02e90090d9cda17abfe542b2393a4ecb58c0bb8"}, + "K_FOLD": {"id": "c8ed65df-aa68-4ee0-bbb5-c5f76a40bcf8", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2018-07-27T19:39:00.676949Z", "inputs": [{"name": "folds"}, {"name": "full dataset"}], "outputs": [{"data": 
"steps.0.produce", "name": "train datasets"}, {"data": "steps.2.produce", "name": "test datasets"}, {"data": "steps.1.produce", "name": "score datasets"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "bfedaf3a-6dd0-4a83-ad83-3a50fe882bf8", "version": "0.1.0", "python_path": "d3m.primitives.evaluation.kfold_dataset_split.Common", "name": "K-fold cross-validation tabular dataset splits", "digest": "8fc8fd388ed30e8e13c0c04880b0dd81051cd15ae7416a962d79b8187be65fbc"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}, "dataset": {"type": "CONTAINER", "data": "inputs.1"}}, "outputs": [{"id": "produce"}, {"id": "produce_score_data"}]}, {"type": "PRIMITIVE", "primitive": {"id": "744c4090-e2f6-489e-8efc-8b1e051bfad6", "version": "0.2.0", "python_path": "d3m.primitives.evaluation.redact_columns.Common", "name": "Redact columns for evaluation", "digest": "e59c835f0ec9e720525b11e8f1409fd3733b41802d75905851c6a35b43168310"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce_score_data"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/PrivilegedData"]}, "add_semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/RedactedPrivilegedData", "https://metadata.datadrivendiscovery.org/types/MissingData"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "744c4090-e2f6-489e-8efc-8b1e051bfad6", "version": "0.2.0", "python_path": "d3m.primitives.evaluation.redact_columns.Common", "name": "Redact columns for evaluation", "digest": "e59c835f0ec9e720525b11e8f1409fd3733b41802d75905851c6a35b43168310"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}, "add_semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/RedactedTarget", "https://metadata.datadrivendiscovery.org/types/MissingData"]}}}], "source": {"name": "Mitar"}, "name": "K-fold split of tabular datasets", "description": "K-fold split of tabular datasets for cross-validation.\n", "digest": "c1546da06d12b4f435973bc335a54ca7486ba51a7067c65e58e397236cecad73"}, + "k-fold-timeseries-split": {"id": "5bed1f23-ac17-4b52-9d06-a5b77a6aea51", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2019-04-08T16:18:27.250294Z", "inputs": [{"name": "folds"}, {"name": "full dataset"}], "outputs": [{"data": "steps.0.produce", "name": "train datasets"}, {"data": "steps.2.produce", "name": "test datasets"}, {"data": "steps.1.produce", "name": "score datasets"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "002f9ad1-46e3-40f4-89ed-eeffbb3a102b", "version": "0.3.0", "python_path": "d3m.primitives.evaluation.kfold_time_series_split.Common", "name": "K-fold cross-validation timeseries dataset splits", "digest": "e06a27b03f9cea879c21e012b031f84c2a7b37193987134481db1117f05e9657"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}, "dataset": {"type": "CONTAINER", "data": "inputs.1"}}, "outputs": [{"id": "produce"}, {"id": "produce_score_data"}]}, {"type": "PRIMITIVE", "primitive": {"id": "744c4090-e2f6-489e-8efc-8b1e051bfad6", "version": "0.2.0", "python_path": "d3m.primitives.evaluation.redact_columns.Common", "name": "Redact columns for evaluation", "digest": "e59c835f0ec9e720525b11e8f1409fd3733b41802d75905851c6a35b43168310"}, "arguments": 
{"inputs": {"type": "CONTAINER", "data": "steps.0.produce_score_data"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/PrivilegedData"]}, "add_semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/RedactedPrivilegedData", "https://metadata.datadrivendiscovery.org/types/MissingData"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "744c4090-e2f6-489e-8efc-8b1e051bfad6", "version": "0.2.0", "python_path": "d3m.primitives.evaluation.redact_columns.Common", "name": "Redact columns for evaluation", "digest": "e59c835f0ec9e720525b11e8f1409fd3733b41802d75905851c6a35b43168310"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}, "add_semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/RedactedTarget", "https://metadata.datadrivendiscovery.org/types/MissingData"]}}}], "source": {"name": "Jeffrey Gleason"}, "name": "K-fold split of timeseries datasets", "description": "K-fold split of timeseries datasets for cross-validation.\n", "digest": "33aea0b6bd864a383020eb9d1f64fda193e20bb8690ee516809004d805f9614a"}, + "TRAINING_DATA": {"id": "79ce71bd-db96-494b-a455-14f2e2ac5040", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2018-10-26T00:48:08.341897Z", "inputs": [{"name": "folds"}, {"name": "full dataset"}], "outputs": [{"data": "steps.0.produce", "name": "train datasets"}, {"data": "steps.2.produce", "name": "test datasets"}, {"data": "steps.1.produce", "name": "score datasets"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "48c683ad-da9e-48cf-b3a0-7394dba5e5d2", "version": "0.1.0", "python_path": "d3m.primitives.evaluation.no_split_dataset_split.Common", "name": "No-split tabular dataset splits", "digest": "869d62e577148338d1c732347d6d0bf2119ae9af6b90037fda5044ab0eef01dc"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}, "dataset": {"type": "CONTAINER", "data": "inputs.1"}}, "outputs": [{"id": "produce"}, {"id": "produce_score_data"}]}, {"type": "PRIMITIVE", "primitive": {"id": "744c4090-e2f6-489e-8efc-8b1e051bfad6", "version": "0.2.0", "python_path": "d3m.primitives.evaluation.redact_columns.Common", "name": "Redact columns for evaluation", "digest": "e59c835f0ec9e720525b11e8f1409fd3733b41802d75905851c6a35b43168310"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce_score_data"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/PrivilegedData"]}, "add_semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/RedactedPrivilegedData", "https://metadata.datadrivendiscovery.org/types/MissingData"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "744c4090-e2f6-489e-8efc-8b1e051bfad6", "version": "0.2.0", "python_path": "d3m.primitives.evaluation.redact_columns.Common", "name": "Redact columns for evaluation", "digest": "e59c835f0ec9e720525b11e8f1409fd3733b41802d75905851c6a35b43168310"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}, "add_semantic_types": {"type": 
"VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/RedactedTarget", "https://metadata.datadrivendiscovery.org/types/MissingData"]}}}], "source": {"name": "Mitar"}, "name": "No split of tabular datasets", "description": "A pipeline which splits a tabular dataset in a way that for all splits it\nproduces the same (full) dataset. It still redacts the test split.\nUseful for unsupervised learning tasks.\n", "digest": "690373622142f12dc078657246b8f2f6c070ebd32720321d786a3f0c653d55cc"}, + "HOLDOUT": {"id": "3c11d171-e2ad-4d26-a034-04f3b062306c", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2018-07-28T01:24:39.642266Z", "inputs": [{"name": "folds"}, {"name": "full dataset"}], "outputs": [{"data": "steps.0.produce", "name": "train datasets"}, {"data": "steps.2.produce", "name": "test datasets"}, {"data": "steps.1.produce", "name": "score datasets"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "3fcc6dc4-6681-4c86-948e-066d14e7d803", "version": "0.1.0", "python_path": "d3m.primitives.evaluation.train_score_dataset_split.Common", "name": "Train-score tabular dataset splits", "digest": "f65655f435f9e703e00f174dae743f93fee5c10aa2016d2398f4d53bee8d5bae"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}, "dataset": {"type": "CONTAINER", "data": "inputs.1"}}, "outputs": [{"id": "produce"}, {"id": "produce_score_data"}]}, {"type": "PRIMITIVE", "primitive": {"id": "744c4090-e2f6-489e-8efc-8b1e051bfad6", "version": "0.2.0", "python_path": "d3m.primitives.evaluation.redact_columns.Common", "name": "Redact columns for evaluation", "digest": "e59c835f0ec9e720525b11e8f1409fd3733b41802d75905851c6a35b43168310"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce_score_data"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/PrivilegedData"]}, "add_semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/RedactedPrivilegedData", "https://metadata.datadrivendiscovery.org/types/MissingData"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "744c4090-e2f6-489e-8efc-8b1e051bfad6", "version": "0.2.0", "python_path": "d3m.primitives.evaluation.redact_columns.Common", "name": "Redact columns for evaluation", "digest": "e59c835f0ec9e720525b11e8f1409fd3733b41802d75905851c6a35b43168310"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}, "add_semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/RedactedTarget", "https://metadata.datadrivendiscovery.org/types/MissingData"]}}}], "source": {"name": "Mitar"}, "name": "Train-test split of tabular datasets", "description": "Train-test split of tabular datasets.\n", "digest": "675ee3e96e9b1bfba41694b6289a889ef6fc96e5477b89c8267871b941e4d78e"} +} \ No newline at end of file diff --git a/axolotl/axolotl/utils/schemas.py b/axolotl/axolotl/utils/schemas.py new file mode 100644 index 0000000..b70187f --- /dev/null +++ b/axolotl/axolotl/utils/schemas.py @@ -0,0 +1,472 @@ +import os +import copy +import json +import typing +import logging +import math +import random +import binascii + +from d3m import container +from d3m.metadata.problem import TaskKeyword, PerformanceMetric +from d3m.metadata.pipeline import Pipeline +from d3m import utils as 
d3m_utils + +from axolotl.utils import pipeline as pipeline_utils + +logger = logging.getLogger(__name__) + + +# ContainerType = typing.Union[container.Dataset, container.DataFrame, container.ndarray, container.List] +ContainerType = container.Dataset + +resource_dir = os.path.dirname(__file__) +SPLITTING_PIPELINES_DIR = os.path.join(resource_dir, 'resources', 'splitting_pipelines.json') +SCORING_PIPELINES_DIR = os.path.join(resource_dir, 'resources', 'scoring_pipeline.yml') +PIPELINES_DB_DIR = os.path.join(resource_dir, 'resources', 'default_pipelines.json') + +TASK_TYPE = { + TaskKeyword.CLASSIFICATION, TaskKeyword.REGRESSION, + TaskKeyword.CLUSTERING, TaskKeyword.LINK_PREDICTION, + TaskKeyword.VERTEX_NOMINATION, TaskKeyword.COMMUNITY_DETECTION, + TaskKeyword.GRAPH_MATCHING, TaskKeyword.COLLABORATIVE_FILTERING, + TaskKeyword.OBJECT_DETECTION, TaskKeyword.VERTEX_CLASSIFICATION, + TaskKeyword.FORECASTING +} + +TASK_SUBTYPES = { + TaskKeyword.MULTIVARIATE, + TaskKeyword.BINARY, + TaskKeyword.NONOVERLAPPING, + TaskKeyword.OVERLAPPING, + TaskKeyword.UNIVARIATE, + TaskKeyword.MULTICLASS, + TaskKeyword.MULTILABEL, +} + +DATA_TYPES = { + TaskKeyword.TIME_SERIES, + TaskKeyword.AUDIO, + TaskKeyword.TABULAR, + TaskKeyword.TEXT, + TaskKeyword.VIDEO, + TaskKeyword.GRAPH, + TaskKeyword.IMAGE, + TaskKeyword.GEOSPATIAL, + TaskKeyword.RELATIONAL, + TaskKeyword.GROUPED, + TaskKeyword.LUPI +} + +CLASSIFICATION_METRICS = [ + {'metric': PerformanceMetric.ACCURACY, 'params': {}}, + {'metric': PerformanceMetric.PRECISION, 'params': {}}, + {'metric': PerformanceMetric.RECALL, 'params': {}}, + {'metric': PerformanceMetric.F1, 'params': {}}, + {'metric': PerformanceMetric.F1_MICRO, 'params': {}}, + {'metric': PerformanceMetric.F1_MACRO, 'params': {}}, + {'metric': PerformanceMetric.ROC_AUC, 'params': {}}, +] + +BINARY_CLASSIFICATION_METRICS = [ + {'metric': PerformanceMetric.ACCURACY, 'params': {}}, +] + +MULTICLASS_CLASSIFICATION_METRICS = [ + {'metric': PerformanceMetric.ACCURACY, 'params': {}}, + {'metric': PerformanceMetric.F1_MICRO, 'params': {}}, + {'metric': PerformanceMetric.F1_MACRO, 'params': {}}, +] + +MULTILABEL_CLASSIFICATION_METRICS = [ + {'metric': PerformanceMetric.ACCURACY, 'params': {}}, +] + +REGRESSION_METRICS = [ + {'metric': PerformanceMetric.MEAN_ABSOLUTE_ERROR, 'params': {}}, + {'metric': PerformanceMetric.MEAN_SQUARED_ERROR, 'params': {}}, + {'metric': PerformanceMetric.ROOT_MEAN_SQUARED_ERROR, 'params': {}}, + {'metric': PerformanceMetric.R_SQUARED, 'params': {}}, +] + +CLUSTERING_METRICS = [ + {'metric': PerformanceMetric.NORMALIZED_MUTUAL_INFORMATION, 'params': {}}, +] + +LINK_PREDICTION_METRICS = [ + {'metric': PerformanceMetric.ACCURACY, 'params': {}}, +] + +VERTEX_NOMINATION_METRICS = [ + {'metric': PerformanceMetric.ACCURACY, 'params': {}}, +] + +COMMUNITY_DETECTION_METRICS = [ + {'metric': PerformanceMetric.NORMALIZED_MUTUAL_INFORMATION, 'params': {}}, +] + +GRAPH_CLUSTERING_METRICS = [] + +GRAPH_MATCHING_METRICS = [ + {'metric': PerformanceMetric.ACCURACY, 'params': {}} +] + +TIME_SERIES_FORECASTING_METRICS = REGRESSION_METRICS + +COLLABORATIVE_FILTERING_METRICS = REGRESSION_METRICS + +OBJECT_DETECTION_METRICS = [ + {'metric': PerformanceMetric.OBJECT_DETECTION_AVERAGE_PRECISION, 'params': {}}, +] + +MULTICLASS_VERTEX_METRICS = MULTICLASS_CLASSIFICATION_METRICS + +SEMI_SUPERVISED_MULTICLASS_CLASSIFICATION_METRICS = MULTICLASS_CLASSIFICATION_METRICS + +SEMI_SUPERVISED_REGRESSION_METRICS = REGRESSION_METRICS + +DATA_PREPARATION_PARAMS = { + 'k_fold_tabular': { + 'method': 'K_FOLD', 
+ 'number_of_folds': '3', + 'stratified': 'false', + 'shuffle': 'true', + 'randomSeed': '42', + }, + + 'holdout': { + 'method': 'HOLDOUT', + 'train_score_ratio': '0.2', + 'shuffle': 'true', + 'stratified': 'true', + 'randomSeed': '42', + }, + + 'no_stratified_holdout': { + 'method': 'HOLDOUT', + 'train_score_ratio': '0.2', + 'shuffle': 'true', + 'stratified': 'false', + 'randomSeed': '42', + }, + + 'no_split': { + 'method': 'TRAINING_DATA', + 'number_of_folds': '1', + 'stratified': 'true', + 'shuffle': 'true', + 'randomSeed': '42', + }, +} + +PROBLEM_DEFINITION = { + 'binary_classification': { + 'performance_metrics': BINARY_CLASSIFICATION_METRICS, + 'task_keywords': [TaskKeyword.CLASSIFICATION, TaskKeyword.BINARY] + }, + 'regression': { + 'performance_metrics': REGRESSION_METRICS, + 'task_keywords': [TaskKeyword.UNIVARIATE, TaskKeyword.REGRESSION] + } + +} + + +def get_task_description(keywords) -> dict: + """ + A function that parses the keywords from the problem and maps them to + TaskType, SubTaskType and data type, e.g. tabular, images, audio, etc. + + Parameters + ---------- + keywords: List[d3m.problem.TaskKeyword] + List of keywords that come from the d3m problem description + + Returns + ------- + dict + { + task_type: str + task_subtype: str + data_types: list + semi: bool + } + """ + + task_type = None + task_subtype = None + data_types = [] + semi = False + for keyword in keywords: + if keyword in TASK_TYPE: + task_type = keyword.name + elif keyword in TASK_SUBTYPES: + task_subtype = keyword.name + elif keyword in DATA_TYPES: + data_types.append(keyword.name) + elif keyword.name == TaskKeyword.SEMISUPERVISED: + semi = True + + # if data_types is empty we assume it is tabular: + if not data_types: + data_types.append(TaskKeyword.TABULAR) + + return {'task_type': task_type, 'task_subtype': task_subtype, 'data_types': data_types, 'semi': semi} + + +def get_metrics_from_task(task_des, perf_metrics=None): + """ + Provides a dictionary of metrics ready to use for performance_metrics + + Parameters + ---------- + task_des: dict + A dictionary describing the task + perf_metrics: dict + A dictionary specifying the needed performance metric parameters + + Returns + ------- + performance_metrics: dict + A dict containing performance metrics.
+ """ + # For the case that the user only wants to run a full pipeline + if not task_des: + return None + task_type = task_des['task_type'] + task_subtype = task_des['task_subtype'] + data_types = task_des['data_types'] + if TaskKeyword.CLASSIFICATION == task_type or \ + TaskKeyword.VERTEX_CLASSIFICATION == task_type: + if task_des['semi']: + # TODO: Temporary solution to binary semi supervised classification + metrics = SEMI_SUPERVISED_MULTICLASS_CLASSIFICATION_METRICS + elif TaskKeyword.BINARY == task_subtype: + metrics = BINARY_CLASSIFICATION_METRICS + elif TaskKeyword.MULTICLASS == task_subtype: + metrics = MULTICLASS_CLASSIFICATION_METRICS + elif TaskKeyword.MULTILABEL == task_subtype: + metrics = MULTILABEL_CLASSIFICATION_METRICS + else: + metrics = CLASSIFICATION_METRICS + elif TaskKeyword.REGRESSION == task_type: + metrics = REGRESSION_METRICS + elif TaskKeyword.CLUSTERING == task_type: + metrics = CLUSTERING_METRICS + elif TaskKeyword.LINK_PREDICTION == task_type: + metrics = LINK_PREDICTION_METRICS + elif TaskKeyword.VERTEX_NOMINATION == task_type: + metrics = VERTEX_NOMINATION_METRICS + elif TaskKeyword.COMMUNITY_DETECTION == task_type: + metrics = COMMUNITY_DETECTION_METRICS + elif TaskKeyword.GRAPH_MATCHING == task_type: + metrics = GRAPH_MATCHING_METRICS + elif TaskKeyword.TIME_SERIES in data_types and TaskKeyword.FORECASTING == task_type: + metrics = TIME_SERIES_FORECASTING_METRICS + elif TaskKeyword.COLLABORATIVE_FILTERING == task_type: + metrics = COLLABORATIVE_FILTERING_METRICS + elif TaskKeyword.OBJECT_DETECTION == task_type: + metrics = OBJECT_DETECTION_METRICS + else: + raise ValueError('Task keywords not supported, keywords: {}'.format(task_des)) + + for i, metric in enumerate(metrics): + for perf_metric in perf_metrics or []: + if perf_metric['metric'] == metric['metric'] and 'params' in perf_metric: + copy_metric = copy.deepcopy(metric) + copy_metric['params']['pos_label'] = perf_metric['params']['pos_label'] + metrics[i] = copy_metric + logger.info('get_metrics_from_task:metrics: {}'.format(metrics)) + return metrics + + +def get_eval_configuration(task_type: str, data_types: typing.Sequence, semi: bool) -> typing.Dict: + """ + Determines which method of evaluation to use, cross_fold, holdout, etc. + + Parameters + ---------- + task_type: str + task type + data_types: list + data types + semi: bool + whether the problem is semi-supervised + + Returns + ------- + eval_configuration: dict + A dict that contains the evaluation method to use. + """ + + # for the case of no problem, return an empty configuration. + if not task_type: + return {} + + if semi: + # Splitting semi-supervised data may produce empty ground truth, which can cause errors in sklearn metrics. + return DATA_PREPARATION_PARAMS['no_split'] + + if TaskKeyword.CLASSIFICATION == task_type: + # These data types tend to take up a lot of time to run, so no k_fold.
+ if TaskKeyword.AUDIO in data_types or TaskKeyword.VIDEO in data_types \ + or TaskKeyword.IMAGE in data_types: + return DATA_PREPARATION_PARAMS['holdout'] + else: + return DATA_PREPARATION_PARAMS['k_fold_tabular'] + elif TaskKeyword.REGRESSION == task_type: + return DATA_PREPARATION_PARAMS['no_stratified_holdout'] + else: + return DATA_PREPARATION_PARAMS['no_split'] + + +def get_splitting_pipeline(splitting_name: str) -> Pipeline: + with open(SPLITTING_PIPELINES_DIR) as file: + splitting_pipelines = json.load(file) + + if splitting_name in splitting_pipelines: + return pipeline_utils.load_pipeline(splitting_pipelines[splitting_name]) + else: + raise ValueError("{} not supported".format(splitting_name)) + + +def get_scoring_pipeline() -> Pipeline: + with open(SCORING_PIPELINES_DIR, 'r') as pipeline_file: + with d3m_utils.silence(): + pipeline = Pipeline.from_yaml(pipeline_file) + return pipeline + + +def get_pipelines_db(): + with open(PIPELINES_DB_DIR) as file: + pipelines_dict = json.load(file) + return pipelines_dict + + +def get_task_mapping(task: str) -> str: + """ + Map the task in problem_doc to the task types that are currently supported + + Parameters + ---------- + task: str + The task type in problem_doc + + Returns + ------- + str + One of the task types that are supported + + """ + mapping = { + 'LINK_PREDICTION': 'CLASSIFICATION', + TaskKeyword.VERTEX_CLASSIFICATION: 'CLASSIFICATION', + 'COMMUNITY_DETECTION': 'CLASSIFICATION', + 'GRAPH_MATCHING': 'CLASSIFICATION', + TaskKeyword.FORECASTING: 'REGRESSION', + 'OBJECT_DETECTION': 'CLASSIFICATION', + 'VERTEX_CLASSIFICATION': 'CLASSIFICATION', + } + if task in mapping: + return mapping[task] + else: + return task + + + +def hex_to_binary(hex_identifier): + return binascii.unhexlify(hex_identifier) + + +def binary_to_hex(identifier): + hex_identifier = binascii.hexlify(identifier) + return hex_identifier.decode() + + +def summarize_performance_metrics(performance_metrics): + """ + A function that averages all the folds if they exist. + + Parameters + ---------- + performance_metrics: pandas.DataFrame + A dataframe containing the fold, metric, targets and values from evaluation. + """ + sumarized_performance_metrics = {} + + for metric in performance_metrics.metric.unique(): + mean = performance_metrics[performance_metrics.metric == metric]['value'].mean() + std = performance_metrics[performance_metrics.metric == metric]['value'].std() + if math.isnan(std): + std = 0 + sumarized_performance_metrics[metric] = { + 'mean': mean, + 'std': std, + } + return sumarized_performance_metrics + + +def compute_score(sumarized_performance_metrics): + """ + A function that computes the internal score based on the average normalized metrics. + + Parameters + ---------- + sumarized_performance_metrics: dict + A dictionary containing the summarized version. + """ + score = 0 + + for metric, info in sumarized_performance_metrics.items(): + score += PerformanceMetric[metric].normalize(info['mean']) + + score = score / float(len(sumarized_performance_metrics)) + return score + + +def compute_rank(sumarized_performance_metrics): + """ + A function that computes the rank based on the average normalized metrics. + + Parameters + ---------- + sumarized_performance_metrics: dict + A dictionary containing the summarized version.
+ """ + ranks = {} + mean = 0 + for metric, info in sumarized_performance_metrics.items(): + try: + ranks[metric] = PerformanceMetric[metric].normalize(abs(info['mean'] - info['std'])) + except Exception: + ranks[metric] = 0 + mean += ranks[metric] + + mean = mean / len(sumarized_performance_metrics) + # rank = 1 - ranks[min(ranks.keys(), key=(lambda k: ranks[k]))] + random.randint(10, 30)**-6 + rank = 1 - mean + + # We add some randomness to the rank to avoid duplicates + noise = 0 + sign = -1 if random.randint(0, 1) == 0 else 1 + range_0 = -9 + range_1 = -5 + if rank < 1e-5: + range_0 = -12 + range_1 = -9 + + for i in range(range_0, range_1): + noise += random.randint(0, 9) * 10 ** i + rank = rank + noise * sign + if rank < 0: + rank *= -1 + return rank + + +def random_rank(): + ranks = 0 + average_number = 5 + for i in range(average_number): + ranks += random.uniform(0, 1) + ranks = ranks/average_number + return ranks diff --git a/axolotl/examples/build_search_algorithm.ipynb b/axolotl/examples/build_search_algorithm.ipynb new file mode 100644 index 0000000..49f2d28 --- /dev/null +++ b/axolotl/examples/build_search_algorithm.ipynb @@ -0,0 +1,284 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Axolotl: Build a dummy search method example [Binary Classification]." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, we are showcasing different components of the system.\n", + "- Loading synthetic data for a binary classification task.\n", + "- Easy use of the backend.\n", + "- Creation of a custom rank function as well as a simple search method.\n", + "- Use of the simple interface for search.\n", + "- Exploring searched pipelines." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import multiple utils we will be using" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import uuid\n", + "import random\n", + "import pandas as pd\n", + "from pprint import pprint\n", + "from sklearn.datasets import make_classification\n", + "\n", + "from d3m import container\n", + "from d3m.metadata.pipeline import Pipeline\n", + "\n", + "from axolotl.utils import data_problem\n", + "from axolotl.backend.ray import RayRunner\n", + "from axolotl.algorithms.base import PipelineSearchBase\n", + "from axolotl.utils import pipeline as pipeline_utils, schemas as schemas_utils" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generate synthetic data and import it to the system" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "x, y = make_classification(n_samples=100, n_features=20)\n", + "dataset, problem_description = data_problem.generate_dataset_problem(x, y, 'binary_classification')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Make an instance of the runner that is in charge of evaluating and running pipelines." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-07-11 19:07:52,353\tINFO resource_spec.py:212 -- Starting Ray with 3.56 GiB memory available for workers and up to 1.79 GiB for objects.
You can adjust these settings with ray.init(memory=, object_store_memory=).\n", + "2020-07-11 19:07:52,793\tINFO services.py:1170 -- View the Ray dashboard at \u001b[1m\u001b[32mlocalhost:8265\u001b[39m\u001b[22m\n" + ] + } + ], + "source": [ + "backend = RayRunner(random_seed=42, volumes_dir=None, n_workers=4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a random rank function." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def random_rank(pipeline_result):\n", + " if pipeline_result.status == 'COMPLETED':\n", + " pipeline_result.rank = random.uniform(0, 1)\n", + " return pipeline_result" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a predefined search algorithm that loads some predefined pipelines previously stored." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "class PredefinedSearch(PipelineSearchBase):\n", + " def __init__(self, problem_description, backend, *, primitives_blocklist=None, ranking_function=None):\n", + " super().__init__(problem_description=problem_description, backend=backend,\n", + " primitives_blocklist=primitives_blocklist, ranking_function=ranking_function)\n", + " if self.ranking_function is None:\n", + " self.ranking_function = random_rank\n", + " self.task_description = schemas_utils.get_task_description(self.problem_description['problem']['task_keywords'])\n", + "\n", + " self.available_pipelines = self._return_pipelines(\n", + " self.task_description['task_type'], self.task_description['task_subtype'], self.task_description['data_types'])\n", + "\n", + " # Selection of a data preparation pipeline; we provide some predefined options such as train_test_split, k_fold, etc.,\n", + " # and the user can also provide their own.\n", + " self.data_preparation_pipeline = schemas_utils.get_splitting_pipeline(\"K_FOLD\")\n", + " \n", + " # Get the metrics to evaluate the pipelines based on the problem description.\n", + " self.metrics = self.problem_description['problem']['performance_metrics']\n", + "\n", + " # Pipeline to be used for scoring; we recommend using the one provided.\n", + " self.scoring_pipeline = schemas_utils.get_scoring_pipeline()\n", + " \n", + " # Get the parameters for the data preparation pipeline, such as the number of folds, and so on.\n", + " self.data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['k_fold_tabular']\n", + "\n", + " self.offset = 10\n", + " self.current_pipeline_index = 0\n", + "\n", + " def _search(self, time_left):\n", + " # Read all the pipelines to be evaluated\n", + " pipelines_to_eval = self.available_pipelines[self.current_pipeline_index: self.current_pipeline_index+self.offset]\n", + " self.current_pipeline_index += self.offset\n", + " \n", + " # Evaluate the pipelines.\n", + " pipeline_results = self.backend.evaluate_pipelines(\n", + " problem_description=self.problem_description, pipelines=pipelines_to_eval, input_data=self.input_data,\n", + " metrics=self.metrics, data_preparation_pipeline=self.data_preparation_pipeline,\n", + " scoring_pipeline=self.scoring_pipeline, data_preparation_params=self.data_preparation_params)\n", + "\n", + " return [self.ranking_function(pipeline_result) for pipeline_result in pipeline_results]\n", + "\n", + " def _return_pipelines(self, task_type, task_subtype, data_type):\n", + " pipeline_candidates = []\n", + " for pipeline_dict in
schemas_utils.get_pipelines_db()['CLASSIFICATION']:\n", + " pipeline = pipeline_utils.load_pipeline(pipeline_dict)\n", + " pipeline.id = str(uuid.uuid4())\n", + " pipeline.created = Pipeline().created\n", + " pipeline_candidates.append(pipeline)\n", + "\n", + " return pipeline_candidates\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create an instance of the search and fit with the input_data." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# The search_fit method searches for the best pipeline within the time budget and then fits the best-ranked pipeline with the input_data.\n", + "search = PredefinedSearch(problem_description=problem_description, backend=backend)\n", + "fitted_pipeline, pipeline_result = search.search_fit(input_data=[dataset], time_limit=30)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Print information about scores of the succeeded pipelines." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----------------------------------------------------\n", + "Pipeline id: dbca8f6a-ad11-4e32-9bc1-b49c554ff224\n", + "Rank: 0.11134933476057562\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.588235 0.588235 42 0\n", + "1 ACCURACY 0.878788 0.878788 42 1\n", + "2 ACCURACY 0.818182 0.818182 42 2\n" + ] + } + ], + "source": [ + "for pipeline_result in search.history:\n", + " print('-' * 52)\n", + " print('Pipeline id:', pipeline_result.pipeline.id)\n", + " print('Rank:', pipeline_result.rank)\n", + " print(pipeline_result.scores)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Display succeeded pipelines" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png":
"iVBORw0KGgoAAAANSUhEUgAABSYAAALkCAYAAAAS1et6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAMTQAADE0B0s6tTgAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzdeXxU5d3///eZJZnsmUw2spAAgYAgwYCA4gouKIJLK1B3re1dretDLd/6s9X21tb71rZab++7rcXWoihWAQXUgihSkFUIuwiBBEhIQvZ1MjNnzu8PJIUCmkDIJPB6/pUczjnXZ0KYB+c9n+u6DMuyLAEAAAAAAABAF7KFugAAAAAAAAAAZx6CSQAAAAAAAABdjmASAAAAAAAAQJcjmAQAAAAAAADQ5QgmAQAAAAAAAHQ5gkkAAAAAAAAAXY5gEgAAAAAAAECXI5gEAAAAAAAA0OUIJgEAAAAAAAB0OYJJAAAAAAAAAF2OYBIAAAAAAABAlyOYBAAAAAAAANDlCCYBAAAAAAAAdDmCSQAAAAAAAABdjmASAAAAAAAAQJcjmAQAAAAAAADQ5QgmAQAAAAAAAHQ5gkkAAAAAAAAAXY5gEgAAAAAAAECXI5gEAAAAAAAA0OUIJgEAAAAAAAB0OYJJAAAAAAAAAF2OYBIAAAAAAABAlyOYBAAAAAAAANDlCCYBAAAAAAAAdDmCSQAAAAAAAABdjmASAAAAAAAAQJcjmAQAAAAAAADQ5QgmAQAAAAAAAHQ5gkkAAAAAAAAAXY5gEgAAAAAAAECXI5gEAAAAAAAA0OUIJgEAAAAAAAB0OYJJAAAAAAAAAF2OYBIAAAAAAABAlyOYBAAAAAAAANDlCCYBAAAAAAAAdDmCSQAAAAAAAABdjmASAAAAAAAAQJcjmAQAAAAAAADQ5QgmAQAAAAAAAHQ5gkkAAAAAAAAAXY5gEgAAAAAAAECXI5gEAAAAAAAA0OUIJgEAAAAAAAB0OYJJAAAAAAAAAF2OYBIAAAAAAABAlyOYBAAAAAAAANDlCCYBAAAAAAAAdDmCSQAAAAAAAABdjmASAAAAAAAAQJcjmAQAAAAAAADQ5RyhLgAAAOBUCJhBVTb6VF7vVWlti2pbfAqYlhx2Q/ERYUqLj1BKrEuJ0WFy2PmsFgAAAOhqBJMAAOC0UtHg1RfFNVpeWKVGr182GZIhOe02GYZkWZLfDEqWFJSlaJdTY/p5NDzLreQYV6jLBwAAAM4YhmVZVqiLAAAAOFlVja2at6FUG0pqJRnyRIUpMswuwzCOe41lWWr2mapq8kmylJcer4l5afJEh3dZ3QAAAMCZimASAAD0aMGgpTVF1ZpTUCK/GVRqrOuEpmYHzKDK6r1y2m26fli6zs1OkM12/FATAAAAwMkhmAQAAD1Wa8DUrDV7taaoWkkx4YpxOU/6ng1evw40tOrc7ARNOTdT4Q57J1QKAAAA4N8RTAIAgB6pNWBqxopibSqpU2ZCpOyd2N1oBi3tqW7W0PQ43XpeFuEkAAAAcAqwBSUAAOhxgkFLs9bs1aaSOvXu5FBSkuw2Q1kJkdpYUqe31+5VMMjnuAAAAEBnI5gEAAA9zpqiaq0pqlZmQuQpWwfSZjPUOyFSq3cfHAsAAABA5yKYBAAAPUplY6vmFJQoKSa80zsl/53dZigpJlxzCkpU1dh6SscCAAAAzjQEkwAAIOQKCgr01FNPqaio6FvPnb+hVH4z2Ckb3bRHjMspvxnU/I37u2Q8AAAA4ExBMAkAAHqMigavNpTUKjXW1SXjlezcpqKtBUqNdalgX40qGrxdMi4AAABwJnCEugAAAIChQ4dqyJAhstu/effrL4prJBly2Lvms9WSwm1qaapX9lnDJBlaV1yj8UN6dcnYAAAAwOmOjkkAABByNptNDodDhnH8NSMDZlDLC6vkiQrrwsr+xRMVpmWFVQqYwZCMDwAAAJxuDMuyrFAXAQAAzmwFBQWaO3eu7rjjDmVnZ2vJkiVasmSJ7rvvPq1fv14bN25URU2Dvqgwdd7F45SQktZ2bXXZPq1eOEdDzh+ngK9Ve77cqNaWJkVEx6nv2cOV1ndg27ktjfX6bPZryskbqZy8UUfUsGn5xyop3Kbxt90vSfps9l/V0tjwrxMsS3Utfr32m18oNytVe/bs0WeffaaysjK1trYqMjJSvXr10tixY5WSknJqf2AAAADAaYCp3AAAoNuaM2eOwsLCdMEFF2jL3kotn/0Prf90vi664XY5w8KPOHfPtg3y+1qV0X+wbHa7SnZu08Zli2QFLaXnDOrw2ANHXKiv1q2Qv7VFA8+9UJJUVutVQ8CmyspKzZgxQwkJCRozZoxcLpcaGxu1e/duVVZWEkwCAAAA7UAwCQAAuq3o6GhNnTpVhmGoOnK/+o3wqqxgicp2f6XM3LOPOLe5sV4XXHuzXJHRkqTM/kO0fP5Mfbn2n0rt0192e8f+25PSu5+Kt21QMBho67o0q5t1oNmUWblbfr9ft912m6Kiotquueiii07yFQMAAABnDtaYBAAA3daoUaPa1p2sbfHJk5opSWpuqDvq3F59BrSFkpLkCAtT5oAh8vtaVVNeqtbWVtXX159UPU67TXVev8LDD3Zrbt26VcEga04CAAAAJ4KOSQAA0G3Fx8e3fR0wLYW5XJIkX6v3qHOjYt3HPdZQU6Vde8sUFx1xUvUYhuQ3LQ05Z4g2btyoBQsW6OOPP1ZmZqZycnI0ZMgQRUdHf/uNAAAAANAxCQAAui+b7V//VXHYDZ3Iln1WMKidO3cqOjpamb17f+N533ovS3LaDTkcDt122226++67dd5558k0TS1cuFAvvfSSioqKOl4kAAAAcAaiYxIAAPQI8RFh8pvHDw+b6muOOtZYW62KAwfUN2OgBg4aqGAgIEnyH6Pjsrnx6Onh/85vBhXncrZ9n5GRoYyMDElSVVWV/vjHP2rJkiW64447vvVeAAAAwJmOjkkAANAjpMVHSN/QMbl/91fyNje2fR9obdUXyz6RYbNr5AUXyWazyREWpvCISFWXlxxxbe2BMtVVlh91T7vTqYDP968D1sE6mpubjzrX7XYrPDz8mH8GAAAA4Gh0TAIAgB4hJdaloCwdbz53ZHSsVn34jjIHDJFhs2nDyqVqqK3W2OtvUrjrX2tLZg44Wzs3rNIXn8xTcnq2WpoatG/HFsXEe1RfU3nEPeMTU3VgX5G2rf5MsZ4UHWholdvVV0uXLlVhYaEGDBggt9utYDCobdu2qaGhQaNHjz6lPwcAAADgdEEwCQAAeoTE6DBFu5zym8cOJnsPylPA16o9X25U+f5StfgCuvS676nPoLwjzus7ZLj8Pq/27/5K1fv3KsadpGGXXK2SnduOCiazBuWpqb5W+3d/pZ1bNihgBhV55zjl5uaqoaFBmzdvVlNTk5xOpxITE3XDDTdo6NChp+xnAAAAAJxODMs6kWXkAQAAut6Hm/dr4dZy9U6IbDtWXbZPqxfO0dnnX6b0nEEqLyvTjh07lTcsTzExMZ029p7qZl15VorGD+
nVafcEAAAAzmSsMQkAAHqM4VluSZYCx9kEp7qqSl99tUODhwzu1FDy4HiW8rPcnXZPAAAA4ExHMAkAAHqM5BiX8tLjVVZ/9K7ajU2N2rJ1q3IH5srt7twAsazeq2EZbiXHuDr1vgAAAMCZjGASAAD0KBPz0uS029Tg9bcd8/v92rljp/r26aPk5OROHa/B65fTbtM1Q5nCDQAAAHQmgkkAANCjeKLDdf2wdB1oaJUZtBTlTlLSWefrrPxRSs/I6NSxzKClysZWXT8sXZ7o8E69NwAAAHCmY1duAADQ45ybnaDt5Q1atatS1cXb5Xa71adPn04dIxi0tKe6WedmJ+jc7IROvTcAAAAAOiYBAEAPZLMZumFYL/nLd6rZFql+/ftLhtFp9zeDloqrmzU0PU6TR2TKZuu8ewMAAAA4yLAsywp1EQAAAB0RDAb19ttvq66xSeEDL9EXe+uUFBOuGJfzpO/d4PXrQEOrRvZJ0OQRmQp32DuhYgAAAAD/jqncAACgR7EsSwsWLFBVVZXuuusuhYe7NKioWnMKSlTX4ldqrEsOe8cnhQTMoMrqvXLabbplVJbOzU6gUxIAAAA4heiYBAAAPcqnn36qgoICff/731dsbGzb8arGVs3bUKoNJbWSDHmiwhQZZpfxDVO8LctSs89UVZNPkqVhGW5dM7QXG90AAAAAXYBgEgAA9BirV6/Wp59+qrvuuktJSUnHPKeiwat1xTVaVlilJq9fhgzJkJx2mwxDsizJbwYlS7JkKdrl1AU5iTqnd7ySY1xd/IoAAACAMxfBJAAA6BG2bNmi9957T7feeqsyMzO/9fyAGVRlo0/l9V6V1raozuuX37TktBuKczmVFh+hlFiXEqPDTmjqNwAAAICTQzAJAAC6vd27d2vmzJm68cYbNWDAgFCXAwAAAKAT0B4AAAC6tf379+utt97ShAkTCCUBAACA0wjBJAAA6LZqamr0xhtv6KKLLtKwYcNCXQ4AAACATkQwCQAAuqXGxkbNmDFDQ4YM0fnnnx/qcgAAAAB0MoJJAADQ7bS2tuqNN95Qenq6rrzyShmGEeqSAAAAAHQygkkAANCtBAIBzZo1S5GRkbruuusIJQEAAIDTFMEkAADoNizL0ty5c+X1ejV58mTZ7fZQlwQAAADgFCGYBAAA3YJlWfroo4+0f/9+3XzzzQoPDw91SQAAAABOIUeoCwBwcgJmUJWNPpXXe1Va26LaFp8CpiWH3VB8RJjS4iOUEutSYnSYHHY+iwDQfS1fvlxbtmzR97//fUVFRYW6HAAAAACnGMEk0ENVNHj1RXGNlhdWqdHrl02GZEhOu02GIVmW5DeDkiUFZSna5dSYfh4Nz3IrOcYV6vIB4Ajr16/XsmXLdMcdd8jtdoe6HAAAAABdwLAsywp1EQDar6qxVfM2lGpDSa0kQ56oMEWG2b9xcwjLstTsM1XV5JNkKS89XhPz0uSJZpokgNDbvn273n33Xd10003Kzs4OdTkAAAAAugjBJNBDBIOW1hRVa05BifxmUKmxrhOamh0wgyqr98ppt+n6Yek6NztBNhs73gIIjb1792rGjBm6/vrrNWjQoFCXAwAAAKALEUwCPUBrwNSsNXu1pqhaSTHhinE5T/qeDV6/DjS06tzsBE05N1PhDna+BdC1Kioq9Je//EXjxo3TiBEjQl0OAAAAgC7GGpNAN9caMDVjRbE2ldQpyxMleyd1N8a4nIoMc2htcY18gaBuPS+LcBJAl6mrq9Prr7+uUaNGEUoCAAAAZyi26AW6sWDQ0qw1e7WppE69EyI7LZQ8xG4zlJUQqY0ldXp77V4FgzRQAzj1LMtSRUWFBgwYoIsvvjjU5QAAAAAIEaZyA93Yql1Ven1Vcad2Sh6LGbRUXNWkW0ZlaVRfzykbBwAOMU1TNpvtGzfuAgAAAHB6o2MS6KYqG1s1p6BESTHhpzSUlA52TibFhGtOQYmqGltP6VgAIEl2u51QEgAAADjDEUwC3dT8DaXym8FO2eimPWJcTvnNoOZv3N8l4wEAAAAAgDMbm98A3VBFg1cbSmqVFhfRrvPrqypUumu7qsv2qaWpQZIUFetW74Fnq1ef3HZ3JaXGulSwr0ZXNaQqOcZ1wvUDwL8rLCxUv379Ql0GAAAAgG6EYBLohr4orpFkyGFvX1Pz7i3rVLV/r1KycpQ5YIhM01RZ8Q5tXLZI1WX7NOT8y9p1n4PjGVpXXKPxQ3qd+AsAgMPcfffdam5uVt++fXXTTTdpwIABcjj4LwgAAABwpmMqN9DNBMyglhdWyRMV1u5reg/M08XfvVODR1+qzNyzlX3WMI0a/10lpKRr385taqipave9PFFhWlZYpYAZPJHyAeAI69atU0NDg2bOnKkBAwbok08+0ZIlS0JdFgAAAIBugGAS6GYqG31q9PoVGWZv9zXu5F6y24/sPjIMQym9D06bbKxtfzAZGWZXk9evykZfu68BgOMZMmSIcnNztW3bNk2dOlW9e/fWV199pcrKylCXBgAAACDECCaBbqa83iubjE7Zrdbb3ChJCnO1b61K6WCgachQeb33pMcHAMMw1KdPH61evVr19fW6/PLLVVxcrM8//zzUpQEAAAAIMYJJoJsprW2RTj6TlLe5Uft2bFFEdIzcyWkdu9j4ug4A6CDLso743ul06rLLLlNpaanmzZuniooKDR8+XOvXr1cgEAhRlQAAAAC6A4JJoJupbfHJ2c5Nb47FNE35/T5tWPqRAn6fhpw3TjZ7+6eFS5LTblOd13/CNQA4M1mW1dbtvXnz5rbjmZmZuummm5SYmKj/+q//0h//+EdNnDiRDXAAAACAMxxPBEA3EzAtdXQWt2maqqqq0oGKA6qsOiD/gT0yW+o1ePSl8vTK7HANhiH5TevbTwSAwxwKJf/85z9r7ty5+utf/6rExERJUlZWlrKysnTZZZeppaVFCQkJRwSZAAAAAM48dEwC3YzDbshqRyZomqYOVFRoy5Yt+nz55youKlZkZIQiWmsVaK7TwBEXKqP/4BOqwbIkp52wAED7FBcXt329adMmvfLKK3r77bdVWVmpTz/9VJK0YMECrV27VhEREYSSAAAAACTRMQl0O/ERYfKbwWP+WdA0VVVdrQMVB1RVVaVwV7iSk5KUnZ2tyMhIbVy2UNVle5U7fIyyBuWdcA1+M6g4l/OErwdwZlm+fLlee+01xcXF6ZprrtGFF16oadOmKSIiQsXFxWpsbNTo0aPldrvbriGUBAAAAGBY/75KPYCQ2rC3Vq99XqRMT6Skg2FkdXW1Kg58HUaGhSspOUnJSUmKioqSDEOWZWnT8o9VuutLDTjnPPU9e8RJ1bC3qlm3n5+tvMz4znhJAE5TwWBQNtvByRfnn3++SkpK2roni4qKlJ2drR/+8IfKzc3VI488Ikl0SgIAAABoQzAJdDNldV79+sOtirJaVHmgUpVVlQoLC1NyUpKSkpMV/XUYebgv1/xTRdsKFJeYoqyBQ4+6Z4w7UTHuxHaNb1mW9lY3a9r4QUqNc3XKawJw+vL5fJo/f77Cw8M1a9YsX
XjhhfrBD34gn8+np59+WiUlJZo+fXqoywQAAADQDTGVG+gmAoGACgsLtXHzFu3Y2iCXw6701ESdM2yYoqOjjwojD1dffUCSVFdZro3LFh315zl5I9sdTDb7TEW5nEqMDjuxFwLgtHd412NhYaHWrVunK664Qq+88oqmTJmiQCAgh8OhBx98UB6PR9KR3ZUAAAAAINExCYSUaZoqLCzUli1b9OWXXyoiIkKDBw/WAVea1pT61NsT1eU17alu1pVnpWj8kF5dPjaAnuPAgQNasWKFJk2apFWrVunDDz/Utddeq7S0ND366KO69NJLddddd0kilAQAAABwbHRMAiFgWZaKioo0a9YshYeHa/DgwbrtttuUlpYmwzBU0eDVmv3bFDCDcti77mE+YAYlWcrPcn/ruQDObLW1tZo7d67sdrsmTJigXbt26U9/+pMefPBBTZ8+XWFh/+q6JpQEAAAAcCwEk8ApZpqmbDbbEZs9GIahXr166ZZbblF6evpRG0Ekx7iUlx6vTaV1ynBHdlmtZfVeDctwKzmGtSUBHNsHH3ygHTt2yOPx6IknntBvfvMbpaWlKTIyUjabTQ6Hoy2UZKMbAAAAAN+EYBLoZMFgUIZhtD2M2+32Y57ncrmUkZFx3PtMzEvTl+UNavD6FeNynpJaD9fg9ctpt+maoUzhBnBsCxYs0IwZM3TLLbdo1qxZKi0t1UMPPaQHHnhApmnqxRdfVE5OTtv5hJIAAAAAvglrTAKd4HhdQX6/Xy+99JIKCgo0evRo3XvvvR2676pdVXp9VbGyPFGy207dA74ZtLSnukk3j8zSqL6eUzYOgJ7l8LUhA4GAJk2apJycHP3+97+Xz+fT9773PT377LOKjIyUy+WSx+OhSxIAAABAu7HoE9BOhzL8PXv2SDr4kH7IoYdw0zT15ptv6uGHH9ayZcu0a9cutbS06Je//KXWrVunzz//vENjnpudoHOzE7SnulnB4Kn5DCEYtLSnurltLACQDr7HHQollyxZopqaGs2YMUObN2/WO++8o7CwMHk8HlVXVys9PV0ej6etYxwAAAAA2oNgEmiHQx1AJSUlmjp1qiTJ4fjXSgh//OMf1dDQoI0bN2rRokUaOXKkLrjgAr399tuqq6vTX/7yFy1ZskQ7duyQ3+9v97g2m6Ep52ZqaHqciqubZXZyOGkGLRVXN2toepwmj8iU7RR2ZQLoWQ69xz3zzDN69tln9dhjj+nDDz/UjBkz9OSTT+quu+5SeHi48vPz265hkxsAAAAAHcETBHAMlmXJNM227w91AKWnpysxMVHl5eV6/vnntXLlSknS4sWLVVBQoEWLFum8887T9773PUnS0KFDtXHjRo0dO1bbt2/Xd77zHTmdTnVkBYVwh123npelc7PdKq5qUoO3/cHmN2nw+lVc1aRzs9269bwshTuOvRYmgDPL3LlztWzZMknSb37zG33++ef66KOP9Kc//UnvvvuuNm/erAULFmjv3r26//775XQ6O/SBCwAAAAAcQjAJfC0YDCoYDEo6GEQevmmNZVn69a9/rRtvvFEVFRUqKChQQkKCFi5cKEm69tprtWzZMl111VVtD/SSNHz4cE2cOFGLFy/W9ddfr9tvv11+v7/DUx3DHXbdPDJLt4zKUrPP1L6aZgXM4Am9zoAZ1L6aZjX7TN0yKks3jySUBPAvl19+ucaMGaPly5frjjvuUFlZmRYsWKCwsDC98MIL+vnPf66IiAj96Ec/0p133qna2lo5nad+gy4AAAAApx82v8EZKxgMyrKsY+6aXVlZqZdeekm1tbWaOnWqMjIy9Mtf/lIvvfSS3nrrLS1evFjPPfec7rnnHs2ZM0dlZWWaPHmyli5dqqlTp2rgwIHatGmTvv/97+vqq6/W/PnzNWTIEGVnZ5903VWNrZq3oVQbSmolGfJEhSkyzP6NYadlWWr2mapq8kmyNCzDrdkv/n+acu0Effe731VYWNhJ1wXg9LFv3z49+uij+vGPf6yMjAzdf//9mjZtmi688EJVVFQoOTlZkrRlyxYNHjw4xNUCAAAA6KkIJoGvrVmzRn/729/Uq1cvDR8+XLNnz9bVV1+t2bNn64EHHtDjjz+uf/zjH/J6vbrsssu0bNkyXXvttXrkkUfU0NCg++67T5999pkSExM1c+ZMDRw4UBdccMFR43TWjrUVDV6tK67RssIqNXn9MmRIhuS022QYkmVJfjMoWZIlS9Eupy7ISdQ5veOVHOPS5MmTtWPHDl199dW69dZblZuby6YVANp24t66datefvll3X333bIsSw8++KDmzZunmJgY2e32tiUpeN8AAAAAcKIIJnHaKiws1OzZs9WrVy/deOONCg8PbwsFTdPUypUr9cEHH8jpdOr//b//p9/97nfaunWr/vCHP+jZZ5/V1KlTNXjwYP3iF7/QhAkTNG3aNL3xxhtKTU1Vbm6u3nzzTcXGxupnP/uZcnJyNG7cOI0ZM6bLpzQGzKAqG30qr/eqtLZFdV6//KYlp91QnMuptPgIpcS6lBgdJof9X6s3/PKXv9SiRYvk8XgUExOjSy65RDfddJMiIiK6tH4AoXP4ByWHd0L+85//VGVlpbKysvTcc8/p6aefVlxcnBITE0NZLgAAAIDTjOPbTwF6np07d+pXv/qV3G63wsPDNW3aNL3wwgttD+CLFy/WE088oXvvvVeFhYWaNWuWsrKyFBYWpqioKNntdu3YsUODBw+Wz+dTQ0ODHnjgAT388MOqr6/XVVddJYfDoZycHL355pvHrOFQ19Gp5rDblBrnUmqcS3mZ8e2+Ljk5Wenp6dq/f7/sdru+/PJLNTY2EkwCZ5BD74nvvfeeZs+erd/+9reqrq7Wz3/+cz3zzDPKz8/XD37wA1mWRSgJAAAAoNMRTKLHM01TNpvtiOmEffv21XPPPSePxyNJmjJlipqamhQVFSVJysrK0llnnaXrrrtO9fX1mjNnjlwul9xut8rKyvTd735X7733nt544w21tLRo0qRJuvTSS5Wfn69evXrJ4XB8aw1dEUqejMTERDU1NckwDKWnp+upp55q+/kAOL3V1NTI7XZLkjZt2qTHH39cS5cuVXh4uJqamvTpp59KOthROXbs2LavmbYNAAAAoDPZn3rqqadCXQTQEYc2rTn0gHwoEDz8mGEYioyMlCS98847iouL0wUXXHDEVO7S0lKZpqlzzjlHS5cuVV5enrZv3y5JOv/885Wbm6uYmBg98MADys3NlSTFxcXJZrPJsqwjOiL/PZTsCWpqauT3+/XYY4+prKxMKSkpSkpKCnVZAE4xn8+nhQsXqqioSF9++aXOOusslZWVad26dZo3b55ef/119evXT7179z7ifa2nvccBAAAA6P5YYxLd3rd16cyePVvTp0/XOeeco/vvv18pKSlHXPfMM88oLS1Nd955Z9s1fr9ff/vb3/T+++/L7XbL7/frjTfeUEFBgbKzsxUf3/4p0T3V4T/XDRs26OOPP9aPf/xjuVyuEFcG4FQrKirSlClTVF1drR07dmjv3r0qLCzUJZdcogce
o0CGKxGHPnzkVpaSl4PB6OHTuGy5cvw8zMDAqFAmvWrMHIkSNrPc+2bdvwww8/QCqV4sSJE03SsabSFJ2syYwZMzB+/HjGRb1mZmigamHt3r17DbpxA1Whkb777jtYW1tDIpFg+/btTMzEPn36YMeOHfD394exsTHkcjk2bNhQ5+JgfbR0/r7//vvg8XgwNjZGp06dXih3cXFBeHg4AgICmAzid+/erfec77zzDnr16gULCwtIJBJmEf2tt96ClZUVJBIJXF1dtY4xMTGBXC6HjY0Nxo0bhxUrVmDYsGEAgH/961/w9vbGuHHjYGhoiJ49e+L06dMAqjJZz58/H8bGxrC0tERBQQHWrVsHoCoWaPfu3Zk23NzccPHiRfzxxx/o3LkzJBIJFAoFXF1dIRKJ4OjoiH379mHJkiUwNjbGxo0bceLECZiYmDR6PFvCy74/volwiF1ta5CKigr8+OOPcHd3b/aLIAtLXWQXlmFtRCwsxQKtOKeNpSj/Gf44vhsajQa9PcZCZvvig6o2KtUapBeUYtmYrjATsfEI2wOPHz/Gnj17EBUVhfHjx2P06NGvxKWBhYWlbVEqlRCLxSgoKIChoWFbi/PKSE1NhY2NzRvX79YkKCgIcXFx2LNnT1uLwsLCwtJqHD16FP7+/nj48GFbi/LaMmnSJPTr1w/Lli1j9l28eBHe3t4v1b2fpf3RmPdU1mKyEVRnSDx//jzKysraWhyWdsatpDwAnGYtShIRYq5fAJfbsMvD81S1x8HtpLwG67L8/cnMzMSuXbtw+/ZtyOVymJmZYfny5QCq4pGxsLCwsLCwsLCwtHfKysrw/fffY/78+W0tymtLbGwszp49i9mzZ7e1KCxvCGxQuUbStWtXREVF4dKlS3WavbOwNJVKtQZX45/CuEPT4+EAQNrjGBQ8zYZdjz54fPdGk4837sDHlfinGN5V1qyFUZa2h4jA4XAQExODlJQUhIaGYvHixXBzc2MsYNj4oSwsbwZpaWlaAenbOxkZVeFI3rR+tyZKpRIlJSVsLPVXQFpaGt55551ay0JDQ19wl2RhqY0uXbrUun/dunWYOHHiK5bm5RAZGYkZM2bUWvaf//ynXm+gK1euYPbs2ejTpw/Gjx/P3Nve5Pk3ffp03Ljx4nfixIkTGVfqmnz66ac4efIkFi9ejIqKCq3nQ05ODtRqNfvMYGkShYWFDdZhXbmbQFZWFn7++WfMnz//lcUvYGnfZBaUYd2pWNhKDZqcxau8tARXjuyEXfc+0BN0wP0/zjXJlRuoWtRKeVaCJaO7wlzMunO/zpSUlGDixIk4cuQInJyc4ODggPXr17PhJ1hY3gDy8/Nhbm7+QvD6NwE+nw+VStXWYrCwsLCwsLCwsNSBnp4eMjMzIZFIai1nzWiagEwmQ9++fREREQEfHx82HTxLi8lSloELTrN06eGtq+DrC9CxW29kJPzVrPY5HA444CBLWcYuTL7GEBEMDAwwefJk6Ovr49atWzAyMkJOTg6ys7NhZmYGjUbDZCtkYWFpX3C5XJSXlyMlJeWNirWYlpaGbt26tct+BwYGIjs7G5s3b25rUV5g4cKFMDMzQ0BAQJOP/eOPP+Dn51dvQgZLS0tcunQJDg4OLRGz2fz111/o168fCgoK2qT9tiI+Ph6zZs1CfHw8FixYgMWLF7e1SC/g6uqKdevWNZgEhqVphIeH49dff8X58+fbWpRG4ezsjG+//RbDhw9va1HanJbcj1lYXhVKpRI2Njb1fouyC5NNxMPDA99//z3++uuvOk3pWVgaS3p+KdCM9e2nGSlIfxIHl+ETwNVpenxJLThVcvS0qf3XC5a/P9UL2z4+Pti5cycePHiAyMhIxMXFwc/Pr84skSwsLO0LQ0PDdrNAl5uby1h/X79+vdY61e7b7anf1ejp6YHH44HL5WLs2LGIiYmBSqWCvb09Vq5ciQkTJrSZbL/++muzjx09ejQSEhKY7Y4dO2LLli0YPXo0s6+oqKhF8rWU6ozdjdGp1kwE4eHhAW9v7zaLixcSEoKBAwciOjq6Tdp/ntrGIzY2tg0lqp3adLg1mTlzJszNzWt1uW0tBAIBdNCkjBQAACAASURBVHR0Xpv7KIfDgYGBwWsjb2tRW7KyltyPAWD79u3YsmVLnc/ZtuT27dsICgrClStXoNFo0LFjR/j6+uKTTz6BTku/f1n+drDmM01EIBBg2LBhOHXqFJtQgqXF5JeqwGtibEe1uhIxkRdhLndAuYbb4l/0eTpcFJRVtOgcLG1LdUSO2NhYXLhwAf3798fu3btx9epVFBcXAwBrLcnCwvJa4e/vj27durW1GG2Onp4etmzZgqysLCiVSmzevBk+Pj5IS0t75bIQEdRq9Stvl6UKjUaDlxmBKyEhAc7Ozs06lv0mqht2bFhYmk5kZCSGDBmC/v37IzY2Fvn5+di1axciIyMbFa+Q5fWD/VJtBr1794a+vj6uXbvW1qKwvOZUqglN9eJO+PMWykuKYdO1NyJv3Ghx8GEOB6hQs6FmX2eqLSb79OmDbdu2YeLEiZDJZDA1NYVcLmfjr7GwsLxW/P7773j06BFmzZrV1qK8Mu7du4d+/fpBJBLB09MTeXl5AAAej4du3bpBR0eHSXZWUVGBpKQkAFXWLm5ubli6dCmkUimsra1x/PhxnD59Gl26dIFEIsFnn31WZ7tBQUGYNGkSfHx8IBKJ0L17d/z+++9MuYeHBwICAuDh4YEOHTrgxo0bmDlzJpYuXQqgymLQ3NwcGzduhIWFBYyNjfHLL7/g1q1b6NWrF8RiMaZPn84szlTXB4APPvgAycnJmDhxIoRCIZYvXw6g6pkWFxeHGzduQCqVaj3Dbty4ASMjIyae6s6dO9GjRw9IJBIMHjwYDx48YOp+/fXXsLGxgUgkgr29vZaVUU3Kysowd+5cSKVSdO7cGefOndMqDw0NRffu3Znz/PjjjwCAgoICjBkzBtnZ2RAKhRAKhYiNjUVCQgKGDRsGY2NjmJiY4IMPPmCuZ10sWbIEly9fxsKFCyEUCjF9+nQAVdZ4wcHB6Nu3LwwMDJCZmVmnPDXH9/vvv4eFhQXMzMywfv16pvzmzZtwdXWFoaEhTE1NMW3aNADAkCFDcOHCBab9qKgoKJVKzJkzB+bm5rC2tsa///1vZtwTExPB4XCwfft22NnZ4a233mqyLiiVSnh5ecHMzAxGRkYYO3YskpOTGxyPU6dOAQBUKhX8/f1hbW0NmUyGmTNnav1Yz+FwsHXrVjg5OUEsFsPHx6fe96G6dOnQoUOwtLREbm4uADDhch4+fFivDoeEhMDJyYmJp7Z+/Xo4ODhAJBKha9euOHjw4At61qNHD4hEIjg4OODUqVMICQlBeHg4vvnmGwiFQgwZMqRePSovL8eyZctgZ2cHkUgEFxcXpKSkAKgKUTBixAgYGRnB0dER27Ztq/Uc1de2rKyM2eft7Y2goCAAzZ/zTdXJhrhz5w6cnZ0hFovx3nvvIT8/HwDg5eWF4OBgrbpjxozB119/Xe/5wsLCmOtjY2ODb7/9lik7f
fo0XFxcIJFI0KdPH1y+fJkp8/DwwPLly+Hu7o4OHTrAw8MD2dnZ+PTTTyGVSmFvb4+LFy8y9Zs7f48fP44vv/wSBw8ehFAoRMeOHQFA634MABEREXBxcYFYLIaNjQ22b99eZ5/v37+P+fPnIyoqirmHJSUlQV9fn0kuB1QlmtPX10dmZiYj4/r162FmZgZra2ts3LiRqUtE+Oabb+Do6AipVIqxY8cy36pEBH9/f8hkMhgaGsLJyUlrbGri7++P6dOnY8WKFZDJZACAbt26Yc+ePcycioiIQM+ePSEWi+Hq6qq1NjNz5kz4+fnh3XffhVAoRO/evREfH4/g4GCYm5vDwsICe/fubXb9xtwfw8LCYGdnByMjIyxatKjO68DyX4ilWSQlJdGaNWuooKCgrUVheY3ZFZlIiw/cpW/OPmzU35dHbtGYmQtozqotNDFgM01Y9iNNXxtG//p2F42c/jEt+uEArToYSV+fimn0ORcfuEu7byS19VCwtBJFRUW0adMm6t+/P/Xq1YtWr15N9+7dIyIitVrdxtKxsLC8DAoKCghAu3gnKS8vpx49elB0dDT99ttv5OrqWmfdlJSUdtFvlUpFHTt2pDVr1pBKpaLTp0+TQCAgX19fps6gQYOIz+cTABoxYgRVVlYSEdFvv/1Gurq6FBISQhUVFbRp0yYyMTEhhUJB+fn5FB8fTyKRiK5evVpr24GBgaSjo0OhoaFUUVFB27dvJ7FYTM+ePSMiInd3d7KwsKA7d+6QWq2msrIy8vX1pSVLlhAR0YULF0hHR4eWLVtG5eXldPToUdLT0yMvLy9KT0+n7OxsksvlFB4eztSXyWRM+3K5nCIiIrRkAkCxsbFEROTo6EiHDh1iyj755BP68MMPiYjo6NGjJJfL6d69e1RZWUlbtmwhOzs7Ki8vp7i4OBIIBBQXF0dEROnp6fTgwYNaxyAgIID69+9PWVlZlJWVRW5ublTzE+nEiRP06NEj0mg0dOnSJRIIBHTjxo1a+0NEFB8fT6dPn6aysjLKyckhd3d3+uc//1lr2zVxd3enzZs3a+2Ty+XUvXt3io+PJ5VKRRUVFQ3Ko6OjQ/7+/lReXk6RkZHE4/Ho0aNHRETk5uZGq1evJrVaTaWlpXTlypU62/f19aVRo0ZRXl4eZWRkUP/+/SkgIICIiBISEggAKRQKKigooJKSkibrQl5eHu3fv5+Ki4upsLCQFAoFeXp6Njge1foSGBhIvXv3prS0NMrPzycvLy+aOnUqU7d6ruTm5lJWVhbZ29vTtm3bah37+nSJiGjOnDk0YcIEKi4upi5dutBPP/1Uq0w123Z3d6esrCwqKSkhIqL9+/dTamoqqdVq2r9/PwkEAkpJSSEiokOHDpFMJqOrV6+SRqOh5ORkiomJYa5D9XxriAULFtCAAQMoMTGR1Go1RUdHU25uLqlUKurcuTN9/vnnVF5eTlFRUWRsbExnz54lItK611Zf29LSUua8CoWCAgMDiah5c765OlkXcrmcHB0dKT4+npRKJXl6etL06dOJiOjgwYPUo0cPpm5GRgbp6elRRkZGnecrKioiXV1dunTpEhERPX36lG7fvk1ERNHR0SSVSuny5cukVqvp2LFjJJVKKScnh4iq9FQul1NsbCwVFxfToEGDyM7OjrZt20aVlZW0fv166ty5M9NWS+ZvYGAgKRQKLdlr6sfNmzdJKBTS8ePHqbKyknJycph+1EVtz1kvLy9av349sx0cHMzMzWoZ/fz8qLS0lG7evElGRkZ0/vx5IiLatGkT9enThxISEkilUtHy5ctp4MCBRER06tQpsra2pvT0dCIievLkCcXHx78gU3FxMXG5XOactfHXX3+RQCCg48ePU0VFBe3YsYPEYjFzXXx9fUkikdC1a9dIpVLRlClTyM7OjoKCgkilUtG+fftILBYz87M59Ru6P86YMYOKioooPj6exGJxvf1p7zTmPZW1mGwmtra26NKlC86ePdvWorC8xkgEfFSoNY2uryorhbqyEneuXsCTqPPIjYtC9PnDeHj7DwDAg8iLuHw4DCVFjXfvrlBrINbnNVl2lr8nUVFRKC4uxtixY/HPf/4T/fr1Y345ppfoAsbCwsLSGqxbtw7Dhw9Hz54921qUV8a1a9dQXFyMpUuXgsfjYeTIkRg1apRWncuXL6OwsBCHDx/GqFGjtOJrWVlZwc/PD7q6upg2bRpyc3OxcOFCiMVi2Nvbw83NDbdv366z/Z49e2L69OnQ1dWFr68v7OzscOLECaZ8xowZ6NWrF7hcLvT09F44nsvlYuXKleDz+fDy8gKfz8fUqVNhYWEBU1NTjBw5st7262PatGkIDw8HAKjVauzduxc+Pj4AgM2bN2PJkiVwdnaGjo4OPvroI3A4HFy/fh26urogIvz5558oLS2FhYVFnaEBdu/ejRUrVsDMzAxmZmZa1kcAMHbsWDg4OIDD4WDw4MEYNWoULl26VKfM9vb2GDlyJPT09GBiYoJFixZpWaE2lY8//hj29vbg8XjQ1dVtUB4ul4vVq1eDz+ejf//+cHJyYuJG8vl8JCUlIT09Hfr6+hg4cGCtbarVauzevRtfffUVJBIJzM3NsXLlSoSGhmrVCwoKgqGhIQQCAdN2Y3VBIpFg8uTJMDAwgFAoxLJly5o0Tjt37kRgYCAsLS0hFosRHByMvXv3allFBgQEwNjYGGZmZvD09KxTD+vTJQDYuHEjYmJi4Obmhq5du2LevHkNyrd06VKYmZkxYzN58mRYWVmBy+Vi8uTJ6NKlCyIjIwEAW7ZswWeffYYBAwaAw+HAxsYGXbt2bfRYAFWu/lu3bsXGjRshl8vB5XLRs2dPGBsbIzIyEnl5eQgMDASfz4eLiwvmzp2LHTt2NKmNapo651tDJ5+nel6IRCKsWbMGe/fuhUajgZeXFzIzM5n2w8PDMWzYMMZSuy54PB5iYmKgVCohlUrRu3dvAMBPP/2EuXPnYtCgQeByuRg3bhx69eqFkydPMsfOnDkTTk5OMDAwwMSJE8Hn8zF79mzo6Ohg6tSpePToERM7tyXztyF+/vln+Pr6wtPTEzo6OjAxMWH60RRmzZqFsLAwZjssLAy+vr7Mtkajwdq1a6Gvr4++ffvC19cXu3btAlA1l1avXo2OHTuCx+MhKCgIUVFRSE5OBp/PR1lZGR48eICKigrY2dnB3t7+hfbz8vKg0WhgaWlZp4x79+7FqFGj4OnpCV1dXcyYMQNOTk44cuQIU2fChAlwc3MDj8eDQqFAdnY2VqxYAR6Ph/fffx+lpaWIj49vcv3G3h9XrlyJDh06wN7eHkOGDGn2c/BNgV2YbAEjRozAw4cPGbcDFpamYikRAE1YKxIIDWHVzQUmnZzxziQf9HQfAwvH3rDt8hYAwK57H/RyHwN9gbDxJ6X/ysHyWlO96FhUVITExET84x//wH/+8x8QERMzig0UzcLC8nfm8ePH2L59O1auXNnWorxS0tPTmQWLauRy+Qv1+Hw+JkyYgJMnT+Lo0aPM/pof3AYGBrXuqy+hjK2trda2XC7XimFZ
myw1kUql4PH+9wOngYFBk9qvDx8fH5w4cQJKpRLnzp2Dvr4+Bg8eDKDKXc7f3x8SiYT5y8jIQFpaGjp16oQdO3Zg06ZNkMlk8PT0RFxcXK1tpKena43B8/2NiIiAm5sbpFIpJBIJTpw4wbj11kZWVha8vb1hZWUFQ0NDZrG4uTRVHqlUCj6fz2zXHP9ff/0VJSUl6NOnD7p3715n4ozc3FyoVCrGXRSocqPOyMjQ+pHzedmaogslJSX46KOPIJfLYWhoiMGDB6OoqIhxh2yItLS0F+RTq9VaiYgaq4f16RIAdOjQAT4+Prh//z78/f0bJd/zYxMaGopevXox579//z5z3ZKTk1uchT43NxelpaW1nictLQ3W1tZa74EdO3Zsdqzaps751tDJ53l+zqpUKuTk5IDH42HatGnMIlFoaKjWolptdOjQAUePHsWhQ4dgY2MDd3d3xi04MTER33//vZZuXL9+Henp6czxz/e9tntydX9bMn8bojX0CADGjRuH9PR0REdH486dO0hNTcX48eOZcrFYDLFYzGzXfGYkJiZCoVAwY2ViYgIul4vU1FQMHToUK1euREBAAExNTeHt7a01jtUYGRmBy+XWWlbN8/MfeFGnn78OJiYmWnNAIBBojW1j6zf2/thaz8E3BXZhsgVUP0TPnz/PWiKxNAuZoT40oEbrT7mqAvmlari5D4eNQ1fIHbtD38gUQokUACAxNYe53AG6NR5o9UFEIBBkhvrN7gPL34PqOJMeHh7Iz8+HUChETk4OTpw4gcWLF7exdCwsLCwNc+XKFWRmZsLR0RHm5uZYsGABbt++DXNzcyYDd3vE0tISaWlp0Gj+50FR34/elZWVWlYeLeX5tpKTk2FlZcVsVz9fXgYNndve3h69evXCgQMHsHPnTkydOpU5xtbWFj/88APy8/OZv5KSEnzwwQcAgClTpuD3339HZmYmOnXqhA8//LDWNiwtLbXGoOb/5eXleO+997Bw4UJkZWUhPz8fnp6ezHtbbfIHBARAo9Hg3r17UCqVCA8Pb9R7Xl1jUXN/Q/I0RKdOnbBz505kZWXhxx9/xPz58/H48eMX6pmYmIDP5yMxMZHZl5iYCAsLCy15WqIbGzZsQExMDK5fvw6lUsnE7atvbGtiZWX1gnxcLrdBy7jaaEiX4uLi8M0332DmzJn45JNPUFHxv6SRjbluSUlJmDt3LjZt2oSnT58iPz8fzs7OTF9tbW1rvQ71nf95TExMIBAIaj2PlZUVUlNTtZJXJSYmas3zaqqz0peUlDD7WiPrfF00Vief5/k5y+PxYGpqCqDK4m/37t24desWkpOTMWHChAbPN3z4cJw6dQo5OTnw9PTElClTAFRdm8WLF2vpRrWFe1Np6fxtSBfq06OmnJPH48HHxwehoaEIDQ2FQqHQspYvKCjQeibXfGbY2tri6NGjWuNVWlqKAQMGAAD+8Y9/4MaNG0hISEBlZSWWLFnyQvsGBgYYOHAgDhw4UKfcz89/oG6dbm0ae39kaRrswmQLGTBgAD744AN2YZKlWZgI+RDq81CiajjLpbqyEg8ePICtrQ0kRkYAAJ6uLnR1dFHezOQmJSo1OujzYCJs3EImy98foVCIoKAgCAQCnD17FoGBgQgLC0NsbCwAaH34srCwsPydUCgUePLkCaKjoxEdHY3/+7//g7OzM6KjoyESidpavJfG22+/DYFAgODgYFRUVODcuXNMgo+bN2/iwoULKC8vh0qlwrZt23Dt2jV4eHi0Wvt3797Frl27UFlZibCwMMTHx2Ps2LGtdv76kMlkDS6y+vj44JdffsHhw4cZN24A8PPzw7p163D37l0QEYqKinDs2DEUFhbi4cOHOHfuHMrKyqCnpwehUFin14BCocCXX36JnJwc5OTk4KuvvmLKVCoVysvLYWpqCl1dXZw5cwZnzpzRkj8vL08ruU1hYSE6dOgAsViM9PT0BpNuNGUsGpKnIUJDQ5GdnQ0OhwOJRAIOh1PruOjo6MDb2xvLli1Dfn4+srKysHLlSiYJTWtQWFgIgUAAiUSCvLw8rFq1Squ8ofGYNm0aVq1ahYyMDCiVSixduhQKhULL2qyx1KdLKpUK06ZNw9KlS7Ft2zYIhUJ88cUXjZYTAIqLiwGAWTgLDQ3Fn3/+yZTPmzcPGzZswPXr10FESE1NZSx8ZTIZnjx50mAfuFwu5s6di0WLFiE5ORlEhLt37+Lp06dwdXWFRCLBqlWroFKpcPv2bWzbtq3W62liYgJra2vs2LEDarUaR44ceakJXxurk88TEhKChIQEFBYWYsWKFVAoFIzVec+ePWFlZYXZs2fD29u71hAUNcnKysLhw4dRVFQEHo8HkUjEyDBv3jxs3boVV65cgUajQWlpKS5cuNCs5KMtnb8ymQyJiYl1vsvPnTsXoaGhiIiIgFqtRm5uboNu4DKZDGlpaS9YKs+aNQu7du3C7t27MXPmTK0yLpeLgIAAlJeX486dO9ixYweziO/n54fly5czcyIvLw/79u0DUBVu6tq1a1CpVDAwMICBgUGd1zo4OBhhYWFYu3YtcnJyAAAPHz7E1KlTkZ+fjylTpuD06dOIiIhAZWUldu7cibi4uEYtQreUV3F/fBNhFyZbiI6ODvT19bXcb1hYGouuDhcDOxnjaXEDC4tEePjwL+jr68O2pmsIh1P1Umdui9Ez/gWZbacmtf+0WIVBnYyhq8Pqb3uisrISs2bNwqRJkzBhwgRs2LBBK+YKCwsLy98RgUAAc3Nz5k8sFoPH48Hc3LxdWyHweDwcOXIEBw4cgJGREb799lvmA6eiogKffvopTExMIJPJ8PPPP+PAgQPNihsGVMWqrLaIqmb8+PE4efIkjIyMsHbtWhw6dAhSqbTF/WoMy5YtQ3BwMCQSCT7//PNa6ygUCty8eROdO3fWihM5YcIEBAYGwtfXFxKJBJ07d2biopWXl2P58uUwNTWFsbExrl+/ji1btgB4cQy++OILdO3aFY6OjozBQTUikQibNm3C1KlTYWRkhLCwMHh5eTHlTk5O8PHxgYODAyQSCWJjYxEYGIh79+5BIpFg7NixePfddxs1FgsWLMDRo0dhZGRUp+tpQ/I0xNmzZ+Hs7AyhUIj3338fISEhsLOzq7VutRt8ly5d0Lt3b/Tv319rQa6lLFy4ECqVCqampnB1dcWIESO0yhsaj4CAALi7u8PFxQUODg4wNDRESEhIo9pOTk6GUChkrO7q06XPP/8cIpEIixcvBpfLRWhoKLZu3crEw2yMDnfr1g3+/v4YOHAgZDIZoqOjGQsyAJg0aRJWr16N2bNnw9DQEEOHDkVSUhIAYM6cOXj06BGMjIwwdOjQevsVHBwMV1dXDBo0CGKxGHPnzkVpaSl4PB6OHTuGy5cvw8zMDAqFAmvWrMHIkSNrPc+2bdvwww8/QCqV4sSJE03SsabSFJ2syYwZMzB+/HjGRb1mZmigamHt3r17DbpxA1U/2n/33XewtraGRCLB9u3bmZiJffr0wY4dO+Dv7w9jY2PI5XJs2LChWT/0t3T+vv/+++D
xeDA2NkanTi9+87m4uCA8PBwBAQFMBvG7d+/We8533nkHvXr1goWFBSQSCbOI/tZbb8HKygoSiQSurq5ax5iYmEAul8PGxgbjxo3DihUrMGzYMADAv/71L3h7e2PcuHEwNDREz549cfr0aQBVmaznz58PY2NjWFpaoqCgAOvWrQNQFQu0e/fuTBtubm64ePEi/vjjD3Tu3BkSiQQKhQKurq4QiURwdHTEvn37sGTJEhgbG2Pjxo04ceIETExMGj2eLeFl3x/fRDjEmvqxsLQp2YVlWBsRC0uxoM4FwvS0NCQlJcPFpS94z/0S/ODBAxiKRLB5LkZUQ1SqNUgvKMWyMV1hJmJdudsDRAQOh4O4uDhER0fjnXfegZmZGVJSUjBnzhycOXOGqcPCwtJ+UCqVEIvFKCgogKGhYVuL88pITU2FjY3NG9fv1iQoKAhxcXHYs2dPW4vCwsLC0mocPXoU/v7+ePjwYVuL8toyadIk9OvXD8uWLWP2Xbx4Ed7e3i/VvZ+l/dGY91TdVyxTu0etViM/Px/GxsZtLQrLa4KZSB89rSS4n14AayODF8qLCgsRH/8Ezm85v7AoCVRZmJSWljW53UxlGXpZG7GLku2I6gVHJycnODk5obS0FDExMYiMjMTbb78NtVrNJsBhYWnHpKWltetYjM+TkZEB4M3rd2uiVCpRUlLSLLdEFhYWlr8j5eXlWL9+Pby9vdl7WzN59OgRzpw5g88//1xrDHNycqBWq9lxZWkShYWFDdZhLSZbkaKiIvD5fOzZsweTJ09msnCxsDTE06JyrD/zEAZ8HYj0/5flrrKyErdu3oKFhbm2C3cNMtLTkZ2Tg549eza6vcKyCpSo1PAf2QXGwvrjrrC8nmzatAl3795lgoEvWbLkBfc9FhaW9kF+fj7Mzc0bnc22PcHn86FqZpxlFhYWFhYWFhaWl4+enh4yMzMhkUhqLWcXJlsBtVqNBw8eIDo6Gh4eHrCxsWFdJVmaTOSTp9gZmQS5cQfocDkAER7ExECtVuMtZ2egDp3Kz8vDw4d/wdXNtdby51FrCMnPijGtvxyu9qxlb3tDo9GAy+Xi8ePHTObWsrIyFBUVoXv37nBxcWHqsLCwtA+qXWRSUlLeKJfmtLQ0dOvW7Y3rNwsLy6shMDAQ2dnZ2Lx5c1uL8gILFy6EmZkZAgICmnzsH3/8AT8/v3rjD1paWuLSpUtwcHBoiZjN5q+//kK/fv1QUFDQJu3XhlgsRlRUFBwdHV9qO66urli3bl2DMUWbSkpKClxdXZGQkNBgQqD2Tnh4OH799VecP3++rUV5I1AqlQ2G3mFduVuIUqnErVu3kJKSgoiICOzYsQN79+6FiYkJKisroavLDjFL4+jXUYqHWYW4mZQHudQAGelVrmkuffvWuSgJAPqC/8/evcflfP+PH390XR1EdV1dlUo6OjOEmcNkzIYJOWwoUfPZEGbMNspGNhszm81sWljO5rA5k7EZmUPMyBBmpRQq0kGn66rX7w/f3j8XlbOY1/12c7u5rvfr/X49X+/Xq+vwul4HSwoKCxAlJZjcprOppESQdCWPlh46Wno8moXtpUertMPRxcWFb775hsuXL/Pss8+SmprK1q1b+fHHH+UPJ5L0H2VjY/NEd9AFBwezfPlyo111T5w4gVs5ayiXTt9+0sstSdLjycLCAjMzM1QqFd26dePEiRMUFRXh5eXFlClTHskOwOX54Ycf7vncrl27kpCQoDz28PAgIiKCrl27Ks/l5ubeV3z3q3SWz528tj/KdQ+trKwe+vvNyZMnH8p1GzVqVGn1+jDqaNOmTUyePJl//vkHtVpNw4YNmTVrFi1btrwlv5KSEkJCQoiJiWHHjh1YWlqiVqvlZ4fHiOw1uw+5ubls3bqVlJQU9uzZQ1JSEu3atWPy5Ml8++23slNSuisqlQn9W7pSZCjhwJkLpCck4O3dpMx1JW9kYWGBCSYUFBZiaWlZbrri/+uUbOKiod+zrqhUsnPqv6y4uJhffvmFHTt2KM81a9aMnJwcrK2tKzEySZKk8r3zzjvKLp2SJEmPAwsLCyIiIqhXrx5qtZp9+/bRuXNn4uPjcXFxeaSxCCEoKSmRa4ZLT5WSkhJMTEyUwRVnz54lICCAVatW0blzZwoKCti9e3eZI0ENBgNBQUGcPHmS3bt3P9Sdu+XAtHsn5/LdBysrK0pKSliwYAG1a9fm119/5auvviI+Pp6DBw9WdnjSE8jCVE2/Zk4YLp7G0sEVlcXt1yk1MTGhSpUq5Ofnl5smp0DPucvXaOlhy6A27liYyg8z/2VCCKysrCgoKDD61XXNmjVy7VtJkiRJkqQyxMXF0bJlS6ytrfH19SUzMxMAMzMzuiSIRwAAIABJREFUGjZsiFqtRgiBiYkJer2ec+fOAbBw4UJat27NhAkT0Ol01KxZk02bNrFt2zbq1auHVqvl3XffLTff8PBw+vTpQ2BgINbW1jRq1Ihdu3Ypxzt06EBYWBgdOnSgWrVqxMbGEhwczIQJE4Dro9GcnJz4+uuvcXZ2xs7Ojvnz5/Pnn3/i7e2NRqNh0KBBGAwGo/QA/v7+JCUl0bt3b6ysrJg4cSJw/ftFfHw8sbGx6HQ6o7V8Y2NjsbW1VdY1Xrp0Kc888wxarRYfHx+OHz+upJ05cyaurq5YW1vj5eXFjz/+WOY9KCgo4I033kCn01GnTh2jH9YBFi9eTKNGjZTrfPvttwBkZWXxyiuvkJaWhpWVFVZWVpw8eZKEhAQ6deqEnZ0d9vb2+Pv7K/VZkaysLIYPH07NmjXRaDT4+PiU+R2rqKiIsLAwPD09sbe3JyAgwOj6/v7+ODs7K9c4duyYciw4OJgRI0bQp08frK2tadKkCUeOHFGOe3h4EB0dDVxvG3379uXNN99Eo9FQu3Zto3uTlJTEiy++iLW1Ne3atVPaSVkSExMxMTGhoOD6pqkdOnRg4sSJvPDCC1SrVo0OHTqQlpbGuHHj0Ol0eHl58fvvvyvnd+jQgdDQUNq2bYu1tTWdOnUiOTm5zGsDDBgwgPDw8HLrCCpuOx4eHsyYMYMWLVpQtWpVo9GWf/31F66urnTt2hWVSkXVqlXp2rUrTZo0MSpzYWEhffv2JSEhgd9++63cTskzZ87wyiuvYG9vT61atfjuu++UY4cOHaJt27ZotVqcnJwYMWKE0ZreJiYmfPfdd9SvXx+tVqvciyVLluDp6YmtrS1jx44tM1/p/5Mdk/epb9++fPzxx8yYMQONRsPmzZtp0aIFLVu2VNIUFxdXYoTSk0QIwdbNG3nRzZwxvs3JKyrmfGYehuKSCs+ztLSkoIw3TUNxCecz88grKiawlTsDn5Odkk+D0qWDFy1aRIMGDTh+/DiRkZEcPHiQy5cvG6WRJEl6nERGRqLT6WjatOl9TVWUJEm6G3q9Hj8/P3r37s2VK1d4++23WbJkiVEaHx8fqlSpQps2bWjfvj2tWv3/9d3//PNP3N3dSUtLY/z48bz++utERUURGxvL4cOHiYyMZO/eveXmv2HDBrp06UJmZibvv/8+fn5+Rh
1dCxcu5KuvviI3N5fmzZvfcn5GRgaXLl3i3LlzLFy4kFGjRjFlyhS2bt3KP//8Q0xMDKtWrbrlvBUrVuDm5sbatWvJzc3lk08+MTr+3HPP4eDgwJYtW5Tnli1bxmuvvYaFhQUbN27kgw8+YMWKFVy+fJnAwEB69OhBUVERp06dYtKkSezYsYOcnBz++OOPWzqOSn388cccO3aM+Ph4/vjjD5YtW2Z03N7envXr15Odnc2iRYt47733OHjwIBqNhq1bt1K9enVyc3PJzc2lQYMGCCEYP348qampxMfHc+HCBT788MNy73+poKAgLl26xF9//cWVK1eYPn16meuyh4aGcvjwYQ4cOEBycjLm5uaMGjVKOd65c2dOnTpFWloazz33HP7+/kbnL1++nHfeeYerV6/y4osvMnr06HJj2rRpE926dePKlSuMHDmSIUOGKMf8/f1p2LAh6enpzJkzh6ioqNuW8UbLli3j+++/V3bbbt26NY0aNSI9PZ0RI0YwdOhQo/Q//PADc+bMIT09ndq1axMYGHjbPMqro4raTqnFixezevVqcnJycHBwUJ5v0aIFZ8+eZdSoUWzfvr3MtUj1ej2+vr7k5uayffv2cjddycvLo1OnTvTs2ZMLFy6wZcsWpk+fzvbt2wFQq9XMnDmTjIwMDhw4wK5du/jmm2+MrrFq1Sp2795Nenq68tyOHTv4+++/+fPPP4mKiuK333677b16msmOyftQUlKCubk5ffr0ISMjg2nTprF9+3ZGjx7NkSNHGD58OEII1Go1JSUVdyxJEsCBAwdISUmhb5/etKplz3ud69G4hobUrHySruRxrdBQZodSFcv/P2JSCMG1QgNJV/JIzcqniYuW9zrXo5WXnZy+/ZQo/QBVq1YtFi1axBtvvMHUqVMpKiri7bffruToJEmSyjZ69GhOnz5NWloaX331Fe+//z4//fRTZYclSdJTYN++fVy7do0JEyZgZmZG586d6dKli1GamJgYcnJyWLduHV26dDGaTu3i4kJISAimpqYMHDiQjIwMxowZg0ajwcvLi9atW3P48OFy82/atCmDBg3C1NSUoKAgPD092bx5s3J88ODBeHt7o1KpypyuqlKpmDJlCubm5vTo0QNzc3MCAgJwdnbGwcGBzp07V5h/RQYOHKh0FBYXF7Ny5UqlQ2ru3LmMHz+exo0bo1arGTZsGCYmJuzfvx9TU1OEEPz999/k5+fj7OxMw4YNy8xjxYoVfPDBB1SvXp3q1asro0FLdevWjdq1a2NiYoKPjw9dunRh9+7d5cbs5eVF586dsbCwwN7enrFjxxqNQi3LxYsXWb9+PZGRkTg4OKBWq3n++edvud9CCCIiIpg1axbVq1fH0tKSjz/+mNWrVysDkl5//XVsbGywsLBg0qRJHD9+XBkcANCrVy/atWuHWq1m8ODBFdZNmzZt6N27N2q1mqCgIJKTk8nIyCApKYl9+/bx6aefUqVKFby9vRk4cGCFZbxZcHAw9evXp2rVqvTu3Rtzc3OGDBmCWq0mICCAM2fOGK1LGRgYSPPmzalSpQqfffYZMTExnD9//q7yLFVR2yk1atQovLy8MDMzM5oi7enpSUxMDFeuXGHw4MHY29vTq1cvLl26pKTJzc0lJiaGwMBAqlWrVm4cmzZtwtnZmZCQEMzMzKhXrx5vvvkmK1asAK4vhdW2bVtMTU1xd3dn6NCht7SlCRMmKG2h1JQpU6hWrRpeXl60b9/+nv/+nhayY/I+3PjryR9//IGJiQlfffUVu3bt4rXXXiM1NZWPPvrolrSSVJbz58/z66+/8tprrynTbe2sLAh+3pPQVxrQpaEjBYYSkq/kkXw5j+QreVzMKuBSdgH5WJCSVag8X2gooWsjJ0JfaUBQWw/srJ7undeeVrm5ufz000/s27ePxo0bM3jwYE6dOkVubq7cAEeSpMdO8+bNsbe3x9TUlI4dOzJy5EhWr15d2WFJkvQUSE1NxcXFxeg7m7u7+y3pzM3N8fPzY8uWLWzYsEF5vnRqNKB8jr/5uYo2Hrl5ky93d3dSUlIqjOVGOp0OMzMzo/zuJv+KBAYGsnnzZrKzs9mxYwdVqlTBx8cHuD6F97333kOr1Sr/Lly4QEpKivID+ezZs3F0dMTX15f4+Pgy80hNTTW6BzeXd+vWrbRu3RqdTodWq2Xz5s1kZGSUG/OlS5cYMGAALi4u2NjYKJ3FFUlKSkKj0RiNzCtLeno6eXl5tGnTRilz48aNUalUXLx4keLiYiZMmECtWrWwsbFRynJj/jfXzbVr18rNr6y2lZubS2pqKhqNxmgDF1dX1wpjv921y8ur1I11pNVqsbGxMWqnd6OitlOqonbfsmVLli9fzoULFzhy5AhJSUmMGTNGOW5ra8tPP/3EyJEjWblyZYVx/PXXX0ZxzJgxQ5k6fvr0abp3746TkxM2NjaEhobe0pbKivNB/f09LeTKnPepdJ0RPz8/Xn75Zfr3709ubi579+7FwcEBHx8fhgwZctcvEtLTJT8/n9WrV9OxY8cy20p16yp0fcaZlxo4kpFbxKXsAlKv5pNVoEdfLHAyKyDhVDJBbT1wtKmCvZU5pmrZGf40K11nMi8vj8uXL6PT6QgJCWHYsGFUqVKlssOTJEm6LZVKJZedkCTpkahRowYpKSmUlJQonZNJSUnl7tprMBg4e/bsA8s/KSnplsevvvqq8vhh/qB8u2t7eXnh7e3NmjVr2LlzJwEBAco5bm5uvP/++wQHB5d5br9+/ejXrx95eXlMmDCBN998k5iYmFvS1ahRg6SkJJo2bQoY34/SdQJ/+OEH+vbti5mZGb1791beH8qKPywsjJKSEuLi4rCzs2P9+vUMGzaswnK6ubmRlZVFRkZGhRuk2NvbY2lpyZEjR/Dw8Ljl+JIlS/j555/Zvn07np6eZGdno9VqH/j7WY0aNcjKyjLa2LJ0zceH5cZ6uXr1KtnZ2bi4uCjfLfLy8pT/X7x4kfr16wNl19Ht2k5555WlUaNGDBkyhO+//97o+e7du/Pjjz8yYMAAVCoVr732WplxtG3b1mg9zRuFhITQuHFjli9fjo2NDV9//bUymvJu45TKJ3su7lNpI1y5ciWvvvoqCQkJzJ07FwcHB/bt20enTp2wsrJCr9cDcl036VZCCNatW4eTkxNt2rSpMK2pWoWTpgpNXbW80tiZAS3dGNTanYGtPalemEqTmhqcNFVkp6SkvNa8+uqrpKamMnjwYFxcXHj11VflbnGSJD2WVq1aRU5ODiUlJezZs4c5c+bQu3fvyg5LkqSnQJs2bbC0tGTGjBno9Xp27NihbEBy6NAhdu7cSWFhIUVFRSxYsIB9+/aVu8nIvTh69CjLly/HYDCwZMkSzp49S7du3R7Y9Svi6Oh4207WwMBA5s+fz7p164zWFQwJCWH69OkcPXoUIQS5ubls3LiRnJwcTp06xY4dOygoKMDCwgIrK6tydxPv378/n376Kenp6aSnp/PZZ58px4qKiigsLMTBwQFTU1N++eUXfvnlF6P4MzMzjdbkzMnJoVq1a
mg0GlJTU5k5c+Zt74OTkxM9evRg+PDhZGRkUFxczN69e402OoHrP5oNGzaMsWPHcuHCBQDS0tJYv369kreFhQV2dnbk5+fzwQcf3Dbve+Hm5kbr1q2ZOHEihYWFxMXFsXz58oeSV6lly5Zx5MgRCgoKCA0N5fnnn6dmzZrY29tTs2ZNFi1aRHFxMevXr2ffvn3KeWXVUUVt53b27NlDRESEMqrx3LlzLFu2rMzv0j179mTFihUEBQWVuTxM9+7dSUxMZMGCBRQWFmIwGDh27JiymXFOTg42NjZYW1tz+vRpIiIi7vq+Sbcney8ekAMHDjBnzhzCw8MJCAjgs88+Y+rUqbi5uREdHa28SMjedOlmsbGxXLp0iV69et1z+9Bqtej1ejlEXFKU/tofHBxM48aNqV+/Pk5OTtjZ2ck1byVJeizNmTMHV1dXNBoNw4YNY+rUqQwYMKCyw5Ik6SlgZmbG+vXrWbNmDba2tsyaNYtBgwYB1zfRGDduHPb29jg6OjJv3jzWrFlDs2bN7imvmJgYrKysjJ7r2bMnW7ZswdbWlmnTprF27Vp0Ot19l+tOhIaGMmPGDLRabbkbxPTv359Dhw5Rp04do3Ui/fz8mDx5MkFBQWi1WurUqaNsGlRYWMjEiRNxcHDAzs6O/fv3K506N9+DSZMm0aBBA+rWrUvbtm2NNouxtrZm9uzZBAQEYGtry5IlS+jRo4dyvH79+gQGBlK7dm20Wi0nT55k8uTJxMXFodVq6datG7169bqje7Fo0SI0Gg2NGzfGzs6O0NDQMj83T58+naZNm+Lj44O1tTVt27YlNjYWuL4eqJeXFy4uLjRo0MBoU9wHbfny5cTFxWFvb09ISAgDBw40WhOzUaNGt2wkdD9KdxR3cHDg1KlTRtdesGABc+bMQafTsXnz5tvWUUVtpyyvvPIKn376KXD9u++WLVvw9vamWrVqtGvXjmbNmvHFF1+Uea6fnx/Lly9n8ODBrF271uiYlZUV27dvZ8OGDbi6uuLg4MDQoUPJzs4Gru8sv2rVKqytrfnf//5X5qhL6f6ZCDmE74H766+/MDExITc3l5SUFM6dO4efnx916tSRa01KRkpKSigoKCAnJwdHR8f7utaXX37Jq6++essaNZIE139tfuWVV/j1118rOxRJkh6w7OxsNBoNWVlZ5U47/C86f/48rq6uT125JUn6bwkPDyc+Pp4ff/yxskORnnBvv/02165dY/78+Q/82h06dGDAgAEMHz78gV9b+m+7k8+pcj7fA1RcXIxaraZZs2ZkZ2czadIknnvuOUaNGkVmZiY//PADffr0QafTYTAY5HRKCZVKhaWlpbK48P2wtbXlypUrsmNSKpO5uTkzZsxg/fr12Nra0qxZM6ytrZV1ciVJevKlpKQov/A/DUqn0D1t5ZYk6b8lOzubvLy8e97dWHp6xcXFYW1tjYeHBwcPHmThwoV88803D6UtFRYWkpmZKdupdNfuZHq+HDH5kMTFxTF06FC2bdvG33//TVxcHEIIHBwcePbZZzl58iTPPfdchQvrStLdWL9+PTY2NnTs2LGyQ5EeM7t370YIQcuWLVmyZAknT56katWqfPrpp8oPKpIkPbmuXr2Kk5PTLetgPQ3Mzc0pKiqq7DAkSZIkSZKkclhYWHDx4kW0Wm2Zx+WQvYdACEGTJk0YPHgwQUFB1K9fn5YtW9KuXTsiIyOZMWMG48ePl52S0gNla2tLenp6ZYchPUZKR0NmZ2cTHR1NvXr1+Ouvvxg3bpyyELfslJSkJ59KpaKwsJDk5OSnakpzSkoKDRs2fOrKLUlS+SZPnkxaWhpz586t7FBuMWbMGKpXr05YWNhdn7t3715CQkI4evRouWlq1KjB7t27qV279v2EeVemTZtGZGSk8h70tH2u1Gg0HDx4kLp16z7Q686cOZOzZ88+0nY8bdo0Tp8+TVRUVJnHo6KimDZtGrm5ufzxxx94eno+stikJ1t2djaurq4VLmsoOyYfohEjRmBlZUX9+vWpWbMm27Zt4/z581y+fFl5w5DTKKUHRafTcfr06coOQ3qMlL62PPfcc8yYMYM9e/bQqlUr6tSpQ8+ePSkpKZHr3krSf4iNjc1/ooPup59+YvLkySQkJGBvb8+sWbPo06fPLelKp2//V8otSdL9s7CwwMzMDJVKRbdu3Thx4gRFRUV4eXkxZcoU/Pz8Ki22H3744Z7P7dq1KwkJCcpjDw8PIiIi6Nq1q/Lco94EMzk5mS+//JKEhARq1KhxX9dKTEzE09OT/Px8qlSp8oAifDSsrKzu6z1o4cKFREREsH//fuW5jz766EGEdldK/3bKKoteryc0NJRdu3Y9kM18TExMOHnyJPXr17/va91OTEwMr7zyivL42rVrVK1aVfmetHXrVnx8fPjqq68YO3Ysa9euNdosqbRt1qlTx+i7tl6vx8XFhfT0dKXdBgcH4+TkxPTp041ieJTlfVLJb6QPgYmJibJ71+DBg6lfvz5ffvklycnJfP/99xw4cIC6dety7do1o7SSdD9K15iUpJtVr16dL774giNHjihT/VNTU4mMjGTr1q1cvny5kiOUJEm67rfffmPMmDF8//335OTkcPDgQby9vSs7LEmSnjAWFhZERERw6dIlsrOzmTt3LoGBgaSkpDzyWIQQFBcXP/J8H7Zz585ha2t7352SD4rBYKjsEP6zLl26RH5+Po0bN67sUIC7q2sfHx9yc3PJzc0lIyMDgOPHjyvP+fj4ANdHhOp0OhYuXFjmdUxMTNi7d6/yePPmzeh0unsvhGREdkw+JDeOQjpw4AA7d+5k4sSJyuM33niD119//Za00n9fecu63m8HtU6nIy8v76lcY0y6vZYtW9KvXz+io6N5//33Wb9+PXPnzkWj0WBmZlbZ4UmSJAEwadIkJk2axPPPP49KpaJ69ep4eXlVdliSJD2m4uLiaNmyJdbW1vj6+pKZmQmAmZkZDRs2RK1WKzPU9Ho9586dA66PUmvdujUTJkxAp9NRs2ZNNm3axLZt26hXrx5arZZ333233HzDw8Pp06cPgYGBWFtb06hRI3bt2qUc79ChA2FhYXTo0IFq1aoRGxtLcHAwEyZMAOD333/HycmJr7/+GmdnZ+zs7Jg/fz5//vkn3t7eaDQaBg0apHTAlKYH8Pf3Jykpid69e2NlZaV8xzQxMSE+Pp7Y2Fh0Op3R+ruxsbHY2toq3xOWLl3KM888g1arxcfHh+PHjytpZ86ciaurK9bW1nh5eZW5W3h0dDQvv/wyaWlpWFlZ0bt3bwAOHjxI+/btsbW1pUGDBvz888/KOVu3bqV58+bY2Njg6urKhx9+qBxr27YtAPb29lhZWREdHa3U0Y2cnJz4/fffjepgyJAhaLVaPv/8c4qKiggLC8PT0xN7e3sCAgKUNlEeIQTvvfcejo6O2NjYUL9+fSUPIQRffvkldevWRafT0a1bt3I3f7ld3rGxscq9cXJyYtq0aRw7dozhw4dz8OBBrKyssLKy4tq1a4SHhzNgwACje9e0aVM0Gg2tWrVi3759
yrHg4GBGjBhBnz59sLa2pkmTJhw5cuSuy3qjkpISRo4cSceOHTl69Cj16tVT6qdFixYAnDlzhldeeQV7e3tq1arFd999p5x/6NAh2rZti1arxcnJiREjRihtr7SuW7RogZWVFZGRkUbtu1Tr1q2VTsLStjB+/HiqV6/OyJEj76pubufw4cP8/fffzJs3j82bN5OWlnZLmqCgIKNOy4ULFxIUFHRP+UllENIjsXjxYnHixAlRWFgo3n33XXHo0CExbtw4sWTJksoOTaok8+fPFx9//LEYN26cyMjIeCDXnDZtmkhNTX0g15L+W3bt2iWaNGkiNmzYIJKTk8XBgwdFWFhYZYclSdIDkJWVJQCRlZVV2aHcF4PBIMzMzMT06dNFnTp1hLOzswgODhZXr14tM31ycvJ/otySJN2boqIi4eHhIT755BNRVFQktm3bJiwtLUVQUJCSpl27dsLc3FwA4uWXXxYGg0EIIURUVJQwNTUV3333ndDr9WL27NnC3t5e9O/fX1y9elWcPXtWWFtbiz/++KPMvCdPnizUarVYvHix0Ov1YuHChUKj0YgrV64IIYR44YUXhLOzs/jrr79EcXGxKCgoEEFBQWL8+PFCCCF27twp1Gq1CA0NFYWFhWLDhg3CwsJC9OjRQ6Smpoq0tDTh7u4uli1bpqR3dHRU8nd3dxdbt241igkQJ0+eFEIIUbduXbF27Vrl2OjRo8Wbb74phBBiw4YNwt3dXcTFxQmDwSAiIiKEp6enKCwsFPHx8cLS0lLEx8cLIYRITU0Vx48fL/Me3BxTamqq0Ol0Yu3atcJgMIj9+/cLW1tbceLECSHE9c+iR48eFcXFxSIuLk44OjqK1atXCyGESEhIEIDIz89XrhcVFSVatWpllKejo6PYuXOnUgempqZixYoVori4WOTl5Yl33nlHdOnSRVy6dEnk5eWJoKAgERAQUGb8paKjo0XNmjWV71D//vuvOHv2rBBCiNmzZ4vmzZuLhIQEUVRUJCZOnCief/75Mu95RXknJycLGxsbERUVJQoLC0VWVpbYv39/ueWcPHmy6N+/vxBCiNOnTwtLS0uxadMmodfrxaJFi4RGoxHp6elCCCGCgoKERqMRMTExwmAwiLffflv4+PjcdVlL8ywsLBT9+vUTfn5+Sn3cXD/Xrl0Trq6u4rvvvhNFRUUiPj5euLq6il9++UUIIcThw4fFH3/8IfR6vUhMTBQNGzYUn3/+eZn3TYhb25IQQrRq1UpERUUp90itVosZM2aIoqIikZeXd9u6KUt+fr4AREJCgtHzo0aNEi+99JIQQojatWuLL774QjlWWvZz584Je3t7kZ+fL9LS0oSDg4M4efKk0X258W/8RjeX92lzJ59T5VC9h6x0FNzAgQOZM2cOZ86c4eWXXyYpKYnJkyfL6UlPqWPHjrFw4ULGjh3L8ePHWbJkyQO5rk6nu+2vgtLTqVGjRvTu3ZsePXqQn5+PjY0NYWFhcikJSZIeG5cuXUKv1/Pjjz/y22+/ceLECS5dusSYMWMqOzRJkh5D+/bt49q1a0yYMAEzMzM6d+5Mly5djNLExMSQk5PDunXr6NKli9HmLC4uLoSEhGBqasrAgQPJyMhgzJgxaDQavLy8aN26NYcPHy43/6ZNmzJo0CBMTU0JCgrC09OTzZs3K8cHDx6Mt7c3KpUKCwuLW85XqVRMmTIFc3NzevTogbm5OQEBATg7O+Pg4EDnzp0rzL8iAwcOZNmyZQAUFxezcuVKAgMDAZg7dy7jx4+ncePGqNVqhg0bhomJCfv378fU1BQhBH///Tf5+fk4OzvTsGHDO8pzyZIlvPTSS/Tq1Qu1Wk2rVq3o3bs3q1evBqB9+/Y0adIElUpF48aN8ff3Nxplei9atGjBgAEDUKlUVKlShYiICGbNmkX16tWxtLTk448/ZvXq1RVOpTc3N6egoIDjx4+j1+vx9PRURurPnTuXqVOn4uHhgZmZGeHh4Rw8eJCkpCSjawghKsx76dKl+Pj4EBwcjLm5OTY2NrRq1eqOyrhy5Uq6dOmCr68vpqamylJx69evV9L06tWLdu3aoVarGTx4cLntpqKywvV1Sn19falatSo//fRTuet9btq0CWdnZ0JCQjAzM6NevXq8+eabrFixAoBmzZrRtm1bTE1NcXd3Z+jQofdd19WrV+fdd9/FzMwMS0vLO66b2yksLGT58uXK38fAgQPL3ACoevXqtGnThnXr1rFs2TJ69+79xK2H+jiTHZMPWek0bZVKha+vL6NHj2b+/PlkZWVRXFzMM888IzsGnhI31rOFhQXt27dn8+bNuLi4MGLECH7++WdlIf97JdeZlMpjZ2fHBx98wLJlyxgzZgyffPIJY8eOVaZ6iHKWGJAkSXpUqlatCsCoUaOoWbMmWq2WiRMnsmnTpkqOTJKkx1FqaiouLi5Gy2K5u7vfks7c3Bw/Pz+2bNnChg0blOdvnDpa+vpz83MVbSjj5uZm9Njd3d1oDcuyYrmRTqczWk6natWqd5V/RQIDA9m8eTPZ2dns2LGDKlWqKGvpJSYm8t5776HVapV/Fy5cICUlhVq1arFo0SJmz56No6Mjvr6+xMfH31GeiYmJrF+/3ui6K1eu5MKFC8D15cw6duyIg4MaXIaOAAAgAElEQVQDGo2GuXPnKmv+3asb73F6ejp5eXm0adNGyb9x48aoVCouXrxY7jU6duzIlClTCAsLw8HBgQEDBpCamqqUqX///sr17O3tUalUt0wZvl3eSUlJ97xbekpKCh4eHkbPeXh4GLW1m9vNtWvX7rqscH26+d69e/nwww8r3GE9MTGRv/76y6iuZ8yYodzn06dP0717d5ycnLCxsSE0NPS+69rV1dVo0+A7rZvb2bBhAwUFBfTt2xe4/rfz999/c+jQoVvSBgcHs3DhQhYuXEhwcPAtx83MzNDr9UbPlT6WS2dVTHZMPiJCCLp160ZYWBivvfYawcHBfPjhh5w4cUKuMfkUKN39+MiRI0yePJm6deuSmJjIW2+9xbfffsvUqVM5dOjQfe8qKkdMShVZunQpSUlJ1K9fnw4dOtCpUyflV2z5A4kkSZVNq9Xe8sVDkiSpPDVq1CAlJcXoM0xFo6UMBgNnz559YPnfnFdSUhIuLi7K44f5Wna7a3t5eeHt7c2aNWtYunQpAQEByjlubm7MmTOHq1evKv/y8vLw9/cHoF+/fuzatYuLFy9Sq1Yt3nzzzTuKyc3NjQEDBhhdNzc3l7lz5wIQEBCAr68vSUlJZGVlERISovwwXlZ5rKysyMvLUx7r9fpbBmDceJ69vT2WlpYcOXLEKIaCggKjeinLiBEjiI2NJSEhAYPBwPjx45Uybdiwweh6+fn5yjqJd5q3m5sb//zzT5l5364uXVxcSExMNHouMTHxtmW627ICvPjii0ybNo0XX3zRaBf4m7m5udG2bVujsubk5LBlyxYAQkJCqF27NqdPnyY7O5tp06ZVOAji5roGbulMvvk+3Wnd3E5UVBRFRUXUrl0bJycn2rVrh4mJSZmjJrt3787hw4cpKCi
gTZs2txx3c3O75b79+++/qFQqXF1d7yqup43sEXvEOnXqxGuvvQZc34xi1apV/Pnnn2zevJk9e/YAsoPgv0ilUnH58mXGjx9Ps2bNAJg3bx6DBw9m8uTJXLhwgU8//RS4v5FrcsSkVJbS15Rq1apx9epVRo8ezcaNGyksLFTeJCv6VVSSJOlReeONN5gzZw4XL14kJyeH6dOn07Nnz8oOS5Kkx1CbNm2wtLRkxowZ6PV6duzYQXR0NHB9842dO3dSWFhIUVERCxYsYN++fXTo0OGB5X/06FGWL1+OwWBgyZIlnD17lm7duj2w61fE0dHxtp2sgYGBzJ8/n3Xr1inTVOF6p9H06dM5evQoQghyc3PZuHEjOTk5nDp1ih07dlBQUICFhQVWVlZ3/BkxMDCQrVu3snHjRgwGA0VFRRw4cICTJ08CkJOTg62tLZaWlhw6dIjly5cr5zo4OKBSqYzK1LRpU2Uzn6KiIiZNmlTh92SVSsWwYcMYO3asMkozLS3NaMpzWQ4ePMi+ffsoKiqiatWqVK1aVSlzSEgIEydOVOLKzMxk1apVd533wIED2b17N0uWLEGv15Odnc2BAweA63WZkpJS7gam/fr1Y9u2bWzduhWDwcDSpUuJj4/Hz8+vwnLdbVlLjR49mrFjx9KxY8dbOkRLde/encTERBYsWEBhYSEGg4Fjx45x8OBB4Hpd29jYYG1tzenTp4mIiDA6/+b2W7duXYqLi/n5558xGAx8++23RiNCy3KndVOR1NRUfvnlF1avXs2RI0eUf3PmzGHFihW31Im5uTm//PILa9euLfN6r732Gr/88gubNm3CYDBw+fJlQkND6du3L+bm5ncV29NGdkw+ImX9EjJ48GDCw8NJTEzk4MGDjBw5kvT0dDmC8j/kxk7G3NxcNBoN3bt3B64Ps//ss8+YPn068+bNA66vAXM/v67KEZNSWUpfUzp27EheXh41a9bEycmJzMzMO/4VXJIk6VEICwujXbt2NGzYkFq1amFvb8+sWbMqOyxJkh5DZmZmrF+/njVr1mBra8usWbMYNGgQcH103bhx47C3t8fR0ZF58+axZs0aZYDA3YqJicHKysrouZ49e7JlyxZsbW2ZNm0aa9euRafT3Xe57kRoaCgzZsxAq9Ua7W59o/79+3Po0CHq1KljtE6kn58fkydPJigoCK1WS506dZT17gsLC5k4cSIODg7Y2dmxf/9+pVOprHtwo5o1a7Jlyxa++uorHB0dqVGjBqGhoUrnznfffcdHH32EtbU14eHhymAduP696IMPPqBjx45otVq2bdtGnTp1mDp1Kr6+vri7u+Pq6oq9vX2F92X69Ok0bdoUHx8frK2tadu2LbGxsRWek52dzfDhw7Gzs6NGjRpkZWUxffp0AN566y0GDBhA9+7dsbGxoWnTpmzbtu2u83Z1dSU6OpqIiAgcHByoV6+eshv2iy++iLe3N87Ozmi12lumYdetW5dVq1Yxfvx47Ozs+Prrr9m8efNt70WpV155RRkAU1FZb/T2228zZsyYcjsnrays2L59Oxs2bMDV1RUHBweGDh2qLEs2c+ZMVq1ahbW1Nf/73/+M6hpgypQpvPHGG2i1WubNm4eNjQ0RERG89dZbODo6cvHiRWX37/Lcrm4aNWqkrLNansWLF1OvXj169eqFk5OT8m/IkCGo1WrWrVt3yzne3t40aNCgzOvVr1+fNWvW8NFHH2Fvb0/Tpk2xs7Pj+++/rzAOCUyEXFisUuTk5LB9+3b8/PwYN24cgwYNIiMjg5SUFIYMGUJmZiZqtfq+p/ZKlU8IwaJFi2jfvj07d+7EzMwMPz8/li9fztGjR5U3eyHEfU/5yMrK4quvvuKDDz6QI+CkMh0+fBhXV1dOnz6NwWAgLi6OHj164OHh8UDaoCRJj152djYajYasrKyn6nPD+fPncXV1ferKLUlS5QsPDyc+Pp4ff/yxskORJEl6rN3J51TTRxyT9H+sra357bffSE1N5Y033uDQoUMEBwezbds2PvzwQ1JTUxk/frz8oP0fkJubS3Z2NitXrsTZ2RkTExOGDx9Ofn6+suYKPJh1aGxsbFCr1Vy9ehU7O7v7vp7033Hu3DlOnjzJ1atXWbp0KXB9indcXBwlJSW8/fbbsmNSkiRJkiRJkiRJeqRkx2QlKP3yP336dIKDg/n555+ZOnUqxcXFrF27lqioKH788Ufq1q1b2aFK9+DYsWN4enpiZWXF0qVL6d27N6+//jrR0dGcOXOGbt260bNnT4qLi9HpdBQXFz+w0Y0mJiZotVquXLkiOyYlI+np6ezdu5eSkhLOnz/P/PnzsbGx4cCBA3z++ee8/fbbslNSkp5wKSkpyjSqp0HpOl5PW7klSap82dnZ5OXl3fUOwFLlWLt2LRMmTCjz2KlTpx5xNJL0dMnJybltGjmVu5KUdkbl5ORgbm5OYmIi77zzDj179sTd3R21Wk2nTp1QqVRyFNMTJDMzk6ioKEpKSggICGD16tWcO3eO0NBQbG1teeedd7C2tmby5MmYm5s/lLpdvnw5tWvX5rnnnnug15WebHq9HjMzM3Jzc+ncuTOfffYZKpWKmJgYtFotw4cPr+wQJUm6R1evXsXJyanchfP/y8zNzSkqKqrsMCRJkiRJkqRyWFhYcPHiRbRabZnHZcfkY+DUqVMEBwczZ84cWrRoQUlJCb///jvm5ua0a9eussOT7lJKSgqRkZFotVr8/Pw4fPgwW7Zswd/fn8jISKZMmWK0APWDtnXrVkxMTOjatetDy0N6sq1evZqMjAwyMzOxtbXFz8+PGjVqVHZYkiTdo9K1e5KTk5+qJWBSUlJo2LDhU1duSZLuzOTJk0lLSzNaOulxMWbMGKpXr05YWNhdn7t3715CQkI4evRouWlq1KjB7t27qV279v2E+UD4+vrSp08f/ve//z2U67dq1Yrp06fTsWNHAKZNm0ZkZCSFhYXs37+f1q1bk5CQgIWFxQPN937qUKpcMTExDBkyhDNnzlR2KE+F7Ozs264JLqdyPwbq1atHw4YNcXFx4eDBg4SHh1OvXj1SU1NJTk7G39+/skOU7sKGDRuIj4/HwcGBlStX0r9/f6pWrUpERARBQUEPtVMSru/M/e+//z7UPKQnW+fOnTl79ize3t5ERkYya9YsmjVrRkBAwANdWkCSpEfLxsbmie6gu3m314KCArp168aGDRvKTF86fftJL7ckSQ+HhYUFZmZmqFQqunXrxokTJygqKsLLy4spU6bg5+dXabH98MMP93xu165dSUhIUB57eHgQERFhNCghNzf3vuJ7kNRqNZaWlg/tdfrkyZPK/5OTk/nyyy9JSEhQfnR/EPeirM2O7qcOH5bg4GCcnJzK3Gn7QTAxMeHkyZPUr1//oVz/buzdu5fx48dz7NgxAGrXrs1HH31Et27dSExMxNPTk/z8fKpUqQLAJ598wrfffsv27dupVq0aJiYm8rPDY0RV2QE87YqLiwGYM2cOer2eqKgoiouLqVWrFp988gk7d+6kpKSkkqOU7oQQgnPnzrF27V
pmz57NxIkT0Wg0LFy4kGbNmrFs2TJ69uz50OOwtbXlypUrDz0f6cm1b98+Nm3aRGZmJomJifTt21cZTaBSybcFSZIqR25urvIvKysLR0dH+vXrV9lhSZL0hLOwsCAiIoJLly6RnZ3N3LlzCQwMJCUl5ZHHIoRQvv9JD965c+ewtbWVM4HK8bDbn8FgeGjXvlF2dja+vr688cYbZGRkcOnSJWbNmlVuR+P48eOZN28eMTExNGrU6KHFVVJSgpyQfG/kN9BKVjoyydLSkqtXr1JUVER0dDQ7duzgq6++4uWXXzbqKJAN/fFTWicmJia4u7vTt29ffv31V5ydnWnZsiXx8fGcOnVK+bXmYdPpdGRmZsq2IpXrmWee4eeffyYjI4Pc3Fxat27NSy+9JNezlSTpsREdHU1ubi59+/at7FAkSXpCxMXF0bJlS6ytrfH19SUzMxMAMzMzGjZsiFqtVj7r6PV6zp07B8DChQtp3bo1EyZMQKfTUbNmTTZt2sS2bduoV68eWq2Wd999t9x8w8PD6dOnD4GBgVhbW9OoUSN27dqlHO/QoQNhYWF06NCBatWqERsbS3BwsLIZy++//46TkxNff/01zs7O2NnZMX/+fP7880+8vb3RaDQMGjRI6fQpTQ/g7+9PUlISvXv3xsrKiokTJwLXv5fEx8cTGxuLTqczWos3NjYWW1tbZV3ipUuX8swzz6DVavHx8eH48eNK2pkzZ+Lq6oq1tTVeXl5GIwZvVFJSwhdffEHdunWVe3D48OFb0iUkJNCpUyfs7Oywt7fH399fqaeK8vv333958cUX0Wg02NnZ0b59e+UcDw8PoqOjiY6O5uWXXyYtLQ0rKyt69+5NYmIiJiYmFBQUAJCVlcXw4cOpWbMmGo0GHx8f8vPzAXjnnXdwc3PD2tqa5s2bK3W4adMmPv30U3766SesrKzw8PAAMKpDgEWLFlG/fn20Wi0vvvgi8fHxRm3gww8/pGPHjlhbW/P8888r7a8sBw8epH379tja2tKgQQN+/vlnABITE9HpdMTExADXO+g8PT1ZunQp3333HcuWLePLL7/EyspKuUdltb+tW7fSvHlzbGxscHV15cMPPyw3llJt27YFoEWLFlhZWREZGanc34ULF+Lp6UmTJk0A2LZtG88++yxarZbmzZsr8ZbGXFoHTk5OjBo1Sqmfy5cv4+fnh62tLba2trRq1YqMjIxbYjl9+jR6vZ6goCBMTU2xsLDAx8fnlmXwhBCEhISwbt06YmJiqFWrVpllu3jxIv3798fR0RFXV1fCw8OVwWG3a7MeHh7MmDGDFi1aULVqVS5evIiJiQmRkZHUr18fjUZDYGCgXA/7doT0WPH39xcLFiwQ//zzj/j0009FQkKCmDZtmlixYoUQQoiSkpJKjlAqyy+//CLeeecdMXr0aPH999+L5cuXi4kTJ4oBAwaIr7/++pHGotfrRXh4uMjKynqk+UpPli+//FKMGTNGxMbGCiGEyMjIkK8vkvSEysrKEsB/6nW/b9++YujQoRWmSU5O/s+VW5Kke1NUVCQ8PDzEJ598IoqKisS2bduEpaWlCAoKUtK0a9dOmJubC0C8/PLLwmAwCCGEiIqKEqampuK7774Ter1ezJ49W9jb24v+/fuLq1evirNnzwpra2vxxx9/lJn35MmThVqtFosXLxZ6vV4sXLhQaDQaceXKFSGEEC+88IJwdnYWf/31lyguLhYFBQUiKChIjB8/XgghxM6dO4VarRahoaGisLBQbNiwQVhYWIgePXqI1NRUkZaWJtzd3cWyZcuU9I6Ojkr+7u7uYuvWrUYxAeLkyZNCCCHq1q0r1q5dqxwbPXq0ePPNN4UQQmzYsEG4u7uLuLg4YTAYREREhPD09BSFhYUiPj5eWFpaivj4eCGEEKmpqeL48eNl3oNZs2aJevXqiWPHjomSkhJx+vRpkZiYqJR/7ty5Qgghzp49K7Zt2yYKCgpEenq6eOGFF8TIkSOFEKLC/AYMGCCGDRsmioqKRFFRkdi9e3eZ5b/53iQkJAhA5OfnCyGE8PPzE7169RJpaWnCYDCIPXv2iIKCAiGEEEuXLhXp6elCr9eLWbNmCQcHB3Ht2jWljvv3729U5pvrUKvVin379onCwkIxdepU4eXlpVz7hRdeEG5ubuLYsWOisLBQ9O3bVwwaNKjMe5mamip0Op1Yu3atMBgMYv/+/cLW1lacOHFCCCHEkiVLhLu7u7h69aoIDAwU/v7+ZcZUqqz2t2vXLnH06FFRXFws4uLihKOjo1i9enWZ8dzoxnZ14/3t37+/yMrKEnl5eeLIkSNCp9OJmJgYUVxcLDZu3Ch0Op1IT08XQgjRp08fERQUJLKyskRmZqbo3LmzCAsLE0IIMWHCBNG9e3dx7do1YTAYxKFDh0ROTs4tcWRlZQk7Ozvh7+8vNm7cKNLS0oyOl8b12muvicaNG4uLFy8aHb+xnRQXF4uWLVuKsLAwkZ+fL1JSUoS3t7eIjIwUQlTcZoW43v4aNWokzp49K4qKioRer1deYzIyMsSlS5eEl5eXWLBgwW3v73/VnXxOlSMmHxOlPfILFiygqKgIDw8PevbsSUREBDt37mTx4sUcPXpUjmZ6jIj/G5F49epVJk2aRPfu3encuTMrV65EpVLxwgsv0L59e0aPHm2U/mEzNTXFxsbG6JccSbrZmDFjGDduHC1btkQIgZ2dnXx9kSTpsZCRkcHGjRsZMmRIZYciSdITYt++fVy7do0JEyZgZmZG586d6dKli1GamJgYcnJyWLduHV26dDFaU9vFxYWQkBBMTU0ZOHAgGRkZjBkzBo1Gg5eXF61bty5zBGCppk2bMmjQIExNTQkKCsLT05PNmzcrxwcPHoy3tzcqlarMTVhUKhVTpkzB3NycHj16YG5uTkBAAM7Ozjg4ONC5c+cK86/IwIEDWbZsGXB9GbGVK1cSGBgIwNy5cxk/fjyNGzdGrVYzbNgwTExM2L9/P6ampggh+Pvvv8nPz8fZ2bnctfIjIiKYOnUqzzzzDCYmJtSpUwd3d/db0nl5edG5c2csLCywt7dn7NixysjEivIzNzfnwoULnDt3DjMzM3x8fO76Ply8eJH169cTGRmJg4MDarWa559/XqmPgQMHYm9vj6mpKWPGjEGv1xutX1mRpUuXEhwcTOvWrTE3Nyc0NJT8/Hz27NmjpHn99dd55plnlLotrz6XLFnCSy+9RK9evVCr1bRq1YrevXuzevVqAAIDA2nbti0+Pj7ExMTc0eZON7e/9u3b06RJE1QqFY0bN8bf399olO/dCg8Px8bGBktLS77//nveeOMN2rVrh0qlonv37nh7e7NlyxbS0tLYsGED33zzDTY2Nmi1Wj744ANWrFgBXK/ny5cv888//6BWq5XRmTezsbFh7969VK1alZEjR+Lk5ETHjh35559/jNJFR0fTu3dvHB0dy4390KFDJCcnM3XqVKpUqUKNGjV45513lJgqarOlRo0ahZeXF2ZmZpiaXt/GJSwsDDs7O6pXr46vr+89//0+LWTH5GNCpVIhh
MDS0hJ/f3+2bt1KUFAQjRo1Ytu2bbz00kssXbq0ssOU/k9JSQkmJiacP3+enTt30qxZMzp27Iivry/vv/8+V65c4eWXXyYkJMQo/aMi15mUbueff/655Q2ytPO8dEqLJElSZVi2bBm1a9emVatWlR2KJElPiNTUVFxcXIyWwCqrY8zc3Bw/Pz+2bNlitLFW6dRogKpVq5b5XEWbqLi5uRk9dnd3N1rDsqxYbqTT6TAzMzPK727yr0hgYCCbN28mOzubHTt2UKVKFaVjLzExkffeew+tVqv8u3DhAikpKdSqVYtFixYxe/ZsHB0d8fX1NZqefKOkpKQ72gH80qVLDBgwABcXF2xsbJROYKDC/D7//HNq1KhBx44d8fLyuqfNXZKSktBoNDg4OJR5fObMmTRo0ACNRoNWqyUrK6vMacRlSUlJUaZ4w/Xv9m5ubkZt4E7rMzExkfXr1xvVycqVK7lw4YKSZvjw4Rw7doyhQ4ei0WhuG9/N7e/AgQN07NgRBwcHNBoNc+fOveOy3u76iYmJfPPNN0bx79+/n9TUVBITEykuLsbV1VU51r17d9LS0gB47733aNeuHX379qVGjRq8//776PX6MvOsW7cu8+fP59y5c/z777+Ym5szaNAgozSbN2/m66+/ZubMmeXGnpiYSHp6Ora2tkpMI0eO5NKlS0DFbbas8pd6UH+/TwvZMfkYMTExITU1lS+//JLFixfTpEkTBg0axJUrV7CwsGDAgAGVHaL0f1QqFdnZ2cybNw9bW1tcXV3p168f+fn5/P777yQmJt6S/lEqXWdSksrj6upK8+bNAZROcxMTE7KysggPD5dvnpIkVZqoqChef/31yg5DkqQnSI0aNUhJSTHaNDQpKanc9AaDgbNnzz6w/G/OKykpCRcXF+XxwxygcLtre3l54e3tzZo1a1i6dCkBAQHKOW5ubsyZM4erV68q//Ly8vD39wegX79+7Nq1i4sXL1KrVi3efPPNMvNwc3O7ZbRaWcLCwigpKSEuLo7s7GyWLVtmNKusvPyqV6/O3LlzSU5O5ueff+bzzz/n119/vaP7c2OM5XU2xsTEMG3aNFauXElmZiZXr15Fo9EY7SVQERcXF6PvfyUlJSQnJxu1gbuJc8CAAUZ1kpubq4yMLCgoICQkhCFDhvDFF18Y5VtenDc/HxAQgK+vL0lJSWRlZRESEnJfs/tuvL6bmxvvv/++Ufylo5nd3NwwNTUlLS1NOZaVlaV877CysmLGjBmcOXOGmJgY1q1bx+LFi2+bv7u7O2+99RZ///230fMtW7Zk27ZtTJ06lVmzZpV5rpubGzVr1jSKNzs7W1lr9XZt9ubyS/dGdkw+ZjIzM7G2tmbVqlVkZmby1ltvMX36dNLT0/ntt9/46KOPlGkBcrfuR+/Gez579mx27drFCy+8QGhoKC1atGDcuHGkpKQQHh5eeUEiR0xKt1elShWysrKU0ZGZmZn8+++/XL58mejoaPbu3VvJEUqS9DQ6fPgwx48fv2XUgyRJUkXatGmDpaUlM2bMQK/Xs2PHDqKjo4HrUzV37txJYWEhRUVFLFiwgH379tGhQ4cHlv/Ro0dZvnw5BoOBJUuWcPbsWbp16/bArl8RR0fH23ayBgYGMn/+fNatW6dM4wYICQlh+vTpHD16FCEEubm5bNy4kZycHE6dOsWOHTsoKCjAwsICKysro+nvNxo6dCiTJk3ixIkTCCE4c+ZMmZu75OTkUK1aNTQaDampqUYj2SrKb9WqVSQnJwOg1WpRq9XlxlIeJycnevTowfDhw8nIyKC4uJi9e/dSWFhITk4Opqam2NvbYzAY+OSTT8jOzlbOdXR0JDExsdzv3wMHDmThwoXExsai1+v57LPPMDc3v2UzljsRGBjI1q1b2bhxIwaDgaKiIg4cOKBMK3/33Xdxc3Nj/vz5jBgxgsDAQGWnbUdHR/7999/b5pGTk4OtrS2WlpYcOnSI5cuX31Fsd9LWhg4dSmRkJHv27KGkpIT8/Hx27tzJ+fPncXJywtfXl7ffflvZrDU5OVn5W920aROnT5+mpKQEGxsbzMzMyqzn+Ph4Pv/8c5KSkhBCkJaWxvz582nTps0taVu1akV0dDTh4eF8/fXXtxxv2bIlDg4OfPzxx1y7do2SkhLOnDmjTNeuqM1KD47smHzMNGrUSNn17fvvvyc0NJQePXoQHh6OTqejfv36fPzxx2RmZj7yUXhPu5KSEuWeX7x4kQ8++IDmzZszbtw4AMaPH89nn31GVFQUlpaWyhtEZZAjJqWKlP7Kt3r1ambNmsWePXtYtWoVq1atYuXKlfTv399oOookSdKjEhUVha+vb4XrQUmSJN3MzMyM9evXs2bNGmxtbZk1a5byA4der2fcuHHY29vj6OjIvHnzWLNmDc2aNbunvGJiYm5Z965nz55s2bIFW1tbpk2bxtq1a9HpdPddrjsRGhrKjBkz0Gq15e6u3L9/fw4dOkSdOnWM1on08/Nj8uTJBAUFodVqqVOnDkuWLAGgsLCQiRMn4uDggJ3d/2PvvsOjKvO3gd+TmUzaTCa9h/RIFUILHUGRLk0MCoKCCv50l6zA0lQiKFJ1laWIgkBApUmvovTei4AJSQghlfSeac/7R96ZJZJAAkkmhPtzXVy7mTkzz31OzjlOvvMUR5w6dQrLli0D8OAxGD9+PEaOHIn+/ftDqVRi8ODB5XaSmDFjBq5cuQI7Ozv06dMHAwcOND73sPbOnz+P9u3bw8bGBl26dEF4ePhjFZZXr14NlUqFZs2awdHREVOnToVer0fPnj3Rt29fNGzYED4+PjA3N4e3t7fxdUOHDoW5uTkcHR3LXdm5W7duWLhwId588004Oztj//792LVrV7nziZanSZMmxnlAvby8sHv3bvznP/+Bq6srPDw8MMAS06kAACAASURBVHXqVJSUlGDPnj3YuHEjVq1aBYlEghkzZkCj0WD27NkAgDFjxiA6Ohr29vbo1q1bhe0tWbIEM2fOhFKpREREBIYOHVqpnJ999hneeecd2NnZ4fvvvy93m5YtW2L16tWYNGkSHB0d4ePjg4ULFxqLuqtXr4a5ublxxfmePXsiKioKQOlUU7169YJSqcTzzz+Pl19+2Xgdjxs3DuPGjQMAKJVKnDt3Dh06dIBCoUCLFi2gUCiwevXqcjO1a9cOe/fuxaeffopFixaVeU4qlWLHjh2Ijo5GUFAQ7O3t8dprrxmHzj/snKXqIxG1tSIHVYmhCJaYmIi1a9di8uTJGDFiBL744gvExMTA2toa7dq1gxCCXYdrwf3HefTo0cjLy4OjoyNmzpyJb7/9FrGxsZX+pqk2JCUlITIyEpMnTzZ1FKqDDPeXyMhIzJkzB+Hh4bCxsUFwcDD8/f1r7YM0EVWP3NxcqFQq5OTkwNbW1tRxas3du3fh7e39zO03EdUtERERuHnzJn755RdTRyEiqnMq8zlVVsuZqJIMPfM8PT1x/fp1zJ07F3379kVGRga6d+9u3I5FydphOM5Lly6FSqXCypUrsWjRIkydOhU//PADZs+ejfj4
+EdObF1bnJyc0LJlS+h0uioPc6D6z3B/efHFF3H48OFy5wvilx5ET5/ExMQyQ8/qO0Nvhmdtv4mobsnNzUVhYSHu3r1r6ihERHVOXl7eI7dhj8k6zNCrKT8/H59//jkyMjIwefLkMiueCSHw66+/oqCgACNHjjRh2vrHUJgx/G9JSYlxmP2cOXNgY2ODKVOmoH379hgwYECZ19QFGo2mzOp+ROVRq9WQy+UQQhjvOXXlHCaiysnOzoabmxtKSkpMHaXWyeVyqNVqU8cgIiIiogpYWFggJSUFdnZ25T7PwmQdd/+8hn9XWFiI5cuXQ61WY9euXRg7dizeeOONWk5Yf2VmZhqHtBp+D2q1GrNmzYJGo0GbNm2wdOlSfPLJJ+jatauJ0xI9nh07dsDd3R2tW7c2dRQiekyGITIJCQnP1JDmxMRENG7c+Jnbb6oZM2bMQFpamnHl27okPDwcLi4umDZtWpVfe+LECbz//vu4fPlyhdt4eHjgyJEjZTo/1KaoqCi0adMGOTk5JmnfVGJiYvD2228jJiYG48ePx7///W9TR3pAaGgo5syZ89D5CqluCgoKwsqVK9G5c2csWLAAMTExj3V/e5LXEgGln1MfNfUOh3LXcfcXJbdu3Qq9Xo/BgwcDKF1VNysrC3l5eVi1ahU+//xzDBgwADY2NqaKW2+kp6ejS5cumDt3Lvr37w8zMzNoNBrI5XLMmjULCxYswC+//IJ//vOfLErSU61Dhw6QyWTQarXIzc1FTk4OCgoK0LRp0zrVA5iIHs3W1vapL9Ddvn0bH3zwAU6ePAmZTIZevXph8eLFUCqVD2xrGL5dH/abTM/CwgLm5uYwMzNDnz59cP36dajVavj7++Ozzz4zjo4xhZUrVz72a3v16oW4uDjjz76+vli2bBl69eplfCw/P/+J8j0pwyIqlbmODx06hGHDhiElJeWJ233hhRcwbNgw44IatW3JkiXo2LEjLl26ZJL2/66842FYCbo+KO/cr06rVq3CsmXLcOrUqRp5/6qSSCSwsbGBra0tZs6cWanXlLcPlX1tdYuJicEnn3xiXKnd09MTYWFhmDRpEusd9RCXdX6KDBw4EAMHDsTp06cBlBYtJ02ahJiYGPj6+mLKlCmcT7CamJmZITAwEAsXLsTPP/8MAGWGRU+cOBG9e/fGb7/9hvj4eFPFJHoiaWlpyMzMxPXr1/Hzzz9j7dq1mDp1KmbMmMGiJBGZxHvvvQcHBwckJibir7/+wt27dytcZZaoJlhYWGDZsmVITU1Fbm4uli5dihEjRiAxMbHWswghoNPpar1dKqXX61GTgwvj4uLQrFmzx3qtVqut5jRUl49pXc5WE2JjY9G2bVs4OjriwoULyM3NxZ49e5Ceno5bt26ZOh7VABYmnzIpKSmYPn06YmJiAABnz56Fn58fJBIJgoKCYGlpaeKE9YODgwPGjh2LV155BZs2bcLSpUuRl5eHoqIibNu2DXv27MHo0aPRqFEjk3/LXJ69e/di6tSpmD17Ns6dO2fqOFRHbdmyBRs2bMCiRYuwf/9++Pr64osvvoCFhQUuXrwIADX6gZyI6O/i4uLw+uuvw8rKCvb29hgyZAiuXr1q6lhUD125cgVt2rSBUqlE3759kZWVBaD0i+jGjRtDKpUav6TTaDTGL6JXrVqFdu3aYcqUKXBwcICXlxd27tyJffv24bnnnoOdnZ1xTvLyREREYPDgwRgxYgSUSiWaNGmCw4cPG59/4YUXMG3aNLzwwguwsbHBmTNn8NZbb2HKlCkASnsMurm54ZtvvoG7uzscHR3xww8/4Pz582jRogVUKhXefPNNYyHDsD0AvP7667hz5w4GDRoEhUKB6dOnAyjtWXXz5k2cOXMGDg4OZeZtPXPmDOzt7Y1z2K5duxZNmzaFnZ0dOnfujD///NO47YIFC+Dt7Q2lUgl/f/8KV6kuLi7GO++8AwcHBwQFBeHAgQNlnl+zZg2aNGlifJ/FixcDAHJyctC7d2+kpaVBoVBAoVDgxo0biIuLw4svvghHR0c4OTnh9ddfN/4+KzJ58mQcPXoU4eHhUCgUePPNNwGU9qqbN28eWrVqBWtra6SkpFSY5/7ju2jRIri7u8PFxQXz5883Pn/u3DmEhobC1tYWzs7OGD58OACgS5cuOHjwoLH9s2fPIjc3F2PGjIGbmxu8vLzw0UcfGY/77du3IZFIsGrVKvj5+eH555+v8rmQm5uL/v37w8XFBfb29ujTpw/u3LnzyOOxd+9eAKXzkk+aNAleXl5wdXXFW2+9VWb4vUQiwfLly9GwYUOoVCqMGDHioXMAr1mzBk2bNoVSqURgYKCxncoch8jISPj5+cHe3h7/+te/jO8ZGxuL7t27Q6VSwdHREV26dAHw8HN/yZIlaNiwoXHuO8P1YDBlyhS89dZbxp/PnDmDLl26wN7eHm5ubvjyyy9x9epVjBs3DmfPnjWemwUFBRXuu+E+8tFHH8HOzg4BAQHYsGGD8fm33noL48aNM+bduHEjcnNzMW7cOHh5ecHNzQ0ffvghiouLja/5+uuv4enpCRcXFyxcuLBMexERERg2bNhj7cPfX7tnzx40b94cKpUKoaGhOHnyZJnc//d//4fBgwdDqVTi+eefL9MjuLL3iIiICLRt2xaLFi2Cl5cXgNJz8b///S+aN29u3If27dtDpVKhWbNm2LlzZ5nXDxkyBKNHj4atrS2CgoJw5swZrFmzBj4+PnBycsLXX3/92NtX97VAAAQ9NfR6vRBCiO+++068/PLL4rPPPhMvv/yy+OOPP0ycrH4xHOfLly+L5cuXCyGEGDx4sGjbtq1ISkoyZbRK+f7778WcOXPEpUuXxOnTp0X79u3FjRs3TB2L6qBjx46Ju3fvin/+859i8+bNQgghkpKSxODBg8XGjRtNnI6IKisnJ0cAEDk5OaaO8sT++9//iuHDh4u8vDxx79490bVrVzF//vxyt01ISKg3+021S61WC19fX/HFF18ItVot9u3bJ6ysrMSoUaOM23Tq1EnI5XIBQPTo0UNotVohhBA//vijkMlkYsmSJUKj0Yhvv/1WODk5ibCwMJGdnS1iYmKEUqkUx48fL7ftGTNmCKlUKtasWSM0Go1YtWqVUKlUIjMzUwghRNeuXYW7u7u4ePGi0Ol0ori4WIwaNUpMnjxZCCHEwYMHhVQqFVOnThUlJSVi+/btwsLCQvTv318kJSWJtLQ04ePjI9atW2fc3tXV1di+j4+P2LNnT5lMAIyfFYODg8WWLVuMz/3zn/8U7777rhBCiO3btwsfHx9x5coVodVqxbJly4Sfn58oKSkRN2/eFFZWVuLmzZtCiNLPE3/++We5x2DatGmibdu2IjU1VaSmpop27dqJ+/8s3bVrl4iOjhZ6vV4cOXJEWFlZiTNnzpS7P0IIERMTI/bt2yeKi4uN940PPvig3Lbv17VrV7F06dIyj/n4+IgmTZqImJgYoVarhUajeWQeqVQqJk2aJEpKSsTp06eFubm5iI6OFkII0a5dO/H5558LnU4nioqKxLFjxypsf9S
oUaJnz54iKytLJCcni7Zt24pp06YJIYSIi4sTAERYWJjIyckRhYWFVT4XsrKyxMaNG0VBQYHIy8sTYWFhom/fvo88HobzZcaMGSIkJEQkJiaK7Oxs0b9/f/HGG28YtzVcK+np6SI1NVX4+/uLFStWlHvst2zZIlxdXcXx48eFXq8Xd+7cEdevX6/0cRg5cqTIz88XMTExQqVSid9//10IIcSwYcPE2LFjhVqtFmq1Whw5cqTcfbk/c9euXUVqaqooLCw0Pnb/306TJ0823hsSEhKEra2t+PHHH0VJSYnIyckRp06dEkKU3htCQ0PL3d+/+/HHH4VUKi1zD7K0tBRRUVHGY2BjYyP++OMPodfrRWFhoRg8eLAYNWqUyMnJEVlZWeLll182Hpf9+/cLBwcHcf78eVFUVCTGjh0rpFKpOHjwoPF3FxYW9lj7cP9ro6KihJWVldi5c6fQaDRi9erVQqVSiXv37hlzq1QqcfToUaHVasX48eNF586dhRCiSvcIV1fXCs8dIYTIzMwU9vb2Yvny5UKj0Yh9+/YJa2tr4zk0Y8YMIZfLxbZt24RWqxUTJ04UDRo0EGPHjhWFhYXi1KlTwtzcXNy5c+ext6+ua+FZUJnPqSxMPqUuXLggfv/9d5GVlVXmcZ1OZ6JE9dOnn34qNmzYIF555RUREhIifvnlF+NzhgJmXWH4wDxo0CCxd+9e4+Pjx483/ke5rmWmumHLli3itddeE4sXLxbffPON2Llzp6kjEVEV1KfC5LVr10SrVq2EmZmZACBefvllUVJSUu62LEzS4zp8+LBwdnYu87l54MCBZQqTQghRUlIitm7dKhYsWGB87McffxQ+Pj7GnzMyMgQAcfLkSeNjPXr0EIsWLSq37RkzZoiWLVuWeaxFixYiMjJSCFFaHDIUIQ3+Xpg0NzcXarXa+LxSqRQ///yz8ed3331XTJgwwbh9VQqTn332mXj11VeFEKWfLV1dXcXhw4eFEEL07t1bLFmypMxr/f39xeHDh8WtW7eEpaWl2LRpk7HAUxE/Pz+xfft2489bt24VD+svM3DgQOPvoLzC5N9t3bpVNG3a9KHbCFFxIe7vjz0qj7m5eZn7VLNmzYxf8Hbp0kW8++67IiEh4aHta7VaIZfLxaVLl4zP79mzR3h5eQkh/leQu79gVtVz4e8uXbokFArFI4+H4XwJCAgQW7duNT5348YNIZVKjfsOwFgIE0KIf/zjHxUWiHv27Fnul06VPQ5xcXHG5/v37298r5EjR4pXXnnFWBiuaF8MADz0ehCibGHyyy+/LFPMvV9VC5Pl3YNmzZolhCi95g3FQCGESE1NFTKZTOTm5hofO3LkiPDz8xNCCPH222+Ljz76yPhcVlaWkEgk5RYmq7oP97921qxZYuDAgWWeDw0NFT/88IMx9/330fPnzwsbGxshhKjSPUImkz3we7lfZGSkaNGiRZnHwsLCxMcff2zM3LVr1zI5AIjk5GTjY0FBQWLHjh2PtX11XgvPgsp8TuVQ7qeM+P/DKkNCQtCmTRtjt3agtNv777//juTkZACl86JQ5fx9/h7Dz4ah3MOHD8eFCxcQFhZm3Kauzb9nyDNgwABs3rwZM2fOxDvvvINr164ZhxIIDsulcgwcOBArVqyAnZ0dZDIZioqKnmiifSKix6HT6dCrVy/07dsXBQUFyMnJgaenJ0aMGGHqaFTPJCUlwdPTs8wikz4+Pg9sJ5fLMWDAAOzevRvbt283Pm4YGg0A1tbW5T72sKl+GjRoUOZnHx+fMnNYlpflfg4ODmXmPre2tq5S+w8zYsQI7Nq1C7m5uThw4AAsLS3RuXNnAKXDaCdNmgQ7Ozvjv+TkZCQmJiIgIACrV6/Gt99+C1dXV/Tt27fMcNj7JSUllTkGf9/fPXv2oF27dnBwcICdnR127dqF9PT0CjOnpqZi2LBh8PT0hK2tLYYPH/7Q7R+lqnkcHBwgl8uNP99//FeuXInCwkK0bNkSTZo0qfDzVXp6OtRqNXx9fY2P+fr6Ijk5uczn979nq8q5UFhYiLFjx8LHxwe2trbo3Lkz8vPzy/w9+TCJiYkP5NPpdGUWIqrseXjnzp1yV4Gv7HGoqJ358+fDw8MD3bp1g7+/P+bMmfPI/XrU9VaZ3I+jvHtQRfeB27dvQ6fTwdvb23jt9evXD2lpaQAevKbs7OwqXEzqSfbh7+cAUPr7uT/33383hr9Dq3KPcHR0RFJSUrXmKO+x+8/PqmxfndcClWJh8ilzfzEsMTERFy5cgF6vR0xMDDZu3IhZs2bho48+AlC6gAsLUY+m1+shlUqRn5+P9evXA4BxEaEmTZrg448/xmuvvWbctq4eU8N/2AYNGoQGDRpg06ZNSElJgb29Pe7evVtmG6K/27ZtG7Kzs2FhYYHTp0/j119/Na7EWFfPeSKqX7KysnD37l384x//gKWlJWxtbfH+++9j9+7dpo5G9YyHhwcSExPLfIlvmGuvPFqt1ji/e3X4e1t37tyBp6en8eea/PL7Ue/t7++PFi1aYNOmTVi7di3eeOMN42saNGiA//73v8jOzjb+KywsxOuvvw4AeO2113D48GGkpKQgICAA7777brlteHh4lDkG9///kpISDBkyBOHh4UhNTUV2djb69u1r/CxSXv5p06ZBr9fjypUryM3Nxbp16yr12aWiY3H/44/K8ygBAQFYu3YtUlNTsXjxYowbN67cxTucnJwgl8tx+/Zt42O3b9+Gu7t7mTxPcm4sXLgQ169fx6lTp5Cbm4ujR48CwEOP7f08PT0fyGdmZlamAFNZDRo0eKLjUBEXFxcsXboUCQkJ+PXXXzF//nz8/vvvACr3+wYAGxsbFBYWGn++v9hUUe6HvX9FyrsHVXQfaNCgAWQyGdLS0ozXXk5OjrHY9fdrKjs7G7m5ueW2+yT78PdzACj9/dyf+2Eqe494+eWXsWnTphrL8aSq81qgUqxSPMUaNmyITp064dy5c1i8eDFSU1Oxbt06uLm5Yfbs2QDqXq++ushQrBsxYgTS09PLfNBISEgwftskhICZmVmdP6YFBQW4cuUK9u7di507d2Ljxo145513WFyichnOCx8fHwQEBKBXr16YP39+mQnt6/o5T0T1g5OTE/z9/bFkyRKo1WoUFBRg+fLlxonuiapL+/btYWVlhXnz5kGj0eDAgQPGhTfOnTuHgwcPoqSkBGq1GitWrMDJkyfxwgsvVFv7ly9fxk8//QStVovIyEjExMSgT58+1fb+D+Pq6vrIIuuIESPwww8/YOvWrWV6LL///vuYM2cOLl++DCEE8vPzsWPHDuTl5eGvv/7CgQMHUFxcDAsLCygUCuMX/X8XFhaG2bNn4969e7h37x7mzp1rfE6tVqOkpATOzs6QyWTYv38/9u/fXyZ/VlZWmcVt8vLyYGNjA5VKhaSkJCxYsKDajsWj8jzKmjVrkJaWBolEAjs7O0gkknKPi1QqxbBhwzB16lRkZ2cjNTUVn332mXERmuqQl5cHKysr2NnZISsrC7NmzSrz/KOOx/DhwzFr1iwkJycjNz
cXU6ZMQVhYWJneopX13nvvYeHChTh16hSEELh79y5u3rz5xMdhw4YNSEhIAFDaa1AqlRqPd2V+30DpyMTIyEjodDqcOHEC27ZtK3MMjhw5gsjISGg0GuTm5uL06dPG909MTKx0D9TMzMwy96A9e/Zg6NCh5W7r5uaGvn37Yvz48cjKyoIQAgkJCcb7VlhYGFavXo1Lly6huLgY06ZNq7BDypPsw2uvvYZ9+/Zhz5490Gq1WLt2LW7evIkBAwY8cn+rco+IiIjA6dOnER4ebuw5eefOHYwfPx5XrlxBnz59cPv2baxcuRJarRYHDhzAjh078MYbbzwyR3WozmuBSrEw+RS7c+cOfv/9d6xbtw7p6emYNm0anJ2d4erqig4dOhi3Y0GqfPcfl+PHj6OgoADvvPMOJk2ahNmzZyMuLg4JCQmwt7cH8PQUZ5ycnCCVSuHh4QGg9BucPXv2PDBcnQj433ndqVMn9OzZE1FRUVi0aBF8fHyMKxnyHkJEtWXLli04fPgw3Nzc0KBBAyQlJWHNmjWmjkX1jLm5ObZt24ZNmzbB3t4eX3/9tbHwodFoMGHCBDg5OcHV1RXff/89Nm3ahJCQkMdq6+jRo1AoFGUee+WVV7B7927Y29vjyy+/xJYtW+Dg4PDE+1UZU6dOxbx582BnZ4dPPvmk3G3CwsJw7tw5BAUFoXHjxsbHBwwYgBkzZmDUqFGws7NDUFAQIiMjAZT2LJw+fTqcnZ3h6OiIU6dOYdmyZQAePAaffvopGjVqhODgYHTo0MHY4xIAlEolvv32W7zxxhuwt7dHZGQk+vfvb3y+YcOGGDFiBAIDA2FnZ4cbN25gxowZuHLlCuzs7NCnTx8MHDiwUsdi/Pjx2L59O+zt7TFq1Khyt3lUnkf57bff0KxZMygUCgwdOhRLliyBn59fudsahrg+99xzCAkJQdu2bfHpp59Wuq1HCQ8Ph1qthrOzM0JDQ9GjR48yzz/qeEybNg1du3ZF69atERgYCFtbWyxZsqRSbd+5cwcKhcLYq2/w4MH4/PPPjasgd+vWzbjy/ZMch/Pnz6N9+/awsbFBly5dEB4ebvxSoTLnvqH933//HXZ2dli4cGGZ89Pb2xt79+7FsmXL4OzsjOeeew6HDh0CAHTv3h0tWrSAu7s77OzsHroqNwC0bt0aaWlpcHZ2xnvvvYeVK1fiueeeq3D71atXw9zc3LjiuuFzOwD07NkTU6dORd++fdGgQQMEBATAycmp3Pd5kn0IDg7Ghg0bMHnyZDg6OuKbb77Brl27KmzrflW5R/j7++P06dNITU1F8+bNYWtri549e8LR0RGBgYFwcHDA7t278d1338HR0RHh4eH46aef0KhRo0fmqA5Pci1Q+SSCf3E+tY4fP47du3ejdevWWLJkCYYMGQIrKytkZGSgdevWuH37Nl599VVYW1tDr9dzGO99hBDGgkx8fDx8fHwwb9485Obmol+/fti8eTNcXV0xceJEEyd9PIMHD0Z0dDScnZ3h7OwMa2trrFixgucAPdSJEyfw22+/oUmTJsjLy8OKFStw7NgxU8ciokfIzc2FSqVCTk5OhXNK1Ud3796Ft7f3M7ff9HSLiIjAzZs38csvv5g6ChGZyKpVq7Bs2TKcOnXK1FGIalxlPqfKajkTVaOOHTsiODgYzs7OAEqH8MpkMigUCly+fBkHDx7EiRMnsGzZMhak7nN/UXL69Om4efMmJBIJ5s+fDz8/P1y5cgUnT56s9DCQusSwb7NmzYJKpYKnpyckEgkuXLiAyMhI40TXlflWi54906dPx8KFC9GyZUsApfNO3rhxo9a+fSSiJ1PRfFL1VV5eHoBnb7/p6VZSUmIcPklEz6aioiLodDreB+iZUJnznD0m6xGtVovVq1cDKB120K1bNyxfvhzjx4+HQqEoU5AjYP369fjjjz/w3XffYdOmTfjpp5+wcOFCXLhwAVKpFAMHDnxqj5larcbPP/+MgwcPwsbGBidOnIBEIsGSJUvQsGFD2NnZmToi1SE6nQ5SqRQffPAB3N3d4eHhgUuXLkGtVuPTTz81TgtARHVTcXEx/Pz8ykzQ/6ywsLCo9HxeRERERFT73NzcEBcXB0tLy3KfZ4/JesRQYw4ODkbnzp1x7NgxKJVKLF68GP369UOTJk04pPs+8fHxKCoqQkJCAl599VXk5ORg27ZtCA8PN27ztBUlDYXUY8eO4ffff8fChQvh5OSEU6dO4Y8//kC7du1MHZHqIMN5Hh4ejgMHDsDe3h5vvvkmgoODoVKpTJyOiB7F0tIScXFxUKvVpo5S6/i5hoiIiKhuk8vlFRYlAfaYrDcMBamCggLY2Nhg27ZtOHr0KPr164dr165h//792L59u6lj1gkHDx5Eeno6XnzxRaxatQrx8fHo2LEjNmzYgAEDBlTr6nemkp2djfXr12Ps2LHQaDSIi4vD6dOn68W+Uc0qLi7G7NmzcfnyZRQWFuLll1/GhAkTIJFInrpCPREREREREdVt7DFZTxgKBtbW1gCAK1euYOTIkXj++efh7++Po0ePIikp6Zkfkrl37158+eWXePXVVzFs2DAsXrwYR44cwYYNG9C7d+96Ubg7efIkkpOTUVxcjHnz5iE/Px95eXkQQmD48OHsWUIPtWTJElhbW2POnDlo1KgR+vbti2HDhsHb29vU0YiIiIiIiKieYWGynjEUKE+cOAE/Pz/cvn0bp0+fxgcffABXV1eo1WrI5XLjnHLPAkMvUqB0ouE5c+YgMzMTFhYWCAoKgre3N1xcXLBt2za0adMGzz//vIkTP5lz587B3Nwct2/fhkKhQJ8+faDX67FgwQLExsYiMDDQ1BGpDjIMh7x8+TLee+8944I3fn5+mD9/PtLS0vDSSy/hnXfeMXFSIiIiIiIiqi9YmKxnDMWFRYsWYd68eXBwcEBYWBh27dqFRYsWQSaTYeXKlbCyssLNmzfRsGFDU0euUdeuXUN+fj5KSkpw8uRJ+Pr64quvvoJarcamTZswbdo0yGQyzJw5EwUFBcjIyDB15CcWFhYGFxcXjBkzBoMGDUL79u1RXFwMCwsLJCQkIDAw8Kld1Idq3tChQzF9+nTY2NggMzMTLi4uGDRoENzc3NC1a1dTxyMiIiIiIqJ6hHNM1kOG4mR+fj5sbGwwevRoyOVyfPPNN1i1ahWSk5NRWFiI98QivwAAIABJREFUqKgobNiwARYWFqaOXGPS0tIwb948REZGYv78+Rg5ciTmzp2Lu3fvwsPDA9euXcO6detMHbNGrFu3DuvXr4ePjw8sLCwQEBCAUaNGGYf7E93PUKxOS0vDqlWr0KNHDzRv3hxFRUU4c+YMunXrZuqIREREREREVM+wMFnPxcbG4ssvv8T3338PANiwYQNmz56Nd999Fx988IGJ09Wc+4eqX79+HXPmzEGPHj3QvXt3eHp64uDBg/Dx8YGvry/MzMyg1Wohk9WvDsQlJSUoKipCSkoKEhMTodfrodFojEO7OdckPcyiRYuwf/9+CCHg4uKCBQsWwMHBwdSxiIiIiIiIqB6pX5UYe
oCLiwtiYmJw+PBhHDx4EIcOHcKKFSvg7OyM1NRUuLq6mjpijZBKpdDpdHjxxRexZMkSLF26FCtWrMDOnTuRlpYGiURi7AGm1+vrXVESACwsLHDo0CH85z//gUKhgEqlQuvWrQGARUl6qJUrVyIxMRFr1qyBvb298XFOAUBERERERETVqf5VY8hIp9NBoVBg3rx5GDVqFEJDQ/Hbb79h9+7deP/99/HFF1+gR48epo5ZYxYsWID+/fujcePGOHLkCJo3b46UlBTk5OSUWX27vhbptFotZs6cie3bt8PR0dHUcegpYOhJq9fr4eTkBHt7e+Tk5ODw4cPIzMzEW2+9xd62REREREREVG1YmKzHDEOZW7dujfXr16Np06aYNGkSUlNTsX37dri5udXr1bllMhni4uLw/vvvw8vLC2q1GhMnToRSqQSAel9gkclkaNGiBTIzM1FUVIQbN27g/PnzGDlyJDw8PEwdj+ogw/XQq1cvDB06FJs3b4aDgwMCAwMREhJSZhsiIiIiIiKiJ8XCZD1nGHrZtGlTXL9+HYWFhVizZg0A4NixY5g+fTq+//57BAcHmzhp9TEUWydMmIDDhw+jVatWuHfvHgYMGIBRo0YZC5P1ucBiKLqam5ujZ8+eaNSoEZo0aQJbW1vY2tqaOh7VcV5eXvj3v/+NNm3awMvLC7GxsXB3dzd1LCIiIiIiIqpnuPjNMyQmJgZvv/02Vq1ahV9++QXnzp3D5MmTERoaatzmaexFeP+8dxkZGXB0dIRer0d4eDhGjx4Nd3d3jBgxAhMmTECvXr2eiXnyDPuYk5MDhUIBAMjKysLJkydx7NgxeHl5oXfv3ggMDDRxUqqLDOfP119/ja1bt0KlUqFBgwbo1q0bhgwZUq97WhMREREREVHtYY/JZ4Rer0dAQACmTp2KPn36oGvXrli/fj3Wr1+PX3/9FU5OTpg0adJTV5QEYCwyzpkzBxKJBJMmTcK2bdtgb2+PFi1aAACWL18OPz+/MtvXZ4Z9NDc3x7Jly3DmzBk4OTkhPj4e165dw/bt2+Hi4mLilFRXSSQSnDhxArdu3cLhw4cBAHv37sXSpUsxZMiQZ+IaIiIiIiIioprHwuQzwlBw7N27N+zs7NC+fXu899570Gg0mDlzJsaMGQNHR0eMHj3axEkfz9WrV3HgwAHs27cP165dg7u7Ozp27AigtPeXoSj5rElKSsLevXuxZcsWyGQyaDQaRERE1Kuh+1S9DL0llUolLl68aHy8V69eiI6OBlC/p0EgIiIiIiKi2iONiIiIMHUIqh16vR4SiQTe3t64ceMGoqOjsWjRIqhUKvTr1w/Jyclo3LgxANT54c5arRY3btyAi4sLrl+/jj///BNpaWnIycnBxo0bodPpYGZmBl9f3zq9HzVNpVJBrVajVatW0Gq12LlzJzw8PNCoUSNTR6M6ynC9uLq6oqioCG3btkVmZibOnDmDTp06GedoJSIiIiIiInpS7DH5DLm/l1N2djb27NmDiIgIWFhYQKFQQKFQIDo6GkFBQZBIJHW6OKnT6bBv3z589dVXSEtLw9q1a+Hs7Ax3d3e8/fbb6NevHywsLPDCCy+YOqpJSaVSREdHIykpCYmJiTh//jxSUlIQEhICX19fU8ejOu7DDz/EqlWr8NNPP8HBwQHt2rWDr68vBg4cyHkmiYiIiIiI6IlxPN4zqn379njzzTdx6NAhXLhwAaNGjcLBgwfxww8/YPz48QDq7lyMOp0OFhYWGD16NFJSUmBtbQ2VSoXu3bujUaNGmDt3LoKDgzF27FhTRzUpvV4PAEhMTERaWhq2bt2Kd999FxYWFjh+/HiZbYjKc+bMGaSmpuLdd99Fo0aN8NJLL2HdunUA6u79gYiIiIiIiJ4eLEw+gwzFqAkTJqB79+64ePEigoKCkJqairlz5yI1NRVxcXEmTlkxqVSKe/fu4ZtvvsHGjRvRuXNnTJ06FUVFRdi+fTuee+45fPXVVwBKi5jPKiEEAMDX1xfff/89VCoVJBIJhg0bhoCAAACcK5DKZzh3bGxscP78efTu3Ru7d+/GqVOn4OPjA4DnDhERERERET05/mX5DLq/oKDVarFv3z58+OGHcHNzQ/v27eHv7w83NzcTJizf/b37nJ2dkZCQgE8//RQffvghfH19MWHCBGzZsgUDBw40bv8sDzU17PvEiRMxcOBABAcHw9LSEp07d0br1q1NnI7qMkNvyCZNmiAhIQFyuRzPP/880tPTER4ebuJ0REREREREVF9IhKFrDD2zli9fjuPHj2PlypVYsGABRo0aVScLk0BpD8hffvkFw4cPBwB88sknuH79OiIjIyGEgFwuh7m5eZ2eH9NU7t27h7Fjx+LXX3+FXq9njzd6KMM1FBUVheDgYGRlZcHe3t7UsYiIiIiIiKgeYWWC8N5778HLywsZGRn417/+VeeKkvf3lNRqtfjpp58wZ84cAMCsWbMghMCUKVNgY2PDouRDODg4wNnZGQCH4dKjGa6h4OBgAGBRkoiIiIiIiKode0w+4+p6z7n7i4zz58+Hq6srunbtiqlTp8Lf3x9NmjTBgQMHMHPmTHh6epo4bd2XkJCAvXv3wtLSEkOGDIG1tbWpI1EdptFoIJPJWOgnIiIiIiKiGiGNiIiIMHUIMp26XnAw5Js3bx5iY2MhlUrx119/YfTo0cjOzsbt27fxxhtvoGHDhtDr9XV+f2qbobA7ceJEWFlZQSKR4MKFCzh//jysra0RHBzM40YV+vPPPxEbGwtvb29TRyEiIiIiIqJ6SGbqAGR6hk6zdak4VVJSAgsLCwDAH3/8gatXr2LYsGHo27cvNm7ciKVLl+Kbb74xLvBS13t+mophASAbGxvExMQgLy8P7du3h7u7Ow4ePIi+ffuaOiLVYRqNBtHR0ejQoYOpoxAREREREVE9xErOM04IgaKiIkRHR5s6ipFarca+fftw69YtbN68GUlJSQgNDcXly5dx8eJFdOjQAbdu3UJCQoLxNSxKls9QbG7Tpg22b98OtVqNgIAADBs2DP/6179MnI7qOg8PDyQnJ4MzfhAREREREVFNYI/JZ5xEIsHdu3exdetW/OMf/4CVlZWpI0Eul8PV1RXDhg1DZmYmoqKiIJPJsHz5cqxYsQI5OTkICwuDr6+vqaPWeYaCbd++fdGyZUucPn0aBQUFOH78OLZt24aOHTsiNDQUoaGhJk5KdZGLiwu0Wi0yMzPh6Oho6jhERERERERUz7CbGSEoKAgeHh44dOiQqaMYNWvWDF9//TXatGmD3377DQAwZswYNGnSBEFBQWjVqhVKSkpMnPLpIZFIcOXKFURFRWHLli3Izs7G8ePHERgYiAYNGpg6HtVRUqkUrq6uSEpKMnUUIiIiIiIiqodYmCRIJBL06tUL58+fR1pamkmz6HQ65Ofn45VXXkF6ejqWL1+OzZs348cff8RHH32Epk2bIjAwEJs2bUJ+fr5Jsz5tvv32W3Tu3BkTJ07E//3f/2Ho0KFo2rQp3N3dTR2N6jAPDw8WJomIiIiIiKhGcFVuAgBYW1uj
uLgYFy9eRPPmzWt9IRzDytBarRZWVlbo1KkTFixYAJlMhokTJ+Kvv/6CQqHAq6++imbNmiEwMBBubm61mvFpZViZWyaTwdPTE3fu3MGxY8fQoUMHNG3a1NTxqI4rKChAdHQ0QkJCTB2FiIiIiIiI6hkWJsnI09MTBw8ehL29PZydnWutXcOK2pmZmRgzZgyCgoLQqFEj9OjRA5MmTUJaWho+/PBDtG3b1vgalUpVa/medoYic+PGjXHgwAF89913iI2Nhbm5Oa5fv44WLVpAp9NxASEql5mZGY4cOYKOHTvW+hcWREREREREVL+xEkFGlpaWeOmll7Bv3z5oNJpaaVMIATMzM6Snp+OHH35AdHQ0pk+fjlOnTsHBwQFDhgzhMNInZFhReffu3bh8+TL+/e9/o1GjRujZsyf27NkDACw4UYWcnZ0hhEBGRoapoxAREREREVE9w8IkldGiRQsoFAocP368VtqTSCTQ6XQYNWoULC0tMX36dCiVSoSHh2Pq1KnYt28fvv76awD/K7BR1RiKjv7+/rhz5w4aN26Mn3/+GWvXrkXjxo0BgL0lqUJmZmZwd3fnFwRERERERERU7ViNoDIkEgl69+6N48ePIzs7u1balEql6NSpE/r3748hQ4Zg7NixcHV1xdChQ7F+/XrI5XLjHJT0+AIDA2FnZwcHBweMGTMG3bp1wyeffGLqWPQU4AI4REREREREVBNYmKQHeHl5oUmTJti/f3+ttenv74/169fj7t27yM7Ohr29PRo2bAilUmkc7k1PRi6XY/78+bh69SpcXFxw7do1TJo0CSkpKaaORnUcC5NERERERERUE2SmDkB100svvYRFixYhLi4Ofn5+Nd5eWFgY1q9fj59++gn79+/H/PnzYW1tbVxRmp7MokWLcOLECchkMmi1WjRu3BhKpRLp6emIj4+Hm5sbjzVVyMPDA8nJycaFqoiIiIiIiIiqg0Rw4j6qwIkTJ3Dp0iWMGzeuVosReXl5UCqVLIJUo5iYGNja2mLnzp3Izc3F+PHjAQCLFy9GVlYWPv74Yx5vqpAQAl9++SXGjBkDV1dXU8chIiIiIiKieoJVCKpQaGgodDodzp07V2ttCiGgUCgAcEGW6uTv7w9nZ2eoVCqcO3cOO3bswPz58/Hbb78hJCQEAI83VUwikXA4NxEREREREVU7DuWmCkmlUvTu3RubNm1CkyZNcPv2bWi1WjRv3rzG2tTpdJBIJJBKpTXWxrPIMES7V69eUKvVOHToENq1a4c33ngDnp6eJk5HTwNDYdJQyCYiIiIiIiJ6UuwiRQ8VGBgIlUqFjz76CBEREdW+IE5JSQni4uIAAHfu3MGSJUtQXFxcrW3Q/1hbW6Nt27ZYuHAhQkJCMGvWLEyZMgV6vd7U0aiOY49JIiIiIiIiqm4sTFKF9Ho9tm7diqNHj+LMmTOwsbFBXl5etbZx5coVRERE4Oeff8b69esRGhoKGxubam2DygoLC0NOTg62bduGsLAwREVFISoqytSxqI7z8PBASkoKdDqdqaMQERERERFRPcHCJFWopKQEFy9ehFarRXBwMJKSklBQUFCtbSQmJqK4uBjffvstLl26hICAgGp9f/ofQ6/I5557Dps3b8aNGzfQqlUrNGjQwFiY5FpYVBF7e3uYm5sjLS3N1FGIiIiIiIionpBGREREmDoE1U3m5ubo2LEjrKys8NdffyE+Ph5SqRRDhw41zlkIAFqdHml5JYi5l4+Ld7JwLj4Tl+5k43pyDu5mFqFQXdrDytLcDGZmkjJt7NixA7GxsZBKpZBKpbh48SLatGkDa2vrWt3XZ4EQAhKJBHK5HBcuXEBoaChCQkLwwgsvoGnTpgBQ5vdKdD+JRILY2FhYWlrCw8PD1HGIiIiIiIioHuDiN/RQcrkcAwYMQEhICObOnYtLly5Bp9NBJpMhLa8Y5+OzcDwmA/nFGphBAkgAc6kZJBJACECj0wMC0ENAYWmOjgGOaOVjDxelJYQQuHr1KpKTk+Hl5QVHR0f06NEDKpXK1LtdLxlW3e7Xrx9atWoFd3d3AIBMxtsAVQ7nmSQiIiIiIqLqJBEcu0mVpFarceTIEYS064wdl5NwOTEbgASONnJYy6UP7W0nhEChWoeMAjUAgeaedmjrJsWoYUPg5eWFMWPGoGvXrlAqlbW2P8+qe/fu4dChQxg6dCiA//WkJHqU69ev48iRIxg3bpypoxAREREREVE9wMIkVZpeL3D2dia2XEqERqeHm60lZNKqT1Oq1emRklsM6HWwSbuGj0YOhB17Sdaq1NRU5ObmQqlUws3NzdRx6CmRnZ2Nb7/9FlOnToW5ubmp4xAREREREdFTjmM4qVJKtDqsP5uAs7cz4ay0gNLS8rHfSyY1g5e9NfKKNbjn1Azbr2chrI0CFjJpNSamh3F1dcVXX30FKysrpKSkoHv37ujevTvkcjlsbW1NHY/qKJVKBUtLS6SmpsLLy8vUcYiIiIiIiOgpx1W56ZFKtDpEnozH+fgs+DjaQGlZPT2llJbm8HG0wbn4LESejEeJVlct70sPp9OVHmeNRoPmzZtj2bJlcHFxwfvvv49Ro0bhxIkTJk5IdZVEIuE8k0RERERERFRt2GOSHkqvF1h/NgFXE3PQwMH6gVW1n5TUTAIfB2tcSczBhnMJGN7Wp9rboLKkUilyc3Px119/ISoqCvv27UNMTAycnZ2h0+mQlpZm6ohUh3l6erIwSURERERERNWChUl6qLO3M3H2diZ8HG1qrGBoZiZBAwdrnInLRLCLEqH+jjXSDgF6vR5mZma4desWLC0tYWtriylTpsDJyQkKhcK4HRfEoYp4eHjgxo0bpo5BRERERERE9QALk1Sh9PwSbLmUCGelBaQ13ItRaiaBs9ICWy4lItBFAUeFRY2296wyMyudvaFly5bYvHkzYmJi4OvrC6C0GGn4J5Vyvk8qn4eHB+7duwe1Wg25XG7qOERERERERPQU4xyTVKGdl5Og0emrbU7JR1FamkOj02PnleRaae9ZptfrsWfPHgQEBECr1Rp7SJqZmbEoSQ+lVCqhUCiQkpJi6ihERERERET0lGOPSSpXWl4xLidmw0NlVenXlBQVIvbqWaTdjUNJYQHMLSyhtHdCwzadoVA5VOo93GwtceluFnrnucFF+fgrf9PDmZmZobCwEAAgk5XeBjQaDQuTVCmGBXAaNGhg6ihERERERET0FGNhksp1Pj4LgAQyaeU61RbkZuPs/i2QmEngGdAIljZKaEqKkZuRBk1JcaXbLW1PggvxWejV1P3xwtNDabVaxMTEIDg4GOvXr8eff/6J7OxsxMXFYdq0aWjfvj3nmKSH4srcREREREREVB1YmKQHaHV6HI/JgKNN5eePu3rsN8gtLNG25xDInnDeOUcbOY7FZOClRq6VLoxS5a1fvx579+6FQqFAYWEhXnzxRXh7e+PAgQOIjo5mYZIeycPDA1evXjV1DCIiIiIiInrKsepDD0jPVyO/WANreeWG9GYkJyA7PQWBLUIhk8uh02mh02kfu31ruRQFxRqk56s
f+z2oYoMGDUJkZCQGDx4MFxcXjBw5Et26dUOnTp1w9OhRU8ejp4C7uzsyMjJQUlJi6ihERERERET0FGOPSXpAam4xzCCpdI+59KQ7AACZ3AJn9m5G1r1kCCFga++EoJYd4OzpU6X2JRIJJJAgNbcYbirOM1ndrK2tAQB+fn7466+/8OGHH6KkpASxsbGIiIgA8L/Vu4nKo1AoYGtri+TkZOOq7kRERERERERVxcIkPSApuwiowijewtxsAMClw3ugcnJF8849oSkpRszVc7jwxw60fmkAHN29qxZCUpqjubdd1V5HlRYYGIjly5cjPj4erq6u8Pb2Rl5enqlj0VPCMM8kC5NERERERET0uFiYpAdkF6lhXoW5HbVaDQBAYWuPlt36QV1SAgsLCzi4e+HYtnWIungS7atYmDSXmiGnWFOl11DVHTp0CNeuXUN0dDSysrKQnJyMnTt3wsenar1c6dnDBXCIiIiIiIjoSbEwSQ/Q6gSqsu6JmbR0LkqPgIaQSCS4cvUq5HI5ngsOhr2zO7LuJUOrUUNmXvlFcSQSQKMTVY1OlaTX62FmZgYhBBo3bow333wTCoUC69atQ1JSEnx8fLgADj2Uh4cHLly4AAA8V4iIiIiIiOixsDBJD5BJJRBVqAlaWisAAHKr0rkLQ0JCEBcbh7PnzkFdVAyh11e5MCkEYC5loaOmGIqSr7/+epnHvby8kJWVBQAsNFGFMjIycO/ePZw6dQoFBQXIyMhAeHg4vL2rOGUDERERERERPdNYmKQH2FnJodHpK729yskVCVHXUFKQDwCQyWQICg6Ci4sz9vx0CsV52SjRaFGVZWw0Oj1UluZVTE5VIZFIIITA7t27sWTJEhQVFcHb2xvjxo0DwF5wVD6NRoPZs2fj7t27iImJgU6ng5WVFezt7U0djYiIiIiIiJ4yXHqXHuBhZwVUoceki5cfZDJz3L11HXr9/wqaEr0GdgpLuHj74srlq4iNjYVep6vcm4r/n4NqVHp6On799VfMnTsXu3btwueff46xY8eaOhbVYebm5hg8eDCkUim8vb2hVqsRFBQEhUJh6mhERERERET0lGFhkh7gamsJPQREJcdzyy2tENyqA3Iy0nBm32bE37iMW5dP4+z+LZDJzNGp10C0bBmC7KxsnDt3HtnZ2Q99PyEEBARcbavSx5Ieh1KpxOHDh9G0aVNYWVmhpKQEjRs3hl6vZ29JqlCHDh3Qt29f5OfnIyMjAy1btjR1JCIiIiIiInoKcSg3PcBJIYfC0hyFah1sLCp3ijR47nmYyy1x+/pFRF04DomZFA6unggKaQelvRMAIKRlCBITE3H16lW4uLggICAAMtmD71+o1sHG0hxOisrPSUmPx9LSEhMmTMDYsWORnJyMkpIS/Oc//4GZGb+zoIpJJBIMHToUly5dwv79+xEcHGzqSERERERERPQUkojKdoujZ8qea8nYfz0VDRysq/29i4uLERUVhfz8fAQHBcHJ2bnM83cyC9GzsSt6NXWv9rapLMM8khcuXMDVq1fh4eEBPz8/uLi4wNbWlvNM0kMlJSXhk08+wdKlSyGX84sEIiIiIiIiqhoWJqlcaXnF+HLPDXiorCCT1kDvOSGQmpaGW7duwU5lh6CgQMgtLKDV6ZGUU4SpvRvBRcmh3LXh5s2bWLNmDZydnSGVSnH48GH06NHDuAgOUUW0Oj3S89VIzS1GUnYRsovU0OoEZFIJ7Kzk8LCzgqutJZwU8pq5jxAREREREdFTjUO5qVwuSks097TD1aQceNlXf69JSCRwdXWFg709bsXE4MzZswjw94fO0g4tvOxZlKwFer0eZmZmuHXrFhITEzF79mwAQOPGjfHrr79CrVYjLS0NXl5eJk5KdU1aXjHOx2fheEwG8os1MIMEkADmUjNIJIAQgEanBwSgh4DC0hwdAxzRyofXNhEREREREf0Pe0xShTLySzB//1+wlkuhtDSv0bYyMzJw9WYMzCwsMW94JwR6u9Voe/Q/MTExmD59OoYNGwZLS0vs3r0bOp3OOG/g+PHjTZyQ6oqM/BLsuJyEy4nZACRwtJHDWi596HB/IQQK1TpkFKgBCDT3tEP/5h5wVFjUWm4iIiIiIiKqm1iYpIc6HZuBtafj4eNoA6lZzc01qNML3E7PQ7BZGvJiLqJLly7o0KEDpFJpjbVJpXQ6HT788EMolUqoVCo4ODjA1dUVtra2cHNzQ9OmTU0dkUxMrxc4ezsTWy4lQqPTw83W8rGGZmt1eqTkFsNcaoZBLTzRxtcBZjV4XyEiIiIiIqK6jYVJeii9XmDt6Xici8+Cj4N1jRQR9HqB+MxCtPG1x/C2PkhKSsT27dshkUgwaNAguLmx92RNy8zMhIWFBbKzs5GTk4Pbt2+jZ8+eLAwTSrQ6rD+bgLO3M+GstKiW3tN5xRrcyytBG18HhLXxhoWM5xkREREREdGziHNM0kOZmUkQ1sYbaq0eVxJz0MDBulp7Tur0AncyC/G8pwqvtfaGmZkEXl5eGDt2LI4dO4b8/HzjXIhUc65fv45ly5bBzMwMTk5OkEqlaNSoEfz8/EwdjUyoRKtD5Ml4XE3MqdZe00pLc1jLZTgXnwW1Vo832/uwOElERERERPQMYo9JqpQSrQ4bziXgTFz195pq6+eA11qX32tKCPHQ+evoyRiKvvPmzYNarcbHH39s6khURxh6S5+Pz0KDWuotzWHdREREREREzxZpREREhKlDUN0nMzNDMw8VnBQWuJqYg+xCNazl0scqJGh1eiTlFEEvgLDW3ujVxB3mFcxX9/eiZGFhIWJjY+Hk5PRY+0FlGQq/xcXFOHPmDPR6PRISEnDw4EHcuXMHDRs2ZHH4GXUmLhN7/0xBgxqcX1YikcDW0hzXk3LhpLCAl711jbRDREREREREdROHclOlmZlJEOrviEAXxROvzNvCyx79nnev8sq8crkc8fHxcHBwgLOz85PtEBmHyDdu3BgbNmzApUuXIJPJYG9vbzy+LEo+e9LzS7DlUiKclRY1uugVAEjNJHBWWmDLpUQEuii4WjcREREREdEzhEO56bGl5RXjQnwWjsVkoKBYAwkkgAQwl5pBIgGEADQ6PSAAAQGFpTk6BTohpIEdXJSWVW4vPT0dcXFxCAoKgp2dHQBw/slqlJubi9jYWERFRcHOzg7BwcHw9fU1dSwygVXH43A1KadWezDezSrE8552GNXBt9baJCIiIiIiItNiYZKemFanR3q+Gqm5xUjKLkJOsQYanYC5VAKVpTk87KzgamsJJ4UcsgqGbD+KEAJHjhzBwYMHsWXLFgwbNgxvvvkmvLy8ymzD3n2Pb/v27di2bRs6deqExMREnD59GosWLWJx8hmTlleML/8fe3ceH1V973/8fWbLZE8m+x422SGsYgQrm4CKu1asV1Hb2nqt26+i19qCFYsVqrW49rYu16WiICoKyqKigCA7ASITb3ajAAAgAElEQVS7CUnIvu/JzPn9QZOCoEIImSyv5z+ak3PO93OGySOPeefz/X6XpSs22PeUf17ra2t0aOdm5WcdUm
1VpWwOhwJDwpTUb4gi4pJO6R5NSzz8z5S+LfrDBQAAAACg42EqN86YzWpRdLBT0cFODU4IOStjFBQU6MiRI9q6davi4+MVGRmpK664Qh999JGioqIkHZ1yTAfl6Wt6zZYtW6bLL79cl112mSTptttu0/r165WcnMzr2oVsziiRZJxyKOlubNCGjxeqtqpC8b36KzA0XPV1Ncrat1ubV32gQWMuUmy33j96n6PjGdqSUaLJA2LO7CEAAAAAAB0CwSTaPdM0tXXrVmVmZqqqqkrPPPOM+vTpo4yMDEnSpk2btHbtWv3qV7+Sjw/r052upqbp4OBgHTp0qPn4rbfeKqfzaOcaoWTX0Oj2aO2BIoX5O075moKsb1VVXqo+w8couV9K8/G4Hv20etHLytq765SCSUkK83dozYEiTegb1eLuagAAAABAx0EwiXavpqZGVVVVWrZsmYYMGaI+ffooLS1NR44cUVRUlK688kqFhYVp0aJF+sMf/qAJEyZ4u+QOxWq1SpLuv/9+BQYGqqKiQunp6fLz89OQIUO8XB3aUmFlvSprG+Rynfrako0N9ZIkp5//ccftPk5ZLFZZbaf+a8bPYVVRZZ0KK+sVHcx0bgAAAADo7Agm0e75+fnpqquuktvt1siRIyVJCxYs0NVXX60NGzbI5XJpyZIlWrx4sYqKinTgwAGVlpZq2LBhXq68YwkLC9OHH36oxx9/XOnp6br55pu1aNEizZ49m/U7u4i88lpZZJzWv3VoVJwMw9DerV/JarM3T+U+tGuLTNNUtwGn/nNoGIYMGcorryWYBAAAAIAugGAS7V5TKHbttddKknJyclRSUqLJkyfr3HPP1ezZsyVJpaWl2rFjh3x9fbVt2zYNGzZMhw8fVkJCgiQ1r5cYHR3ttWdpzxoaGvT3v/9dX3zxhS655BI9+eST6tOnj2bOnCm73e7t8tAGckprpNPMn/2DQpRywRSlb/xCmz9d0nzc6RegkZOuUnB41Ond0Dhax9larxYAAAAA0H4QTKLd+273VmxsrJ599ll98cUXKisr0759++Tj46OlS5fqhhtuUFZWli6//HIdPnxYb731lvr06aO6ujqtXbtWTz31lCR28P4u0zRlt9tVVlamuro6ORwOvf7667rmmmtUXV2t4OBgb5eINlBaUy97C9Z2tDud8g8OVXBUvJyBIQrwderb9G3a/OkSjZh4pQJDw079XlaLymobTrsGAAAAAEDHw+4C6LBmzpyp+fPnKysrSx9//LEeeughBQYGqra2VoMHD9bGjRsVHByssLAw/fOf/9TChQtVUFAg6WjYaZpm88YvXV3T6zBp0iQVFxdr0qRJWrFihaZNm0Yo2YU0uk2dbl5fWpinjcsXKzqpp3oMGqnCilpFJvXSyElXy/R4lP716tO6n2FIDW5+LgEAAACgK6BjEh1Sbm6uxo0bp4kTJ2rixInNHZB33XWXXC6Xvv76a+3YsUM/+clPFBkZqR49emj69Omqr6/Xa6+9pr59+2r48OHefox2o2nX7bvvvlv+/v664447dMcdd3i5KrQ1m9XQ6Wb1md/skGmaikrqKYePU+Hh4Tpw8KD69eunkMgYFWZnyOPxnPLO7qYp2a10MwMAAABAV0DHJDqk6Oho/f73v5ckud3u5uO/+c1vVFJSojlz5sjf31/9+/fXggULlJqaqp/+9Kf68ssv9eGHH2r27NmaM2fOcddC8vf/z87KHo+HjtIuJsTXoQa357Suqa+pkiSZnqPX9ejeXUVFRSotLW0+djppZ4Pbo2Ana5oCAAAAQFdAxyQ6PKvV2vz/vXr10tNPP63MzEy53W7V1NRo3bp1+t3vfidJqq6u1nnnnad77rlHu3fvltVqVUNDw3Gbu3g8nqO7A3fBNShramqUm5urbt26nXKHGzqP2BBf6TSzaP8QlwqPHFbOwW/Urf9QOXx8lJyUpN07tqsqP0eBIWGyHPMz+qPMf9cBAAAAAOj0SB7QqXj+3aGVmJiobt26yTRNWSwWvfnmmzpw4IBuvfVWNTY26qOPPlJsbKwkNYeSmzdvVlFRkSwWS3MoWVFR4Z0H8ZIjR47ogw8+8HYZ8JKoIKc8Or21V5P7psju8NHeLeuUtnalDu9JU21RjvZv/FSlJSXqOWTUKd/LNE2ZMhUV5GxJ+QAAAACADoaOSXQq3+3yS05O1pIlS1RQUKCHH35YtbW1CggIUG1trQIDA/W73/1Of/zjH1VaWqqFCxcqPT1dd911l6ZPn65Vq1apoKBA119/vZeepu25XC6VlZXJ7XYf14mKzqWmpkavvPKK7Ha7wsPDFRoaKrvdroDAIAU47aqud8vf59R+PfgGBCl16jQd3LFRxXnZyv12ryxWmxK791K9LUChUfGnXFd1vVv+TrvCAxwtfTQAAAAAQAdCMIlOrSlgi4iI0Isvvqh//etfys/P169+9Sv5+PjoL3/5i0JDQ2W1WvWTn/xEb7zxhpYuXapXXnlFc+fO1euvv958r9PZwKOjCgoKkmEYKisrk8vl8nY5OEscDof27dun7Oxs1dXVqbi4WAUFBZo4caKm/PJ/tHx33ikHk5Lk6x+o/ueNO+H4zrSdOnTokM4555xTuk9RVb0m9YuSzdq5f84AAAAAAEfx6Q+dWlPXX9MU72nTpunuu++Wj4+PJGnkyJEKDQ1VSkqK9u7dq08++UQXX3yxysvLVVlZqZdeekmVlZWSTuzG7IwsFotCQkJUUlLi7VJwFpWXlysiIkIHDhxQVVWVHA6HrrjiCs2ZM0fDkkIlmWo8zU1wTqZHzx7Ky81T5SksiXB0PFNDk0LPeFwAAAAAQMdAxyS6hKZQ0TTN4za1KSoq0ty5cxUUFKRJkybp/fff1/jx47V8+XJt375dISEh+uCDD7R69Wr17dtXP//5z5uv7awdlKGhoSouLlaPHj28XQpamdvt1uuvv67MzExFRkYqPj5eQUFBio+P1wMPPCB/f3/5SxocF6K0nDLFh/qd0Xi+vr6Kj4/Tvv37NSQlRfqBDaVyy2uVEh+qyEDWlwQAAACArqLzpSrAD/juTtthYWGaOXOmSktLtXTpUt1xxx1auHChhgwZopCQEH344Yd64YUXdPvtt2vVqlVau3Zt87VNoWRTN2Zn4XK56JjsBEzzxE1sTNPU8OHDdd999+n2229XamqqwsLCdO+99yo4OLj5vKmDY2W3WlRR23DGdSQmJam2tlZ5+fnfe05FbYPsVosuHRRzxuMBAAAAADoOOibRpZmmKT8/P82dO7c5YJwzZ4769OkjSVq0aJEOHDigxYsXa9KkSdq7d6/CwsL03HPP6frrr1dqamqn65oMDQ1VRkaGt8vAGfpuCC8dXdqgf//+zV/fdNNNslgsioiIOO68sAAfXZkSp9c3ZMjPYZPV8v2djj/GarWqR48eOrD/gMLDwmS1Hf9rx+0xVVhZp5+NTFJYgE+LxwEAAAAAdDydK1EBTpNhGM2dZRaLRRaLRX/+858VHx+v7OxsuVwu7d69Wz169ND777+vxMREvfPOOzp06JDmzp2rhx566
Lj7naxLraOhY7LjOdl7buvWrZo/f75WrlzZfOy7YWV0dLQiIyNPes8RyS6NSHYps7haHs+ZvacjIyLk6+t7QuDt8ZjKLK5uHgsAAAAA0LUQTKLLMwyjObAxTVOxsbG6+eabFRERobCwME2aNEmxsbFatGiRYmJidODAAb377rt64oknlJCQoPLycn3wwQdKS0s77l4dVWhoqEpKSjp8wNpVHLtu6saNG5WVlaWcnBzNnj1b/v7+WrBggQ4ePHja97VYDP10RIIGxQUro7ha7jMJJw1DPXv1VFZ2tmqqqyUd7ZTMKK7WoLhgXTc8QZYz6MoEAAAAAHRMBJPAMY4NFR0Ohx566CH96U9/0vbt25Wfn6/nn39e/fr1k91uV2Zmpvbu3at//OMfWrt2rX7xi1/oxRdfVGNjY4cO9UJDQ1VfX6+qqipvl4J/Ky4u1sKFC5u/bnp/NTQ0yDAMZWdna+zYsZozZ44qKyu1YMECXXfdderVq5c+//xzLV++XI2Njac9ro/Nqv86L0kjkkOVUVR1RmtOBgQEKCY6Rvv3H1BFTYMyiqo0IjlU/3Veknxs1hbfFwAAAADQcVlnzZo1y9tFAO1RU/gTHx+vESNG6ODBg3r33XdVXl6uzMxM7d69W1OnTlVMTIxM09Tzzz8vu92u2NjY5oBz06ZNWrVqlebNm6fk5GTFxLT/zT2sVqs2b96s7t27H7chCtpeUzek3W7XzTffrF/84heqrq6Wx+ORzWbTNddco/79+2vNmjVKSUnRnDlzFB4err179+rpp5+W0+nUI488osmTJ8tqbVn4Z7NYNDA2WOEBPkrLLlNpdb38HNYWdTj6BwQo7UCW7D4+uun8XprcP0Z2K38fAwAAAICuik+EwPdompZtmqbq6ur06quv6qabbtIvfvELFRYWavr06YqPj1dqaqqOHDmiFStWaPDgwc2b6Bw8eFDPPPOMzjnnHN1666168803lZ2d7eWnOjWsM+kdHo9Hbre7+eumgLu+vl4JCQk6cOCA5syZo5dfflmSNGrUKG3evFkxMTHasmVL83UXXHCBxo4dqxEjRmjBggW65pprVFdX1+K6LBZD53YP0/0X9dbA2GDllNUos7haVXU/3h1smqaq6hqVWVyt/KpGje6XqKTS7RqWGMz0bQAAAADo4gyzI885BdqIaZp67733NHXqVNn+vatwWVmZHn74YVVXV2vChAlat26d5s+fL4/HI4vFouuvv755qu19992n1NRULz/FqXvvvfcUHByssWPHeruUTu/YzZeO5Xa7ZbVaNWPGDO3atUtWq1UpKSm67rrr9Je//EUvv/yydu3apd///vd66623NHbsWL355ptat26dEhISNHDgQD399NPq3r27pkyZorCwsFarOb+iVlsySrTmQJGqahtkyJAMyW61yDAk05Qa3B7JlEyZCnDaNbpnuIYkhijc36H//d//Vb9+/TRmzJhWqwkAAAAA0PEQTAKnqSkwavK3v/1NhmHosssuU1JSkiRp/fr1uu+++7R48WKVlJToX//6l5pWTTh2HcumELO9+eKLL1RYWKirrrrK26V0St/3715aWqqZM2fqyJEjmjp1qiZNmqTf/OY3WrBggTIyMnTzzTfr888/1/jx47Vq1SpJUp8+ffTpp58qKytLf/vb3yRJDz74oAYMGHDWn6PR7VFhZb3yymuVU1qjstoGNbhN2a2Ggp12xYb4KirIqfAAh2zHTNnOzMzU66+/rjvvvFNBQUFnvU4AAAAAQPtk83YBQEfTFEo2hUt33XWXJKmxsVEzZszQrFmztHXrVt19992KiorS6tWrtXv37uMCydLSUoWEhDSHU8furNwehIaGau/evd4uo9M6NpTcs2eP3njjDRUVFenGG2+U2+3Wn/70Jz3wwAMaPny4CgsLVVBQ0Bx679+/X1dffbV+/vOfy+PxaNCgQcrKytLIkSP1+uuvnzDW2Qy/bVaLooOdig52anBCyClfl5iYqD59+mjFihW6+uqrz0ptAAAAAID2j81vgBZqChKbQkWLxaKxY8fKx8dHHo9HH374oUaNGqWHHnpIM2fOVGxsrCSpqqpKL730kp599lk5nU716tXrhHt5W2NjozZu3Kjzzz/f26V0SB6PR8uXL9fWrVvVv39/ud3u5nCwrKxM7733nhYtWqTRo0dr8eLF2rBhg+bNm6dly5YpNTVVI0eOVFFRkex2u/Lz81VRUSF/f3998MEHMk1Td999t/z9/TVhwgTdeeediouLax67aY3KpvdRe3g/nUx8fLyWLl2qxMREhYSceqgJAAAAAOg82t8cUqCDOTZUtNlsMk1TAwcOlNvt1oMPPqif/exnGj58ePP5+/bt06ZNmzR69Gj99a9/1aJFi064l7eFhoaqqqrqjDZM6YrKy8v1P//zPxo/frwef/xxvfHGG5L+02W7a9cu3XjjjVq3bp0qKir06quvKjg4WCNGjJDT6VRUVJR27twpSYqIiND27dv16KOPKisrS/fff7/uvfdeTZ8+XZI0fvx4DRw4UJKO2zDHarW2y+UBviswMFAXXHCBli5d2rxhFAAAAACga2EqN9BKju1Q8/Pz03PPPaf6+no5HA699dZbGjp0qGJiYrR27Vqdd955uv3222W1WhUSEqKVK1dq165dGj16tIYNGybp+zdFaQu+vr5yOp0qKSlRdHR0m4/fUQUFBenyyy/XI488oh07digtLU21tbVyOp2Sjga+MTExuuGGGzRs2DC9+uqrys3NVVRUlNLT03XJJZfonXfe0aRJk1RfX6/f/va3Cg8P1+9///uTvg+aOmyPXfO0Ixk1apS2bt2qzZs3a8SIEd4uBwAAAADQxtp/Ww3QAXk8HpmmKYfDIelod1t0dLT27t2rFStWaOLEiaqoqJBpmtqzZ492796t9evX67HHHlNRUZFycnKap4d7g2EYCg0NVXFxsVfG7whO1uVnmqZGjRolh8OhjRs3KicnR06nU42NjZKkkJAQjR49Wps3b5bdbldNTY369u2rqqoqpaWlKTAwULfddpv++Mc/6pNPPtEll1wi6eialKZpNr+vmrSXDtuWstlsmjx5sj799FNVV1d7uxwAAAAAQBujYxI4C44NFE3TVEREhPLy8rRt2zb5+vrq/vvvV1RUlM477zzZbDbV1dXp+eefV0FBgd555x299NJLSk1N1aOPPqrAwEBJUkNDg+x2e5s9g8vlUklJSZuN1955PB4ZhtEcBlosFtXU1Gjz5s1KTExUYmKiDMNo3mwmJSVFTz75pKSjAZwk+fn5qXv37nrsscdUWVmp999/X++//7569OghPz+/5uvPPffc5jGb3kvHjt2Z9OrVSwkJCfr000916aWXerscAAAAAEAbomMSOMuawqSMjAzt3btXzz77rP7whz/ohhtu0Lhx41RWVqakpCT5+/vrmWeeUUJCgr7++muVlZU1rzdYU1PTHErOmDFDNTU1Z73urt4xeWxnonQ0iDQMo7mzb8WKFRoyZIhWrFhx3BqPTUGiw+HQoEGDmv+tDh48qEOHDikx
MVEpKSmqqKjQm2++qYiICPXs2bN5c6RjQ+2OsFZka5g8ebK2b9+uI0eOeLsUAAAAAEAbomMSaCMjR46Ur6+vXC6XXC6XJCk/P1+RkZHq06ePcnJyFBQUpEsuuUTl5eWyWCzq1q2bnn76aa1cuVITJ06U1WpVfn6+fH191djY2NyJdza4XK7mYLQr+O6O6Mf+f01Njd5++22tWLFCJSUlmjNnjvbs2aMnnnhCw4YNU2ho6An327Fjh4KDg+Xr6ytJys3NVWBgoBITE9WzZ085HA4lJye3m53YvcnlcmnUqFFatmyZbrnlli7/egAAAABAV9E12nGAdqJpF+WmbrzIyEjdcMMN6tatm+Li4nTo0CGlpaXppZde0tixY7Vw4UJt3LhR77zzjoqKivTmm29q3rx5knRWQ0mpa3VM5uXl6d133z3u2ObNmzVv3jwVFxerpKREf/7zn3XjjTdq5syZWrhwoeLi4lRQUKDZs2dr7ty5evvttyVJW7ZskXR0PcljO1tTU1M1cOBABQUFqVevXgoODm6eHg5pzJgxKi0tVVpamrdLAQAAAAC0EYJJwAuawqimgNI0TdlsNo0bN04PPvig9u3bp4svvliLFy/W/fffL6fTqerqak2YMEEFBQX66U9/etZrdLlcKisrO26acmdy7OY1UVFRevHFF/XRRx8pOztby5Yt01//+lfZ7XY9//zzyszM1BVXXKH6+nqNHDlSYWFhamho0G233aY5c+bI7XbL399f+fn52rhxoyTpyiuv1AMPPHDcmE3dkRdccIEuu+yyLjNV+1Q4HA5NnDhRK1asUF1dnbfLAQAAAAC0AT4VA17UFFA2/ffWW2/VRx99pDlz5igwMFA9evRQr169lJ+fr/Xr1+uBBx7Qc889pyuuuEKStGjRIn355ZfH3bO1gsTAwEBZLBaVlpa2yv3am6bNa9atW6c1a9aoqqpK9957rzwej7755huNHDlSgwYN0qpVq5Senq7zzz9fS5culSRVVFQoMTFRf//733XVVVepqqpKY8aMUWRkpG6//fbmMb67TiXdkT9swIABcrlcJ7ynAQAAAACdE8Ek0I40dfEFBQXJbrerX79+mj59ui699FJNmTJFBw8eVFZWlqZNmyZJmj9/vkJCQiRJ27ZtkyRZrdYzriM3N1fbtm1TXl6eXnrpJT3++OMdcortsV2R3/XGG29o6NChWrNmjRISEnT33XfrsssuU0JCgkJDQ/XEE09oy5YtevbZZ3XTTTepf//+2r59u6688kqtW7dOAwYM0LRp0/Tpp5/qL3/5i4KCgk4YgyDy9BiGoSlTpmjDhg0qKirydjkAAAAAgLPMML/b0gOgXamsrNRrr72mm2++We+8844cDoemTZumZ599VgUFBfrVr36l//f//p8qKyvV0NCgZ555Rt27d2++3uPxnPaU4SeeeEIbNmxQXl6eQkND5XQ69eijj6pPnz6t/XityuPxKD09XaGhoc27XEvSoUOHVF1drf79+zcfe+GFFxQUFKSLL75YISEhqqur09SpU7V8+XIVFxfrt7/9rW688UatWLFCAQEBuvXWW/XCCy9o8ODBuuqqq44b1+12N+/ajTP30UcfqbS0VD/72c+8XQoAAAAA4Cyyzpo1a5a3iwBwItM0ZZqmfHx8NGLECNntdhUVFenRRx+V3W7X/Pnz9fzzz+vJJ5/UqFGj9Pjjj6tv376yWCyy2WzatWuXYmNjWxSWRUdHa926dXI6nTIMQ3FxcZo2bVq7XxPRMAxt2rRJ69evV+/evVVXV6dLLrlEX375peLi4tS3b195PB5t3LhRvXv31meffaYVK1Zo0aJFGj16tHbv3q3PP/9c8fHxGjFihFatWqXk5GTdeuutCgsL08GDB7Vv3z5deOGFamxsbH49CCVbV3x8vFauXKnIyEiFhYV5uxwAAAAAwFlCxyTQzjVtmNLk22+/1auvvqq6ujo9/PDDuuyyy7Ry5crm7z/xxBMKCgrSwoULNWjQIM2aNeuk04x/zHvvvafnnntONTU1uvPOO9tkw51T9d3XpEleXp5ee+01zZ07V2PGjNGf//xnPf7445o3b558fX1VXFysBx98UB6PR88//7z8/PxkGIZuueUW3XnnnTJNUwsXLtQNN9ygQYMGnXD/goICVVVVKTk5uQ2esmvbtGmT1q1bpzvuuOOs70APAAAAAPAOPu0B7dyxAZzH41FycrJmzpwpt9stwzCUkJCgd955R9dee63S09O1ZMmS5rDyzjvvVFFRUYuCyYsvvlirVq3SmjVrNHTo0NZ8pBZrCiSbXhOPx6O9e/eqT58+WrhwoQoLC9WvXz9dccUV+sMf/qC4uDide+65uuuuuxQaGqp+/frplVdeab722Wef1UsvvaTLL79cffr0kb+/v4YPH37cmMdOhY+IiFBERESbPnNXNXToUG3atElfffWVxowZ4+1yAAAAAABnAVO5gQ7EMAx5PB4ZhtE8fdjlcunVV1+Vv7+/srKy1KtXL1VVVenOO++U2+3W7bff3qINcaxWq5KTk7Vv3z7dcsstbTqNuymAbFq7sYlhGCouLtZbb70lPz8/FRUV6dVXX1VycrICAgJ08OBBpaamyjAMpaWlacSIERo4cKCuvvpqlZeX64svvtBll10mj8cj0zTVt29f3X333brwwgvlcDiaxz62I5Mp2t5hGIYiIyP14YcfavDgwfLx8fF2SQAAAACAVsZUbqCTqK+v15EjR/TII4/o4YcflsvlUnp6us4999wWhYqNbo8KK+uVV16rnNIaldbUq9FtymY1FOLrUGyIr6KCnAoPcMhmbb3Q8uDBg1qzZo1uuumm5mNVVVXy9/fX/Pnz9frrr2vYsGGKjIyUy+VSQkKCJGnChAn6+9//rpSUFLlcLj333HO65ZZb5Ovrq/vuu09+fn763e9+p9TU1BPGZPOa9uvdd9+VaZq6+uqrvV0KAAAAAKCVMZUb6OCapho7HA4lJSVpypQpuuWWW3TTTTfptttuO+375VfUanNGidYeKFJlbYMsMiRDslstMgzJNKUGt0cyJY9MBTjtOr9HmIYlhSoy0HnGz5OUlKTu3bsrKytLH3zwgT788ENFRkbq8ccfV48ePWSz2fTcc8/pm2++0YwZMzRjxgx9/vnnuvLKK5WRkSGHw6H//u//lmEYevnll/WHP/xBS5YsUUBAwPeO2ZKOUrSNCRMm6JlnnlFGRoaSkpK8XQ4AAAAAoBXRMQl0UoWFhQoPD5dpmqqoqPjRdSaLKuu0ZHuOtmeXSjIU5u+Qn8P6g12Epmmqut6toqp6SaYGx4Vo6uBYhQW0fNqt2+3WihUr9Nprr2nDhg367LPPtG/fPr3//vv661//qrFjx2rVqlWyWq36+c9/rnvvvVdPPvmk8vPzFRgYqGuuuUZXXHHFcWtRSkcDXEntfmdxnGjNmjXauXOnfvnLX/LvBwAAAACdCB2TQCfjdrtltVqPCyX/+Mc/6pFHHpG/v/8J53s8pjZ+W6zF27LV4PYoNtj3lKdmG4Yhfx+b/H1sanR7lJZTpm/
yKnRlSpxGJLtksZz+1Gir1Sqn06mUlBRt27ZNYWFhioyM1KJFi1RRUaHg4GDNmTNHNTU1iouLU//+/XX33XertrZWI0eO/M6zeZoDSgKtjmvUqFHaunWrNm/erBEjRni7HAAAAABAK2HzG6CTOTaAc7vdmjdvng4cOCC73a7+/fsfd25do1v/+mPWuk0AAB2vSURBVDpTH+/KVYifXRGBzhaFiUfHNRTka5dpmlp/sEhFVfXqExMoWwsCQYvFotzcXOXk5CgsLEz9+vXT1q1bVVRUpMmTJ+tvf/ubxo0bpyuuuEIul0tRUVGKi4uT9J8wUtIJXZPomCwWi1wul5YuXaqhQ4fKbrd7uyQAAAAAQCughQjopEzT1KFDh7RkyRLV19fr/fffV1ZWVvP36xrdeu2rDG3OKFFSmL8Cna0T9gQ67UoK89emjBK99lWG6hrdp32PyMhIBQcHy8/PTwsXLpQkXXLJJUpMTFRKSoqCgoL0y1/+Uj179my+pmlVCjojO6devXopISFBn332mbdLAQAAAAC0Ej7BA51Ubm6uZs+erd69e6u8vFxpaWl66qmn5Ha75fGYWrDxsNKyy5To8pO1hV2S38dqMZTk8tOO7DK9vemwPJ7TW8rW6XQqNDRUV199tW688UZJ0pAhQ5SamqrAwED16tVLe/bsOe4aOiM7v8mTJ2vbtm3Kzc31dikAAAAAgFbA5jdAJ7Vv3z6lp6crPz9f+fn52rNnjw4dOqQXX3xR5T6Ren1DhpLC/Fs9lDyW22Mqo6hKN56bpHO7h33veeXl5crNzVW3bt1+cJpu0/qZ6LpWrlypzMxM3XLLLYTRAAAAANDBEUwCXYhpmiqqqte85Xvk57C22vTtH1JR26Dqerfuv6j3cbt1NzY2as+ePdq2bZsOHjyovn376uqrrz4hbPJ4PEzPRrP6+nrNnz9fF110kQYOHOjtcgAAAAAAZ4BduYEuxDAMfbg9Rw1ujwKdzjYZM9BpV1lNgz7ccUQ3pyYrNzdXW7du1Y4dO+Tr66uUlBRdeumlCg4OPun1hJI4lsPh0EUXXaTly5erd+/ecjgc3i4JAAAAANBCdEwCXUh+Ra3mLEtXbLCvbNYfD/z2b9+g/du//t7v+wUG64Irb/rR+9TU1Sk9I0+DzYOqKy1Q//79NWTIECUmJjIdF6fNNE298sorSkhI0IQJE7xdDgAAAACgheiYBLqQzRklkoxTCiUlKSqxh/wCT+xkLMnL0eF9uxQRn/y915qmqZLiYuXm5qmwsFCNvsHyGzBAv/n5MPn4+HzvdcCPMQxDU6ZM0T//+U8NGTJEYWHfv34pAAAAAKD9IpgEuohGt0drDxQpzP/Up74GhoYrMDT8hON5GQckSfE9+53wvZrqauXm5io3L08ypajoKI0YMVweq0M5jR5ZbWd/XUt0ftHR0UpJSdEnn3yiG264wdvlAAAAAABagMXbgC6isLJelbUN8nOc2a7WdTXVys86pODwqObQ0u12K/fIEW3dulUbN25SdU2Nep9zjkadN0rdu3eXr5+f/BxWVdU2qLCyvjUeB9DYsWN1+PBh7d2719ulAAAAAABagI5JoIvIK6+VRcYZr+mYc/Abmaap+B59VFZaqtzcPOUX5MvX11cx0dEaMGCA7PYTuyINw5AhQ3nltYoObpuNd9C5+fn5ady4cfr444/VvXt32Wz8SgMAAACAjoSOSaCLyCmtkVphn5mMPWmqrKjU4YIy7dy5SxarRUNSUjR8+HDFxcefNJRsZvy7DqCVDBs2TA6HQ+vXr/d2KQAAAACA00R7CdBFlNbUy36Km96cTFlZmXbv2KI9O9MU2723ep3TW2Hh4bJYTv2edqtFZbUNLa4B+C6LxaIpU6bojTfe0KBBgxQUFOTtkgAAAAAAp4iOSaCLaHSbOpNZ3NXV1aotyVd8fJxSx09WRGTkaYWSkmQYUoPbbHkRwEkkJSWpd+/eWrFihbdLAQAAAACcBoJJoIuwWQ2ZZ5AJRkaEy1NdpsAQl1zR8S26h2lKdmsrzCcHvmPixInas2ePMjIyvF0KAAAAAOAUEUwCXUSIr0MNbk+Lr8/LPKCG+jrF9ezX4g10GtweBTt/YA1KoIWCgoJ0wQUXaNmyZfJ4Wv4+BwAAAAC0HYJJoIuIDfGVzqBjMvvANzIMQ3E9+rb8Jua/6wDOglGjRqm+vl5btmzxdikAAAAAgFPA5jdAFxEV5JRHpkzTPO2Ox5rKchXnZiksJkFO/4AWjW+apkyZigpytuh64MfYbDZNnTpVfn5+LXqfAwAAAADaFsEk0EWEBzgU4LSrut4tf5/T+9HPPpAu0zQV37Nfi8evrnfL32lXeICjxfcAfky3bt3k8XgIJQEAAACgAzBM80y2wwDQkSzbeUTLd+cp0eXX5mNnFldrUr8oTR4Q0+ZjAwAAAACA9oc1JoEuZFhSqCRTjWewCU5LHB3P1NCk0DYdFwAAAAAAtF9M5Qa6kMhApwbHhSgtp0zxoW3XNZlbXquU+FBFBrK+JNpGdXW1Fi1apPr6esXHx2vSpEnyeDyyWPh7HAAAAAC0F3xCA7qYqYNjZbdaVFHb0CbjVdQ2yG616NJBTOFG28jNzdWMGTNks9lkt9v18MMPq7i4WBaLRR5P23YLAwAAAAC+H8Ek0MWEBfjoypQ4FVTUye05u0vMuj2mCivrdGVKnMICfM7qWECTdevW6fzzz9e0adN00003ady4cVqyZIkk0TEJAAAAAO0In9CALmhEsksjkl3KLK6W5yyFkx6Pqczi6uaxgLaydu1abdmyRcXFxXrqqaf03nvv6bnnntP+/fvFfm8AAAAA0H6wKzfQRdU1uvXaVxnakV2mRJefrBaj1e7t/ncoOSguWP91XpJ8bNZWuzfwfUzTlGEYqqur0z333KNvv/1Whw4d0iWXXKIHH3xQERER3i4RAAAAAHAMgkmgC6trdOvtTYf19aFiRQT6KNBpP+N7VtQ2qKCiTiO7uXTd8ARCSbSppg1uKioqtGrVKmVlZenOO++UJK1Zs0Yvv/yy/vnPf0qSSktLFRIS4s1yAQAAAKBLY1duoAvzsVn1s5FJOicyUIu3ZauspkHRQU7ZrKe/ykOj26Pc8lrZrRbdeG6SRiS7ZGnFLkzgVDStIRkYGKj8/Hxt3rxZkjRv3jx9+eWX+vWvf62tW7fq3Xff1cSJEzVmzBgZBu9TAAAAAPAGgkmgi7NYDJ3bPUw9IwO0ZHuOtmeXSjIU5u+Qn8P6g6GNaZqqrnerqKpekqmU+FBdOiiGjW7QLvzyl7/UkSNH1L9/f5133nl6++23tXHjRl166aW66KKLdMEFF3i7RAAAAADo0pjKDeA4+RW12pJRojUHilRV2yBDhmRIdqtFhiGZptTg9kimZMpUgNOu0T3DNSQxRJGBTm+XD0j6z5Tu/Px8rV69Wtdee63mzp2rgoICDR06VGvXrtVjjz2moKCg5rUpAQAAAABti2ASwEk1uj0qrKxXXnmtckprVFbboAa3KbvVULDTrtgQX0UFORUe4GjR1G/gbGsKJyXpzTff1J
EjR/TrX/9afn5+WrVqlY4cOaIbb7xRkggnAQAAAMALmMoN4KRsVouig52KDnZqcAIbhKDjaQolJWn//v0KCAiQn5+fPvvsM33++ec6cOCAGhsbNX36dO8VCQAAAABdGB2TAIBO69iuyYULF6q8vFx79+7VyJEjlZSUpAcffFBPPfWUBgwY4OVKAQAAAKDroWMSANBpNYWSO3bs0L59+3TkyBH17dtXV111lQoLC+Xn5ye73d58/rFBJgAAAADg7KJjEgDQqXk8Hj355JNKTk5WTk6OduzYocmTJystLU1jxoxRdHS0Vq5cqXvuucfbpQIAAABAl0IwCQDo9MrKymS1WhUQEKBHH31Upmnq2muv1aJFi/Tll1/qtttu03XXXaemX4lshAMAAAAAZx/BJACgS2lsbJTNZtMTTzyh/fv3a/bs2YqMjFRdXZ3KysoUGRnp7RIBAAAAoEtgIS0AQJdis9lUUVGhbdu2af78+YqMjNSaNWs0duxYrVq1SpK0evVqlZSUeLlSAAAAAOjc2PwGANDlBAYGKiYmRi+88IIKCgq0bds2vfDCC7Jarbrooos0bdo0DR061NtlAgAAAECnRsckAKBL8Xg8kqR58+Zp7dq1ysjI0MKFC1VaWqrHHntMO3bsUM+ePRUYGOjlSgEAAACgc2ONSQBAl+PxeGSxWFRaWqqQkBC9+OKL2rlzp2bMmKGGhgatXr1a06dPl2EYzecCAAAAAFoXn7QAAF1OU9AYEhKi7Oxsffnll5o7d64SEhLk8Xi0Y8cOvfbaa8edCwAAAABoXXzaAgB0aXFxcYqPj9eOHTskSbt27dL48eO1YMECvfjii16uDgAAAAA6L4JJAECX1bTe5MyZM7VixQrt3btXkpSRkaFXXnlFTqdTbrfbmyUCAAAAQKdFMAkA6LKapmn7+vpq2LBhuueeexQWFqbS0lJFRETo+uuvl9VqbQ4wAQAAAACth2ASAABJkydP1sCBA7V8+XIlJSVJknx8fCT9J8DMz89XY2Oj12oEAAAAgM6EXbkBAF3esTtvV1dXq7KyUpGRkc3fX79+vWbNmqUBAwaoqKhIL7/8srdKBQAAAIBOg45JAECXd+zO2263W5988knz1y+88IL+8Y9/6P7779e8efOUmZmprVu3eqNMAAAAAOhUbN4uAACA9iQwMFAXXXSRTNPU9OnTFRISoscff1zh4eHKyMjQ5MmTNWTIEG+XCQAAAAAdHh2TAAB8R1RUlDIyMhQVFaWnn35a4eHhWrdunWbMmKHo6GhJEiuhAAAAAMCZoWMSAICTCA0N1dKlSxUTEyOPx6O1a9fqjjvu0IQJEyRJhmF4uUIAAAAA6NjY/AYAgO9o2gxn48aNqqqqUklJicaPH6+goCBvlwYAAAAAnQbBJAAAp8g0TTolAQAAAKCVsMYkAACngFASAAAAAFoXwSQAAD/C7XZrzZo1bHgDAAAAAK2IYBIAgB/hdrv19ddfa9euXd4uBQAAAAA6DYJJAAB+hMPh0MSJE7V8+XLV19d7uxwAAAAA6BQIJgEAOAUDBw5UcHCwvvzyS2+XAgAAAACdAsEkAACnwDAMXXzxxVq/fr2Ki4u9XQ4AAAAAdHgEkwAAnKKYmBgNGjRIn3zyibdLAQAAAIAOj2ASAIDTMH78eGVmZmrfvn3eLgUAAAAAOjSCSQAAToOfn5/Gjh2rjz/+WG6329vlAAAAAECHRTAJAMBpGj58uGw2m9avX+/tUgAAAACgwyKYBADgNFksFk2ZMkVffPGFKioqvF0OAAAAAHRIBJMAALRAcnKyevXqpZUrV3q7FAAAAADokAgmAQBooYkTJyo9PV2HDx/2dikAAAAA0OEQTAIA0ELBwcEaPXq0li5dKo/H4+1yAAAAAKBDIZgEAOAMpKamqra2Vlu3bvV2KQAAAADQoRBMAgBwBmw2myZPnqxVq1appqbG2+UAAAAAQIdBMAkAwBk655xzFBsbq88++8zbpQAAAABAh0EwCQDAGTIMQ5MnT9aWLVuUl5fn7XIAAAAAoEMgmAQAoBWEh4fr3HPP1bJly2SaprfLAQAAAIB2j2ASAIBWcsEFF6iwsFC7du3ydikAAAAA0O4RTAIA0Ep8fHw0ceJELV++XPX19d4uBwAAAADaNYJJAABa0aBBgxQcHKw1a9Z4uxQAAAAAaNcIJgEAaEWGYejiiy/WV199peLiYm+XAwAAAADtFsEkAACtLCYmRoMGDdInn3zi7VIAAAAAoN0imAQA4CwYN26cMjIytH//fm+XAgAAAADtEsEkAABngb+/v8aNG6dly5bJ7XZ7uxwAAAAAaHcIJgEAOEuGDx8um82mDRs2eLsUAAAAAGh3CCYBADhLLBaLpkyZotWrV6uiosLb5QAAAABAu0IwCQDAWZScnKyePXtq5cqV3i4FAAAAANoVgkkAAM6yiy66SOnp6Tp8+LC3SwEAAACAdoNgEgCAsyw4OFijR4/WsmXL5PF4vF0OAAAAALQLBJMAALSB1NRU1dTUaOvWrd4uBQAAAADaBYJJAADagM1m06RJk7Rq1SrV1NR4uxwAAAAA8DqCSQAA2kjv3r0VGxurzz//3NulAAAAAIDXEUwCANBGDMPQ5MmTtXnzZuXl5Xm7HAAAAADwKoJJAADaUHh4uEaOHKlly5bJNE1vlwMAAAAAXkMwCQBAG/vJT36iwsJC7d6929ulAAAAAIDXEEwCANDGfHx8NHHiRC1fvlwFBQV66623tGPHDm+XBQAAAABtimASAAAv6Nevn7Kzs3Xbbbfptdde08GDB71dEgAAAAC0KYJJAADaWGFhoWbPnq39+/ersLBQISEhKi8v93ZZAAAAANCmCCYBAGhjNptNDodDvr6+ioyMVG5urioqKrxdFgAAAAC0KZu3CwAAoKsJCQnRAw88oNWrV+v//u//tG/fPh06dOi4cxrdHhVW1iuvvFY5pTUqralXo9uUzWooxNeh2BBfRQU5FR7gkM3K3xkBAAAAdDyGaZqmt4sAAKCrysvL05/+9Cfl5OTonXfeUX5FrTZnlGjtgSJV1jbIIkMyJLvVIsOQTFNqcHskU/LIVIDTrvN7hGlYUqgiA53efhwAAAAAOGUEkwAAeJnb7da+zCNan9Og7dmlkgyF+Tvk57DKMIzvvc40TVXXu1VUVS/J1OC4EE0dHKuwAJ82qx0AAAAAWopgEgAAL/J4TG38tliLt2Wrwe1RdJCzRVOzG90e5ZbXym616MqUOI1Idsli+f5QEwAAAAC8jWASAAAvqWt0a8HGw9r4bbEiAn0U6LSf8T0rahtUUFGnEcku/XREgnxs1laoFAAAAABaH8EkAABeUNfo1mtfZSgtu0wJLj9ZW7G70e0xlVlcrUFxwfqv85IIJwEAAAC0S2zjCQBAG/N4TC3YeFhp2WVKbOVQUpKsFkNJLj/tyC7T25sOy+Phb5AAAAAA2h+CSQAA2tjGb4u18dtiJbj8zto6kBaLoUSXn74+dHQsAAAAAGhvCCYBAGhDhZV1WrwtWxGBPq3eKfldVouhiEAfLd6WraLKurM6F
gAAAACcLoJJAADa0Ifbc9Tg9rTKRjenItBpV4Pbow93HGmT8QAAAADgVBFMAgDQRvIrarU9u1TRQc4fPK+mslz7t29QeXFBq4wbHeTUtqwS5VfU/uB569ev17Zt21plTAAAAAD4MQSTAAC0kc0ZJZIM2aw//Ov3aDD5tSqKC1tl3KPjGdqSUfKD5xFMAgAAAGhLBJMAALSBRrdHaw8UKczf4ZXxw/wdWnOgSI1uj1fGBwAAAIDvMkzTNL1dBAAAHVVVVZU+++wz7d27V1VVVQoICFC/fv104YUXysfHp/m8l994Wy8uXqXrbr9PhvGfTW9qKsu1+t1X1XPwSPUcfK6y96crbd3KE8aJ69FXA8+f0Pz94RMuV0letrIPfKOGuhoFhISp15DzFB6b2HxNcW6Wvl6+WANTJyi2Rx8dLq7WA5P7KjrYqVdeeUWlpaW65557JEmzZs066fN933EAAAAAOFM2bxcAAEBHVVtbq5deeknFxcUaOnSooqOjlZ2dra+++krffvutbrvtNtlsR3/VltY0yJBxXCh5MqFRseo+YLgO7tykhF79FRoVK0nyDQg+7ry9m9fKND1K6jtYHrdbWft2afOqDzR8wuUKi0k44b6GYciQobzyWkUHn7jG5VVXXaWPP/5Yfn5+uuCCC1r6kgAAAADAKSOYBACghdauXauioiJNnTpVw4YNkySNGDFC4eHhWrlypb7++mulpqZKkoqr6qQfziQlSX6BwQqPTdDBnZsUEhGj2O59TnpeQ0Odzr/0BtkcR6eGx/XsqzXvv6H0r7/Q6Mt/dvKbG1JOaY0GJ4Sc8K1Bgwbp008/VUBAgAYNGnQKTw8AAAAAZ4Y1JgEAaKFvvvlGAQEBGjJkyHHHR40aJYfDoW+++ab5WFV9oyw/0i15OhLOGdAcSkqS0y9AMd3OUWVZsarKT77Jjd1qUVltQ6vVAAAAAABngmASAIAWKikpUXh4uCyW43+d2mw2uVwulZT8JyD0uKVWzCXlHxT6vceqK8pPeo1hSA1ulpYGAAAA0D4QTAIA0AYsVulk2815PG23S7ZpSnar0ebjAgAAAMDJEEwCANBCoaGhKioqkvmdxLGxsVElJSUKDf1PV6MrMFAe01R9Xe1x59ZUnry78cecbLp20zG/wCBJkt3n6CY3DfVHx2xwexTstEuSSktLWzQuAAAAALQWgkkAAFqoT58+qqio0LZt2447vmHDBtXV1alPn/9sXNM9PloypeLcrOZjpmkq45vtJ9zXajsaHjYFiidzeO9ONdbXN39dW12pI4f2KiDY1Tyl29c/SIZhqKhpTFOKDfHVrl27VF5+YiDqcDhUU1NzCk8OAAAAAGeOXbkBAGih888/X7t379aSJUuUnZ2tqKgoZWdna/v27YqJidHIkSObz00dMUTPvfmedq5bpaqyEtnsDuVlHpDb3XjCfQNCXLLZ7Mrckyar1Sabw0e+AUEKiYhuPsdu99GGj99RbI++Mj0eHd67U+7GBvUZMab5HJvDobgefZS1P13bv/xE9T4u7bR8qyMZ++VyuU6Yzh0fH6+tW7fq888/V1hYmAzD0IABA87CKwcAAAAAknXWrFmzvF0EAAAdkc1m04ABA1RXV6c9e/YoPT1d1dXVSklJ0ZVXXinHMbtmB/g6tL3MrsrifBUc3q/y4nyFxySq15BRytyTJld0nFzR8ZIki8WqgBCXSvJzlHNwj3Iz9ss0TUUldldFcaHyDx/UwNETZbHYlLVvlwpzMuT0D9SA1PGKiEs6rkZXVLzqaqqV/e0BleYd1sCEUF17zTXKzMxUbW2tRo0a1XxuXFyciouLtXPnTqWlpWn37t268MIL2+S1BAAAAND1GOZ3F8YCAABnxbKdR7R8d54SXX4tvkf2/nSlrVupkRdd2RxknorM4mpN6helyQNiWjw2AAAAALQm1pgEAKCNDEsKlWSq0d22O2IfHc/U0KTQHz0XAAAAANoKwSQAAG0kMtCpwXEhyi3//k1tzobc8lqlxIcqMtDZpuMCAAAAwA8hmAQAoA1NHRwru9WiitqGNhmvorZBdqtFlw5iCjcAAACA9oU1JgEAaGMbDhbp9Q0ZSgrzl9VinLVx3B5TmcVV+tnIJJ3bPeysjQMAAAAALUHHJAAAbWxEsksjkl3KLK6Wx3N2/j7o8ZjKLK5uHgsAAAAA2huCSQAA2pjFYuinIxI0KC5YGcXVcrdyOOn2mMoortaguGBdNzxBlrPYlQkAAAAALcVUbgAAvKSu0a23Nx3W14eKFRHoo0Cn/YzvWVHboIKKOo3s5tJ1wxPkY7O2QqUAAAAA0PoIJgEA8CKPx9TGb4u1eFv2/2/f7nGiDKAoDJ+PcXBUVBTif4yFtcHCziXYuFa3YG3CAiwsJGokIoKODMPnCkyU4jDF86zg1m/OzWJ5lns3Zrk0+f+HhtPlWT4dzjOdrOX1zsO8eHLbUhIAAFhpwiQArID9o995s7uX3Y8HSYZsXVvP1fVJhuHvcXEcx/w8WWb/+CTJmJ1Ht/Lq2f1sbVyu3Q0AAHBewiQArJAvP+Z59+Fb3r7fz/F8kSFDMiTTyVqGIRnHZLE8S8ZkzJiN2TQvn27n+ePN3Lk+u+jzAQAA/pkwCQAr6HR5lq9HJ/l8OM/ewa98ny+yWI6ZTobcnE3zYPNK7t6YZXtj/Vyv3wAAABdNmAQAAAAA6kwsAAAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqBMmAQAAAIA6YRIAAAAAqPsDbPCyBdszhPIAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "for result in search.history:\n", + " pipeline_utils.plot_pipeline(result.pipeline)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/axolotl/examples/load_csv.ipynb b/axolotl/examples/load_csv.ipynb new file mode 100644 index 0000000..0b26179 --- /dev/null +++ b/axolotl/examples/load_csv.ipynb @@ -0,0 +1,424 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Axolotl CSV manipulation [Binary Classification]." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, we are showcasing different components of the system.\n", + "- Loading syntethic data for a univariate regression task.\n", + "- Easy use of the backend.\n", + "- Use of simple interface for search predefined method.\n", + "- Exploring searched pipelines." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import multiple utils we will be using" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-07-12 15:23:25,435\tINFO resource_spec.py:212 -- Starting Ray with 4.39 GiB memory available for workers and up to 2.2 GiB for objects. You can adjust these settings with ray.init(memory=, object_store_memory=).\n", + "2020-07-12 15:23:25,965\tINFO services.py:1170 -- View the Ray dashboard at localhost:8265\n" + ] + } + ], + "source": [ + "import os\n", + "from pprint import pprint\n", + "import pandas as pd\n", + "from sklearn.datasets import make_regression\n", + "\n", + "from d3m import container\n", + "from d3m.metadata.pipeline import Pipeline\n", + "\n", + "from axolotl.utils import data_problem, pipeline as pipeline_utils\n", + "from axolotl.backend.ray import RayRunner\n", + "from axolotl.algorithms.random_search import RandomSearch\n", + "\n", + "# init runner\n", + "backend = RayRunner(random_seed=42, volumes_dir=None, n_workers=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load csv file and transform it as dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "table_path = os.path.join('..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'tables', 'learningData.csv')\n", + "df = pd.read_csv(table_path)\n", + "dataset, problem_description = data_problem.generate_dataset_problem(df, task='binary_classification', target_index=5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create an instance of the search and fit with the input_data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# The method fit search for the best pipeline based on the time butget and fit the best pipeline based on the rank with the input_data.\n", + "search = RandomSearch(problem_description=problem_description, backend=backend)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Current trial is failed. Error: [StepFailedError('Step 7 for pipeline 47ec5c86-46b8-4dee-9562-1e5ebc3d0824 failed.',)]\n", + "Current trial is failed. Error: [StepFailedError('Step 7 for pipeline 64da5190-c2ee-4b8e-abef-697b54cfa32b failed.',)]\n", + "Current trial is failed. Error: [StepFailedError('Step 7 for pipeline 9e03188f-2120-49ac-a087-1e4fb1b29754 failed.',)]\n", + "Current trial is failed. Error: [StepFailedError('Step 7 for pipeline af32bc20-64fa-44a5-ab34-bbe810b671b1 failed.',)]\n", + "Current trial is failed. Error: [StepFailedError('Step 7 for pipeline 5dbc9e87-19be-4cda-ac51-c1d7ea9328c1 failed.',)]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(pid=85426) class_weight presets \"balanced\" or \"balanced_subsample\" are not recommended for warm_start if the fitted data differs from the full dataset. In order to use \"balanced\" weights, use compute_class_weight (\"balanced\", classes, y). In place of y you can use a large enough sample of the full training set target to properly estimate the class frequency distributions. Pass the resulting weights as the class_weight parameter.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Current trial is failed. Error: [StepFailedError('Step 7 for pipeline 918c088e-58dd-4991-8336-deb0b41cb5eb failed.',)]\n", + "Current trial is failed. Error: [StepFailedError('Step 7 for pipeline 41dfec8f-0b07-4f8e-8ff3-cdbb1dab11c7 failed.',)]\n", + "Current trial is failed. Error: [StepFailedError('Step 7 for pipeline d465a878-1ea5-4b72-b8a7-3a4122d1a482 failed.',)]\n", + "Current trial is failed. Error: [StepFailedError('Step 7 for pipeline 8c39e981-f446-4fde-8744-5606c35a7fdf failed.',)]\n", + "Current trial is failed. Error: [StepFailedError('Step 7 for pipeline df127bce-11af-4fae-b8bb-722cb0666484 failed.',)]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(pid=85426) class_weight presets \"balanced\" or \"balanced_subsample\" are not recommended for warm_start if the fitted data differs from the full dataset. In order to use \"balanced\" weights, use compute_class_weight (\"balanced\", classes, y). In place of y you can use a large enough sample of the full training set target to properly estimate the class frequency distributions. Pass the resulting weights as the class_weight parameter.\n", + "(pid=85426) The parameter 'presort' is deprecated and has no effect. It will be removed in v0.24. You can suppress this warning by not passing any value to the 'presort' parameter. We also recommend using HistGradientBoosting models instead.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Current trial is failed. Error: [StepFailedError('Step 7 for pipeline 0985e11e-8db0-4c1c-9f34-3ce8fbc626c1 failed.',)]\n", + "Current trial is failed. Error: [StepFailedError('Step 7 for pipeline 8977a9c0-dd79-4771-9dc1-455586b80947 failed.',)]\n", + "Current trial is failed. 
Error: [StepFailedError('Step 7 for pipeline c0238551-5fbb-41cd-8187-d3d23bc5571d failed.',)]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(pid=85426) class_weight presets \"balanced\" or \"balanced_subsample\" are not recommended for warm_start if the fitted data differs from the full dataset. In order to use \"balanced\" weights, use compute_class_weight (\"balanced\", classes, y). In place of y you can use a large enough sample of the full training set target to properly estimate the class frequency distributions. Pass the resulting weights as the class_weight parameter.\n" + ] + } + ], + "source": [ + "fitted_pipeline, fitted_pipelineine_result = search.search_fit(input_data=[dataset], time_limit=30)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "produce_results = search.produce(fitted_pipeline, [dataset])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
+        "[HTML table rendering of produce_results.output: 150 rows × 2 columns (d3mIndex, species), Iris-setosa through Iris-virginica; the same data appears in the text/plain output below.]\n",
+       "
" + ], + "text/plain": [ + " d3mIndex species\n", + "0 0 Iris-setosa\n", + "1 1 Iris-setosa\n", + "2 2 Iris-setosa\n", + "3 3 Iris-setosa\n", + "4 4 Iris-setosa\n", + ".. ... ...\n", + "145 145 Iris-virginica\n", + "146 146 Iris-virginica\n", + "147 147 Iris-virginica\n", + "148 148 Iris-virginica\n", + "149 149 Iris-virginica\n", + "\n", + "[150 rows x 2 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "produce_results.output" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Print information about scores of the succeded pipelines." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----------------------------------------------------\n", + "Pipeline id: 676360d8-71ac-401c-b44a-31a810c4e8d3\n", + "Rank: 0.22667216466666668\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.773333 0.773333 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: 85d44359-0dac-4260-aea8-c78950025c3f\n", + "Rank: 0.33333446433333336\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.666667 0.666667 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: 3efb07be-28ff-45d8-b1fb-1c49f96b3381\n", + "Rank: 0.6666653826666668\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.333333 0.333333 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: abd9eb99-a4ba-4210-bb34-c2dec7c3ccfa\n", + "Rank: 0.6666606186666667\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.333333 0.333333 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: 8948a194-0dfe-4d07-a7c8-d1f5136f68c6\n", + "Rank: 0.21333939733333337\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.786667 0.786667 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: 22866f54-ba68-49e5-8f84-a2a6aba98253\n", + "Rank: 0.16000235200000004\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.84 0.84 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: 37a1c72a-9efd-4b0a-9d3d-811d47571b45\n", + "Rank: 0.6666753326666668\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.333333 0.333333 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: 2d3cae0f-66f6-46e0-9fa5-128bf02b4d7e\n", + "Rank: 0.6666655736666668\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.333333 0.333333 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: d1e5a59d-be50-42f3-a71b-cf8ba59b3c47\n", + "Rank: 0.08666869166666667\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.913333 0.913333 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: 35d47611-bded-4669-9803-9d259f686ec1\n", + "Rank: 0.35999672099999996\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.64 0.64 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: 7398d17f-e91f-4c75-9a95-c9f85763c858\n", + "Rank: 0.6666598006666667\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.333333 0.333333 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: 5293503b-4cb6-4b8b-bf8e-8b9d981c3b03\n", + "Rank: 0.04666429966666663\n", 
+ " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.953333 0.953333 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: 756e2a15-3315-4aa1-8620-f73ffc69f8a4\n", + "Rank: 0.6666748276666667\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.333333 0.333333 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: 46633510-6f46-479e-982e-263aaa2e187a\n", + "Rank: 0.17999182400000005\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.82 0.82 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: 49a750b0-5c86-4ff3-9b2d-c58c6390dd0d\n", + "Rank: 0.6666588986666667\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.333333 0.333333 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: 84c24452-b2cf-41a2-813c-a135eaeef480\n", + "Rank: 0.36000324699999997\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.64 0.64 42 0\n", + "----------------------------------------------------\n", + "Pipeline id: 82117b6b-6960-48bb-b1f4-91355acf51d6\n", + "Rank: 0.026667331666666617\n", + " metric value normalized randomSeed fold\n", + "0 ACCURACY 0.973333 0.973333 42 0\n" + ] + } + ], + "source": [ + "for pipeline_result in search.history:\n", + " print('-' * 52)\n", + " print('Pipeline id:', pipeline_result.pipeline.id)\n", + " print('Rank:', pipeline_result.rank)\n", + " print(pipeline_result.scores)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/axolotl/examples/random_search/oracle.json b/axolotl/examples/random_search/oracle.json new file mode 100644 index 0000000..1e4ae71 --- /dev/null +++ b/axolotl/examples/random_search/oracle.json @@ -0,0 +1 @@ +{"ongoing_trials": {}, "hyperparameters": "{'space': [{'class_name': 'Enumeration', 'config': {'type': d3m.metadata.hyperparams.Enumeration, 'default': '6a520746-108c-45bf-a6d8-c875b5a9d326', 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'values': ['6a520746-108c-45bf-a6d8-c875b5a9d326', 'a6b468a5-4d03-405e-a707-8e377f9ad1c3', 'ef1c483a-34fc-4398-a6b3-063b33786972'], 'p': Enumeration(values=['6a520746-108c-45bf-a6d8-c875b5a9d326', 'a6b468a5-4d03-405e-a707-8e377f9ad1c3', 'ef1c483a-34fc-4398-a6b3-063b33786972'], default=6a520746-108c-45bf-a6d8-c875b5a9d326)}}, {'class_name': 'Enumeration', 'config': {'type': d3m.metadata.hyperparams.Enumeration, 'default': 'mean', 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The imputation strategy. - If \"mean\", then replace missing values using the mean along each column. Can only be used with numeric data. - If \"median\", then replace missing values using the median along each column. Can only be used with numeric data. - If \"most_frequent\", then replace missing using the most frequent value along each column. Can be used with strings or numeric data. 
- If \"constant\", then replace missing values with fill_value. Can be used with strings or numeric data. .. versionadded:: 0.20 strategy=\"constant\" for fixed value imputation.', 'values': ['median', 'most_frequent', 'mean', 'constant'], 'p': Enumeration(values=['median', 'most_frequent', 'mean', 'constant'], default=mean)}}, {'class_name': 'UniformBool', 'config': {'type': d3m.metadata.hyperparams.UniformBool, 'default': False, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'p': UniformBool(default=False)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': None, 'structural_type': typing.Union[int, NoneType], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'When strategy == \"constant\", fill_value is used to replace all occurrences of missing_values. If left to the default, fill_value will be 0 when imputing numerical data and \"missing_value\" for strings or object data types.', 'configuration': {'int': {'type': d3m.metadata.hyperparams.Hyperparameter, 'default': 0, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter']}, 'none': {'type': d3m.metadata.hyperparams.Constant, 'default': None, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter']}}, 'p': Union(configuration={int: Hyperparameter(default=0), none: Constant(default=None)}, default=None)}}, {'class_name': 'Enumeration', 'config': {'type': d3m.metadata.hyperparams.Enumeration, 'default': 'SPEC', 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'different method to choose for feature selection', 'values': ['SPEC', 'fisher_score', 'reliefF', 'CIFE', 'f_score', 'chi_square'], 'p': Enumeration(values=['SPEC', 'fisher_score', 'reliefF', 'CIFE', 'f_score', 'chi_square'], default=SPEC)}}, {'class_name': 'Uniform', 'config': {'type': d3m.metadata.hyperparams.Uniform, 'default': 0.5, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'percentage of features to select, between 0 and 1', 'lower': 0.25, 'upper': 1, 'lower_inclusive': True, 'upper_inclusive': False, 'p': Uniform(lower=0.25, upper=1, q=None, default=0.5, lower_inclusive=True, upper_inclusive=False)}}, {'class_name': 'Bounded', 'config': {'type': d3m.metadata.hyperparams.Bounded, 'default': 10, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The number of trees in the forest.', 'lower': 1, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False, 'p': Bounded(lower=1, upper=None, default=10, lower_inclusive=True, upper_inclusive=False)}}, {'class_name': 'Enumeration', 'config': {'type': d3m.metadata.hyperparams.Enumeration, 'default': 'gini', 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The function to measure the quality of a split. Supported criteria are \"gini\" for the Gini impurity and \"entropy\" for the information gain. 
Note: this parameter is tree-specific.', 'values': ['gini', 'entropy'], 'p': Enumeration(values=['gini', 'entropy'], default=gini)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': 'auto', 'structural_type': typing.Union[str, NoneType, float], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=sqrt(n_features)`. - If \"sqrt\", then `max_features=sqrt(n_features)` (same as \"auto\"). - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.', 'configuration': {'calculated': {'type': d3m.metadata.hyperparams.Enumeration, 'default': 'auto', 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'values': ['auto', 'sqrt', 'log2']}, 'none': {'type': d3m.metadata.hyperparams.Constant, 'default': None, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter']}, 'percent': {'type': d3m.metadata.hyperparams.Uniform, 'default': 0.25, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': 1, 'lower_inclusive': True, 'upper_inclusive': False}}, 'p': Union(configuration={calculated: Enumeration(values=['auto', 'sqrt', 'log2'], default=auto), none: Constant(default=None), percent: Uniform(lower=0, upper=1, q=None, default=0.25, lower_inclusive=True, upper_inclusive=False)}, default=auto)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': None, 'structural_type': typing.Union[int, NoneType], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.', 'configuration': {'int': {'type': d3m.metadata.hyperparams.Bounded, 'default': 10, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False}, 'none': {'type': d3m.metadata.hyperparams.Constant, 'default': None, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter']}}, 'p': Union(configuration={int: Bounded(lower=0, upper=None, default=10, lower_inclusive=True, upper_inclusive=False), none: Constant(default=None)}, default=None)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': 2, 'structural_type': typing.Union[int, float], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a percentage and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. 
versionchanged:: 0.18 Added float values for percentages.', 'configuration': {'absolute': {'type': d3m.metadata.hyperparams.Bounded, 'default': 2, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 1, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False}, 'percent': {'type': d3m.metadata.hyperparams.Uniform, 'default': 0.25, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': 1, 'lower_inclusive': False, 'upper_inclusive': True}}, 'p': Union(configuration={absolute: Bounded(lower=1, upper=None, default=2, lower_inclusive=True, upper_inclusive=False), percent: Uniform(lower=0, upper=1, q=None, default=0.25, lower_inclusive=False, upper_inclusive=True)}, default=2)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': 1, 'structural_type': typing.Union[int, float], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The minimum number of samples required to be at a leaf node: - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a percentage and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for percentages.', 'configuration': {'absolute': {'type': d3m.metadata.hyperparams.Bounded, 'default': 1, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 1, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False}, 'percent': {'type': d3m.metadata.hyperparams.Uniform, 'default': 0.25, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': 0.5, 'lower_inclusive': False, 'upper_inclusive': True}}, 'p': Union(configuration={absolute: Bounded(lower=1, upper=None, default=1, lower_inclusive=True, upper_inclusive=False), percent: Uniform(lower=0, upper=0.5, q=None, default=0.25, lower_inclusive=False, upper_inclusive=True)}, default=1)}}, {'class_name': 'Uniform', 'config': {'type': d3m.metadata.hyperparams.Uniform, 'default': 0, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.', 'lower': 0, 'upper': 0.5, 'lower_inclusive': True, 'upper_inclusive': True, 'p': Uniform(lower=0, upper=0.5, q=None, default=0, lower_inclusive=True, upper_inclusive=True)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': None, 'structural_type': typing.Union[int, NoneType], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. 
If None then unlimited number of leaf nodes.', 'configuration': {'int': {'type': d3m.metadata.hyperparams.Bounded, 'default': 10, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False}, 'none': {'type': d3m.metadata.hyperparams.Constant, 'default': None, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter']}}, 'p': Union(configuration={int: Bounded(lower=0, upper=None, default=10, lower_inclusive=True, upper_inclusive=False), none: Constant(default=None)}, default=None)}}, {'class_name': 'Bounded', 'config': {'type': d3m.metadata.hyperparams.Bounded, 'default': 0.0, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19 ', 'lower': 0.0, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False, 'p': Bounded(lower=0.0, upper=None, default=0.0, lower_inclusive=True, upper_inclusive=False)}}, {'class_name': 'Enumeration', 'config': {'type': d3m.metadata.hyperparams.Enumeration, 'default': 'bootstrap', 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'Whether bootstrap samples are used when building trees. And whether to use out-of-bag samples to estimate the generalization accuracy.', 'values': ['bootstrap', 'bootstrap_with_oob_score', 'disabled'], 'p': Enumeration(values=['bootstrap', 'bootstrap_with_oob_score', 'disabled'], default=bootstrap)}}, {'class_name': 'UniformBool', 'config': {'type': d3m.metadata.hyperparams.UniformBool, 'default': False, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest.', 'p': UniformBool(default=False)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': None, 'structural_type': typing.Union[str, NoneType], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': '\"balanced_subsample\" or None, optional (default=None) Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` The \"balanced_subsample\" mode is the same as \"balanced\" except that weights are computed based on the bootstrap sample for every tree grown. For multi-output, the weights of each column of y will be multiplied. 
Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified.', 'configuration': {'str': {'type': d3m.metadata.hyperparams.Enumeration, 'default': 'balanced', 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'values': ['balanced', 'balanced_subsample']}, 'none': {'type': d3m.metadata.hyperparams.Constant, 'default': None, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter']}}, 'p': Union(configuration={str: Enumeration(values=['balanced', 'balanced_subsample'], default=balanced), none: Constant(default=None)}, default=None)}}, {'class_name': 'Enumeration', 'config': {'type': d3m.metadata.hyperparams.Enumeration, 'default': 'deviance', 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': \"loss function to be optimized. 'deviance' refers to deviance (= logistic regression) for classification with probabilistic outputs. For loss 'exponential' gradient boosting recovers the AdaBoost algorithm.\", 'values': ['deviance', 'exponential'], 'p': Enumeration(values=['deviance', 'exponential'], default=deviance)}}, {'class_name': 'Bounded', 'config': {'type': d3m.metadata.hyperparams.Bounded, 'default': 0.1, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'learning rate shrinks the contribution of each tree by `learning_rate`. There is a trade-off between learning_rate and n_estimators.', 'lower': 0, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False, 'p': Bounded(lower=0, upper=None, default=0.1, lower_inclusive=True, upper_inclusive=False)}}, {'class_name': 'Bounded', 'config': {'type': d3m.metadata.hyperparams.Bounded, 'default': 100, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.', 'lower': 1, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False, 'p': Bounded(lower=1, upper=None, default=100, lower_inclusive=True, upper_inclusive=False)}}, {'class_name': 'Bounded', 'config': {'type': d3m.metadata.hyperparams.Bounded, 'default': 3, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter for best performance; the best value depends on the interaction of the input variables.', 'lower': 0, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False, 'p': Bounded(lower=0, upper=None, default=3, lower_inclusive=True, upper_inclusive=False)}}, {'class_name': 'Enumeration', 'config': {'type': d3m.metadata.hyperparams.Enumeration, 'default': 'friedman_mse', 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The function to measure the quality of a split. Supported criteria are \"friedman_mse\" for the mean squared error with improvement score by Friedman, \"mse\" for mean squared error, and \"mae\" for the mean absolute error. The default value of \"friedman_mse\" is generally the best as it can provide a better approximation in some cases. .. 
versionadded:: 0.18', 'values': ['friedman_mse', 'mse', 'mae'], 'p': Enumeration(values=['friedman_mse', 'mse', 'mae'], default=friedman_mse)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': 2, 'structural_type': typing.Union[int, float], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a percentage and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for percentages.', 'configuration': {'absolute': {'type': d3m.metadata.hyperparams.Bounded, 'default': 2, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 1, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False}, 'percent': {'type': d3m.metadata.hyperparams.Bounded, 'default': 0.25, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': 1, 'lower_inclusive': True, 'upper_inclusive': True}}, 'p': Union(configuration={absolute: Bounded(lower=1, upper=None, default=2, lower_inclusive=True, upper_inclusive=False), percent: Bounded(lower=0, upper=1, default=0.25, lower_inclusive=True, upper_inclusive=True)}, default=2)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': 1, 'structural_type': typing.Union[int, float], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The minimum number of samples required to be at a leaf node: - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a percentage and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for percentages.', 'configuration': {'absolute': {'type': d3m.metadata.hyperparams.Bounded, 'default': 1, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 1, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False}, 'percent': {'type': d3m.metadata.hyperparams.Bounded, 'default': 0.25, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': 0.5, 'lower_inclusive': True, 'upper_inclusive': True}}, 'p': Union(configuration={absolute: Bounded(lower=1, upper=None, default=1, lower_inclusive=True, upper_inclusive=False), percent: Bounded(lower=0, upper=0.5, default=0.25, lower_inclusive=True, upper_inclusive=True)}, default=1)}}, {'class_name': 'Bounded', 'config': {'type': d3m.metadata.hyperparams.Bounded, 'default': 0, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. 
Samples have equal weight when sample_weight is not provided.', 'lower': 0, 'upper': 0.5, 'lower_inclusive': True, 'upper_inclusive': True, 'p': Bounded(lower=0, upper=0.5, default=0, lower_inclusive=True, upper_inclusive=True)}}, {'class_name': 'Bounded', 'config': {'type': d3m.metadata.hyperparams.Bounded, 'default': 1.0, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. `subsample` interacts with the parameter `n_estimators`. Choosing `subsample < 1.0` leads to a reduction of variance and an increase in bias.', 'lower': 0, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False, 'p': Bounded(lower=0, upper=None, default=1.0, lower_inclusive=True, upper_inclusive=False)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': None, 'structural_type': typing.Union[int, str, NoneType, float], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=sqrt(n_features)`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Choosing `max_features < n_features` leads to a reduction of variance and an increase in bias. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.', 'configuration': {'specified_int': {'type': d3m.metadata.hyperparams.Bounded, 'default': 0, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False}, 'calculated': {'type': d3m.metadata.hyperparams.Enumeration, 'default': 'auto', 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'values': ['auto', 'sqrt', 'log2']}, 'none': {'type': d3m.metadata.hyperparams.Constant, 'default': None, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter']}, 'percent': {'type': d3m.metadata.hyperparams.Bounded, 'default': 0.25, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': 1, 'lower_inclusive': True, 'upper_inclusive': True}}, 'p': Union(configuration={specified_int: Bounded(lower=0, upper=None, default=0, lower_inclusive=True, upper_inclusive=False), calculated: Enumeration(values=['auto', 'sqrt', 'log2'], default=auto), none: Constant(default=None), percent: Bounded(lower=0, upper=1, default=0.25, lower_inclusive=True, upper_inclusive=True)}, default=None)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': None, 'structural_type': typing.Union[int, NoneType], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. 
If None then unlimited number of leaf nodes.', 'configuration': {'int': {'type': d3m.metadata.hyperparams.Bounded, 'default': 10, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False}, 'none': {'type': d3m.metadata.hyperparams.Constant, 'default': None, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter']}}, 'p': Union(configuration={int: Bounded(lower=0, upper=None, default=10, lower_inclusive=True, upper_inclusive=False), none: Constant(default=None)}, default=None)}}, {'class_name': 'Bounded', 'config': {'type': d3m.metadata.hyperparams.Bounded, 'default': 0.0, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19', 'lower': 0.0, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False, 'p': Bounded(lower=0.0, upper=None, default=0.0, lower_inclusive=True, upper_inclusive=False)}}, {'class_name': 'UniformBool', 'config': {'type': d3m.metadata.hyperparams.UniformBool, 'default': False, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just erase the previous solution.', 'p': UniformBool(default=False)}}, {'class_name': 'Bounded', 'config': {'type': d3m.metadata.hyperparams.Bounded, 'default': 0.1, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if ``n_iter_no_change`` is set to an integer.', 'lower': 0, 'upper': 1, 'lower_inclusive': True, 'upper_inclusive': True, 'p': Bounded(lower=0, upper=1, default=0.1, lower_inclusive=True, upper_inclusive=True)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': None, 'structural_type': typing.Union[int, NoneType], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': '``n_iter_no_change`` is used to decide if early stopping will be used to terminate training when validation score is not improving. By default it is set to None to disable early stopping. 
If set to a number, it will set aside ``validation_fraction`` size of the training data as validation and terminate training when validation score is not improving in all of the previous ``n_iter_no_change`` numbers of iterations.', 'configuration': {'int': {'type': d3m.metadata.hyperparams.Bounded, 'default': 5, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False}, 'none': {'type': d3m.metadata.hyperparams.Constant, 'default': None, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter']}}, 'p': Union(configuration={int: Bounded(lower=0, upper=None, default=5, lower_inclusive=True, upper_inclusive=False), none: Constant(default=None)}, default=None)}}, {'class_name': 'Bounded', 'config': {'type': d3m.metadata.hyperparams.Bounded, 'default': 0.0001, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'Tolerance for the early stopping. When the loss is not improving by at least tol for ``n_iter_no_change`` iterations (if set to a number), the training stops.', 'lower': 0, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False, 'p': Bounded(lower=0, upper=None, default=0.0001, lower_inclusive=True, upper_inclusive=False)}}, {'class_name': 'Bounded', 'config': {'type': d3m.metadata.hyperparams.Bounded, 'default': 10, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The number of trees in the forest.', 'lower': 1, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False, 'p': Bounded(lower=1, upper=None, default=10, lower_inclusive=True, upper_inclusive=False)}}, {'class_name': 'Enumeration', 'config': {'type': d3m.metadata.hyperparams.Enumeration, 'default': 'gini', 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The function to measure the quality of a split. Supported criteria are \"gini\" for the Gini impurity and \"entropy\" for the information gain.', 'values': ['gini', 'entropy'], 'p': Enumeration(values=['gini', 'entropy'], default=gini)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': None, 'structural_type': typing.Union[int, NoneType], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The maximum depth of the tree. 
If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.', 'configuration': {'int': {'type': d3m.metadata.hyperparams.Bounded, 'default': 10, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False}, 'none': {'type': d3m.metadata.hyperparams.Constant, 'default': None, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter']}}, 'p': Union(configuration={int: Bounded(lower=0, upper=None, default=10, lower_inclusive=True, upper_inclusive=False), none: Constant(default=None)}, default=None)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': 2, 'structural_type': typing.Union[int, float], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a percentage and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for percentages.', 'configuration': {'absolute': {'type': d3m.metadata.hyperparams.Bounded, 'default': 2, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 1, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False}, 'percent': {'type': d3m.metadata.hyperparams.Bounded, 'default': 0.25, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': 1, 'lower_inclusive': True, 'upper_inclusive': True}}, 'p': Union(configuration={absolute: Bounded(lower=1, upper=None, default=2, lower_inclusive=True, upper_inclusive=False), percent: Bounded(lower=0, upper=1, default=0.25, lower_inclusive=True, upper_inclusive=True)}, default=2)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': 1, 'structural_type': typing.Union[int, float], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The minimum number of samples required to be at a leaf node: - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a percentage and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. 
versionchanged:: 0.18 Added float values for percentages.', 'configuration': {'absolute': {'type': d3m.metadata.hyperparams.Bounded, 'default': 1, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 1, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False}, 'percent': {'type': d3m.metadata.hyperparams.Bounded, 'default': 0.25, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': 0.5, 'lower_inclusive': True, 'upper_inclusive': True}}, 'p': Union(configuration={absolute: Bounded(lower=1, upper=None, default=1, lower_inclusive=True, upper_inclusive=False), percent: Bounded(lower=0, upper=0.5, default=0.25, lower_inclusive=True, upper_inclusive=True)}, default=1)}}, {'class_name': 'Bounded', 'config': {'type': d3m.metadata.hyperparams.Bounded, 'default': 0, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.', 'lower': 0, 'upper': 0.5, 'lower_inclusive': True, 'upper_inclusive': True, 'p': Bounded(lower=0, upper=0.5, default=0, lower_inclusive=True, upper_inclusive=True)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': None, 'structural_type': typing.Union[int, NoneType], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.', 'configuration': {'int': {'type': d3m.metadata.hyperparams.Bounded, 'default': 10, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False}, 'none': {'type': d3m.metadata.hyperparams.Constant, 'default': None, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter']}}, 'p': Union(configuration={int: Bounded(lower=0, upper=None, default=10, lower_inclusive=True, upper_inclusive=False), none: Constant(default=None)}, default=None)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': 'auto', 'structural_type': typing.Union[str, NoneType, float], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each split. - If \"auto\", then `max_features=sqrt(n_features)`. - If \"sqrt\", then `max_features=sqrt(n_features)`. - If \"log2\", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. 
Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.', 'configuration': {'calculated': {'type': d3m.metadata.hyperparams.Enumeration, 'default': 'auto', 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'values': ['auto', 'sqrt', 'log2']}, 'none': {'type': d3m.metadata.hyperparams.Constant, 'default': None, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter']}, 'percent': {'type': d3m.metadata.hyperparams.Bounded, 'default': 0.25, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'lower': 0, 'upper': 1, 'lower_inclusive': False, 'upper_inclusive': True}}, 'p': Union(configuration={calculated: Enumeration(values=['auto', 'sqrt', 'log2'], default=auto), none: Constant(default=None), percent: Bounded(lower=0, upper=1, default=0.25, lower_inclusive=False, upper_inclusive=True)}, default=auto)}}, {'class_name': 'Bounded', 'config': {'type': d3m.metadata.hyperparams.Bounded, 'default': 0.0, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19 ', 'lower': 0.0, 'upper': None, 'lower_inclusive': True, 'upper_inclusive': False, 'p': Bounded(lower=0.0, upper=None, default=0.0, lower_inclusive=True, upper_inclusive=False)}}, {'class_name': 'Enumeration', 'config': {'type': d3m.metadata.hyperparams.Enumeration, 'default': 'bootstrap', 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'Whether bootstrap samples are used when building trees. And whether to use out-of-bag samples to estimate the generalization accuracy.', 'values': ['bootstrap', 'bootstrap_with_oob_score', 'disabled'], 'p': Enumeration(values=['bootstrap', 'bootstrap_with_oob_score', 'disabled'], default=bootstrap)}}, {'class_name': 'UniformBool', 'config': {'type': d3m.metadata.hyperparams.UniformBool, 'default': False, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest.', 'p': UniformBool(default=False)}}, {'class_name': 'Union', 'config': {'type': d3m.metadata.hyperparams.Union, 'default': None, 'structural_type': typing.Union[str, NoneType], 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'description': 'Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. 
The \"balanced\" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` The \"balanced_subsample\" mode is the same as \"balanced\" except that weights are computed based on the bootstrap sample for every tree grown. For multi-output, the weights of each column of y will be multiplied. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified.', 'configuration': {'str': {'type': d3m.metadata.hyperparams.Enumeration, 'default': 'balanced', 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter'], 'values': ['balanced', 'balanced_subsample']}, 'none': {'type': d3m.metadata.hyperparams.Constant, 'default': None, 'structural_type': , 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TuningParameter']}}, 'p': Union(configuration={str: Enumeration(values=['balanced', 'balanced_subsample'], default=balanced), none: Constant(default=None)}, default=None)}}], 'values': {'pipeline_choice': '6a520746-108c-45bf-a6d8-c875b5a9d326', 'step5/d3m.primitives.data_cleaning.imputer.SKlearn/strategy': 'mean', 'step5/d3m.primitives.data_cleaning.imputer.SKlearn/add_indicator': False, 'step5/d3m.primitives.data_cleaning.imputer.SKlearn/fill_value': None, 'step6/d3m.primitives.feature_selection.skfeature.TAMU/selection_method': 'SPEC', 'step6/d3m.primitives.feature_selection.skfeature.TAMU/percentage_selected_features': 0.5, 'step7/d3m.primitives.classification.random_forest.SKlearn/n_estimators': 10, 'step7/d3m.primitives.classification.random_forest.SKlearn/criterion': 'gini', 'step7/d3m.primitives.classification.random_forest.SKlearn/max_features': 'auto', 'step7/d3m.primitives.classification.random_forest.SKlearn/max_depth': None, 'step7/d3m.primitives.classification.random_forest.SKlearn/min_samples_split': 2, 'step7/d3m.primitives.classification.random_forest.SKlearn/min_samples_leaf': 1, 'step7/d3m.primitives.classification.random_forest.SKlearn/min_weight_fraction_leaf': 0, 'step7/d3m.primitives.classification.random_forest.SKlearn/max_leaf_nodes': None, 'step7/d3m.primitives.classification.random_forest.SKlearn/min_impurity_decrease': 0.0, 'step7/d3m.primitives.classification.random_forest.SKlearn/bootstrap': 'bootstrap', 'step7/d3m.primitives.classification.random_forest.SKlearn/warm_start': False, 'step7/d3m.primitives.classification.random_forest.SKlearn/class_weight': None}}"} \ No newline at end of file diff --git a/axolotl/examples/run.py b/axolotl/examples/run.py new file mode 100644 index 0000000..d82c838 --- /dev/null +++ b/axolotl/examples/run.py @@ -0,0 +1,31 @@ +import os +import time +from pprint import pprint +import pandas as pd +from sklearn.datasets import make_regression + +from d3m import container +from d3m.metadata.pipeline import Pipeline + +from axolotl.utils import data_problem, pipeline as pipeline_utils +from axolotl.backend.simple import SimpleRunner +from axolotl.backend.ray import RayRunner +from axolotl.algorithms.random_search import RandomSearch + +# init runner +#backend = RayRunner(random_seed=42, volumes_dir=None, n_workers=3) +backend = SimpleRunner(random_seed=42, volumes_dir=None) +#time.sleep(30) + +table_path = os.path.join('..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'tables', 'learningData.csv') +df = pd.read_csv(table_path) +dataset, problem_description = data_problem.generate_dataset_problem(df, 
task='binary_classification', target_index=5) + +# The search_fit method searches for the best pipeline within the given time budget and then fits the best-ranked pipeline on the input_data. +search = RandomSearch(problem_description=problem_description, backend=backend) + +fitted_pipeline, fitted_pipeline_result = search.search_fit(input_data=[dataset], time_limit=30) + +produce_results = search.produce(fitted_pipeline, [dataset]) + +print(produce_results.output) diff --git a/axolotl/examples/synthetic_data_bayesian_hp_tunning.ipynb.REMOVED.git-id b/axolotl/examples/synthetic_data_bayesian_hp_tunning.ipynb.REMOVED.git-id new file mode 100644 index 0000000..7b34017 --- /dev/null +++ b/axolotl/examples/synthetic_data_bayesian_hp_tunning.ipynb.REMOVED.git-id @@ -0,0 +1 @@ +0b793ea6bbd8536751fb6941cb70e3ff2ed5739b \ No newline at end of file diff --git a/axolotl/failed_installation_repos.txt b/axolotl/failed_installation_repos.txt new file mode 100644 index 0000000..9e5064f --- /dev/null +++ b/axolotl/failed_installation_repos.txt @@ -0,0 +1,11 @@ +Repository Name: dsbox-primitives +Package URI: git+https://github.com/usc-isi-i2/dsbox-primitives@390595a708a8702cd6b7b388661127fcf63e4605#egg=dsbox-primitives +Error: "AttributeError: module 'tensorflow' has no attribute 'get_default_graph'" + +Repository Name: distil-primitives +Package URI: git+https://github.com/uncharted-distil/distil-primitives.git@08065c3e867401e444d8e25177c779fcc3ad5af7#egg=distil-primitives +Error: "Cannot be installed due to hard dependency on tensorflow-gpu" + +Repository Name: kf-d3m-primitives +Package URI: git+https://github.com/kungfuai/d3m-primitives.git@17ca6cd4e9ca00e09e2cf91e1cb9f18562645821#egg=kf-d3m-primitives +Error: "Cannot be installed due to hard dependency on tensorflow-gpu" diff --git a/axolotl/images/Devd3mStart.sh b/axolotl/images/Devd3mStart.sh new file mode 100755 index 0000000..5d119f6 --- /dev/null +++ b/axolotl/images/Devd3mStart.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +alias python="python3" + +# check if we are on a deployment container or not. +if [ -d "/user_dev" ]; then + cd /user_dev + echo "Running on deployment" +else + echo "Running on testing" +fi + + +# check output_dir +if [[ -z "$D3MOUTPUTDIR" ]]; then + D3MOUTPUTDIR="$(pwd)/output_dir" + mkdir -p "$D3MOUTPUTDIR" +else + D3MOUTPUTDIR="$D3MOUTPUTDIR" +fi + +# check if time is set, otherwise we use 1 min +if [[ -z "$D3MTIMEOUT" ]]; then + D3MTIMEOUT="60" # 1 min + else + D3MTIMEOUT="$D3MTIMEOUT" +fi + +# execute d3m server.
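+# Select run mode via $D3MRUN: "standalone" is not supported yet; any other value launches the TA2 gRPC server.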
+case $D3MRUN in + "standalone") + echo "Executing TAMU TA2 Standalone" + echo "No standalone supported yet" + ;; + *) + echo "Executing TAMU TA2" + python3 -m axolotl.d3m_grpc.server + ;; +esac diff --git a/axolotl/images/axolotl.dockerfile b/axolotl/images/axolotl.dockerfile new file mode 100644 index 0000000..a20dd7c --- /dev/null +++ b/axolotl/images/axolotl.dockerfile @@ -0,0 +1,13 @@ +FROM registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.5.18-20200630-050709 + +RUN pip3 install -e git+https://gitlab.com/axolotl1/axolotl.git@9619a077e1d06a152fa01f0fca7fa0321dcd3d2c#egg=axolotl +COPY images/Devd3mStart.sh /user_dev/Devd3mStart.sh + +RUN chmod a+x /user_dev/Devd3mStart.sh + +ENV D3MRUN ta2ta3 +ENV TOKENIZERS_PARALLELISM false + +EXPOSE 45042 + +ENTRYPOINT ["/user_dev/Devd3mStart.sh"] diff --git a/axolotl/images/base.dockerfile b/axolotl/images/base.dockerfile new file mode 100755 index 0000000..51e05b8 --- /dev/null +++ b/axolotl/images/base.dockerfile @@ -0,0 +1,3 @@ +FROM registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.5.18-20200630-050709 + +RUN apt update \ No newline at end of file diff --git a/axolotl/images/build-images.sh b/axolotl/images/build-images.sh new file mode 100755 index 0000000..8b491f3 --- /dev/null +++ b/axolotl/images/build-images.sh @@ -0,0 +1,21 @@ +#!/bin/sh -e + +docker login -u gitlab-ci-token -p "$CI_JOB_TOKEN" "$CI_REGISTRY" + +for IMAGE_NAME in "$@"; do + if [ "$IMAGE_NAME" = "base" ]; then + echo "Building "$CI_REGISTRY_IMAGE/$IMAGE_NAME":latest" + docker build -t "$CI_REGISTRY_IMAGE/$IMAGE_NAME:latest" -f images/base.dockerfile . + echo "Pushing "$CI_REGISTRY_IMAGE/$IMAGE_NAME":latest" + docker push "$CI_REGISTRY_IMAGE/$IMAGE_NAME:latest" + echo "Done" + fi + + if [ "$IMAGE_NAME" = "axolotl" ]; then + echo "Building "$CI_REGISTRY_IMAGE/$IMAGE_NAME":latest" + docker build -t "$CI_REGISTRY_IMAGE/$IMAGE_NAME:latest" -f images/axolotl.dockerfile .
+ echo "Pushing "$CI_REGISTRY_IMAGE/$IMAGE_NAME":latest" + docker push "$CI_REGISTRY_IMAGE/$IMAGE_NAME:latest" + echo "Done" + fi +done diff --git a/axolotl/run_tests.py b/axolotl/run_tests.py new file mode 100755 index 0000000..16c264a --- /dev/null +++ b/axolotl/run_tests.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 + +import sys +import unittest + +runner = unittest.TextTestRunner(verbosity=1) + +tests = unittest.TestLoader().discover('tests') + +if not runner.run(tests).wasSuccessful(): + sys.exit(1) diff --git a/axolotl/setup.py b/axolotl/setup.py new file mode 100644 index 0000000..fccf24f --- /dev/null +++ b/axolotl/setup.py @@ -0,0 +1,53 @@ +import os +import os.path +import sys +from setuptools import setup, find_packages +import subprocess + +PACKAGE_NAME = 'axolotl' +MINIMUM_PYTHON_VERSION = 3, 6 + + +def check_python_version(): + """Exit when the Python version is too low.""" + if sys.version_info < MINIMUM_PYTHON_VERSION: + sys.exit("Python {}.{}+ is required.".format(*MINIMUM_PYTHON_VERSION)) + + +def read_package_variable(key): + """Read the value of a variable from the package without importing.""" + module_path = os.path.join(PACKAGE_NAME, '__init__.py') + with open(module_path) as module: + for line in module: + parts = line.strip().split(' ') + if parts and parts[0] == key: + return parts[-1].strip("'") + raise KeyError("'{0}' not found in '{1}'".format(key, module_path)) + + +check_python_version() +version = read_package_variable('__version__') +description = read_package_variable('__description__') +setup( + name=PACKAGE_NAME, + version=version, + description=version, + + packages=find_packages(exclude=['tests*']), + license='Apache-2.0', + classifiers=[ + 'License :: OSI Approved :: Apache Software License', + ], + install_requires=[ + 'd3m', + 'grpcio', + 'grpcio-tools', + 'grpcio-testing', + 'ray', + 'networkx', + ], + extras_require={ + 'cpu': ['tensorflow==2.2.0'], + 'gpu': ['tensorflow-gpu==2.2.0'] + } +) diff --git a/axolotl/tests/__init__.py b/axolotl/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/axolotl/tests/_server_test.py b/axolotl/tests/_server_test.py new file mode 100644 index 0000000..68e5866 --- /dev/null +++ b/axolotl/tests/_server_test.py @@ -0,0 +1,383 @@ +# from __future__ import print_function + +import argparse +import os +import pathlib +from pprint import pprint + +import grpc +from d3m import utils as d3m_utils, runtime as runtime_module +from d3m.metadata import problem as problem_module +from ta3ta2_api import core_pb2, core_pb2_grpc, value_pb2, utils + +from axolotl.utils import pipeline as pipeline_utils +from axolotl.d3m_grpc import constants + +# with d3m_utils.silence(): +# d3m_index.load_all(blocklist=constants.PrimitivesList.BLACK_LIST) + + +# primitives = [ +# 'd3m.primitives.datasets.DatasetToDataFrame', +# 'd3m.primitives.data_transformation.denormalize.Common' +# ] +# +# with d3m_utils.silence(): +# for primitive in primitives: +# d3m_index.get_primitive(primitive) + + +LENGTH = 60 +ALLOWED_VALUE_TYPES = ['DATASET_URI', 'CSV_URI', 'RAW'] +FULL_SPECIFIED_PIPELINE_PATH = 'modules/server/test_full_pipeline.json' +PRE_SPECIFIED_PIPELINE_PATH = 'modules/server/test_placeholder.json' + + +# PRE_SPECIFIED_PIPELINE_PATH = 'modules/server/test_placeholder_pipeline.json' + + +def hello_request(): + request = core_pb2.HelloRequest() + return request + + +def list_primitives_request(): + request = core_pb2.ListPrimitivesRequest() + return request + + +def search_solutions_request(test_paths, 
specified_template=None): + user_agent = "test_agent" + version = core_pb2.DESCRIPTOR.GetOptions().Extensions[core_pb2.protocol_version] + + time_bound = 0.5 + priority = 10 + # allowed_value_types = [value_pb2.ValueType.Value(value) for value in ALLOWED_VALUE_TYPES] + + problem_description = utils.encode_problem_description( + problem_module.Problem.load(test_paths['TRAIN']['problem']) + ) + + template = None + if specified_template == 'FULL': + with d3m_utils.silence(): + pipeline = pipeline_utils.load_pipeline(FULL_SPECIFIED_PIPELINE_PATH) + template = utils.encode_pipeline_description(pipeline, ALLOWED_VALUE_TYPES, constants.Path.TEMP_STORAGE_ROOT) + elif specified_template == 'PRE': # PRE for PREPROCESSING + pipeline = runtime_module.get_pipeline(PRE_SPECIFIED_PIPELINE_PATH, load_all_primitives=False) + template = utils.encode_pipeline_description(pipeline, ALLOWED_VALUE_TYPES, constants.Path.TEMP_STORAGE_ROOT) + + inputs = [ + value_pb2.Value( + dataset_uri=test_paths['TRAIN']['dataset'] + ) + ] + + request = core_pb2.SearchSolutionsRequest( + user_agent=user_agent, + version=version, + time_bound_search=time_bound, + priority=priority, + allowed_value_types=ALLOWED_VALUE_TYPES, + problem=problem_description, + template=template, + inputs=inputs + ) + return request + + +def get_search_solution_results_request(search_id): + request = core_pb2.GetSearchSolutionsResultsRequest(search_id=search_id) + return request + + +def fit_solution_request(solution_id, test_paths): + inputs = [ + value_pb2.Value( + dataset_uri=test_paths['TRAIN']['dataset'] + ) + ] + expose_outputs = ['outputs.0'] + expose_value_types = ['CSV_URI'] + users = [ + core_pb2.SolutionRunUser( + id='test_user', + chosen=True, + reason='just because' + ) + ] + request = core_pb2.FitSolutionRequest( + solution_id=solution_id, + inputs=inputs, + expose_outputs=expose_outputs, + expose_value_types=expose_value_types, + users=users + ) + return request + + +def get_fit_solution_results_request(request_id): + request = core_pb2.GetFitSolutionResultsRequest( + request_id=request_id + ) + return request + + +def produce_solution_request(fitted_solution_id, test_paths): + inputs = [ + value_pb2.Value( + dataset_uri=test_paths['TEST']['dataset'] + ) + ] + expose_outputs = ['outputs.0'] + expose_value_types = ['CSV_URI'] + + users = [ + core_pb2.SolutionRunUser( + id='test_user', + chosen=True, + reason='just because' + ) + ] + + request = core_pb2.ProduceSolutionRequest( + fitted_solution_id=fitted_solution_id, + inputs=inputs, + expose_outputs=expose_outputs, + expose_value_types=expose_value_types, + users=users + ) + return request + + +def get_produce_solution_results_request(request_id): + request = core_pb2.GetProduceSolutionResultsRequest( + request_id=request_id + ) + return request + + +def describe_solution_request(solution_id): + request = core_pb2.DescribeSolutionRequest( + solution_id=solution_id + ) + return request + + +def score_solution_request(solution_id, test_paths): + inputs = [ + value_pb2.Value( + dataset_uri=test_paths['SCORE']['dataset'] + ) + ] + + problem = problem_module.Problem.load(test_paths['SCORE']['problem']) + performance_metrics = [] + for performance_metric in problem['problem'].get('performance_metrics', []): + performance_metrics.append(utils.encode_performance_metric(performance_metric)) + + # TODO add support for more evaluation methods + users = [] + evaluation_method = 'K_FOLD' + configuration = core_pb2.ScoringConfiguration( + method=evaluation_method, + folds=2, + # train_test_ratio + 
shuffle=True, + random_seed=42, + stratified=True, + ) + request = core_pb2.ScoreSolutionRequest( + solution_id=solution_id, + inputs=inputs, + performance_metrics=performance_metrics, + users=users, + configuration=configuration + ) + return request + + +def get_score_solution_request(solution_id): + request = core_pb2.ScoreSolutionRequest( + solution_id=solution_id + ) + return request + + +def solution_export_request(solution_id): + rank = 0.1 + request = core_pb2.SolutionExportRequest( + solution_id=solution_id, + rank=rank + ) + return request + + +def end_search_solutions_request(search_id): + request = core_pb2.EndSearchSolutionsRequest(search_id=search_id) + return request + + +def stop_search_solution_request(search_id): + request = core_pb2.StopSearchSolutionsRequest(search_id=search_id) + return request + + +def run(test_paths, specified_template=None): + channel = grpc.insecure_channel('localhost:45042') + stub = core_pb2_grpc.CoreStub(channel) + + print_name('Hello') + hello_r = stub.Hello(hello_request()) + pprint(hello_r) + + print_name('ListPrimitive') + list_primitives_r = stub.ListPrimitives(list_primitives_request()) + for _primitive in list_primitives_r.primitives: + print_space() + pprint(_primitive) + + print_name('SearchSolution') + search_solutions_r = stub.SearchSolutions(search_solutions_request(test_paths, specified_template)) + search_id = search_solutions_r.search_id + pprint(search_solutions_r) + + print_name('GetSearchSolutionsResults') + solution_id = None + for get_search_solution_r in stub.GetSearchSolutionsResults(get_search_solution_results_request(search_id)): + print_space() + pprint(get_search_solution_r) + if get_search_solution_r.solution_id: + solution_id = get_search_solution_r.solution_id + + print_name('DescribeSolution') + describe_solution_r = stub.DescribeSolution(describe_solution_request(solution_id)) + pprint(describe_solution_r) + + print_name('FitSolution') + fit_solution_r = stub.FitSolution(fit_solution_request(solution_id, test_paths)) + fit_request_id = fit_solution_r.request_id + pprint(fit_solution_r) + + print_name('GetFitSolutionResultsRequest') + fitted_solution_id = None + for get_git_solution_results_r in stub.GetFitSolutionResults(get_fit_solution_results_request(fit_request_id)): + print_space() + pprint(get_git_solution_results_r) + fitted_solution_id = get_git_solution_results_r.fitted_solution_id + + print_name('ProduceSolutionRequest') + produce_solution_r = stub.ProduceSolution(produce_solution_request(fitted_solution_id, test_paths)) + produce_request_id = produce_solution_r.request_id + pprint(produce_solution_r) + + print_name('GetProduceSolutionResultsRequest') + for get_produce_solution_results_r in stub.GetProduceSolutionResults( + get_produce_solution_results_request(produce_request_id)): + print_space() + pprint(get_produce_solution_results_r) + + print_name('ScoreSolution') + score_solution_r = stub.ScoreSolution(score_solution_request(solution_id, test_paths)) + score_request_id = score_solution_r.request_id + + pprint(score_solution_r) + + print_name('GetScoreSolutionResults') + for score_solution_r in stub.GetScoreSolutionResults(get_score_solution_request(score_request_id)): + print_space() + pprint(score_solution_r) + + print_name('SolutionExport') + solution_export_r = stub.SolutionExport(solution_export_request(solution_id)) + pprint(solution_export_r) + + print_name('StopSearchSolutions') + stop_search_solution_r = stub.StopSearchSolutions(stop_search_solution_request(search_id)) + 
pprint(stop_search_solution_r) + + print_name('EndSearchSolutions') + end_search_solutions_r = stub.EndSearchSolutions(end_search_solutions_request(search_id)) + pprint(end_search_solutions_r) + + +def print_name(name): + length = LENGTH + free_space = length - len(name) - 2 + space = int(free_space / 2) + name = '#' + ' ' * space + name + ' ' * space + if free_space % 2 == 0: + name = name + '#' + else: + name = name + ' #' + + print("#" * length) + print(name) + print("#" * length) + + +def print_space(): + print('-' * LENGTH) + + +def configure_parser(parser, *, skip_arguments=()): + parser.add_argument( + '-t', '--test-path', type=str, default="/D3M/internal_d3m/Winter_2018_tamuta2/datasets/26/", + help="path of d3m dataset to test." + ) + + +def get_problem_id(test_path): + problem_description = problem_module.Problem.load(test_path) + print(problem_description) + problem_id = problem_description.get('id', None) + return problem_id + + +def get_paths(test_path): + # Classification Score dataset path is (problem_SCORE, dataset_SCORE) not + # However, regression and other Score dataset path is (problem_TEST, dataset_TEST) + score_problem_relative_path = os.path.join(test_path, 'SCORE/problem_SCORE/problemDoc.json') + score_dataset_relative_path = os.path.join(test_path, 'SCORE/dataset_SCORE/datasetDoc.json') + + if not os.path.exists(score_problem_relative_path) or not os.path.exists(score_dataset_relative_path): + score_problem_relative_path = os.path.join(test_path, 'SCORE/problem_TEST/problemDoc.json') + score_dataset_relative_path = os.path.join(test_path, 'SCORE/dataset_TEST/datasetDoc.json') + + test_paths = { + 'TRAIN': { + 'dataset': os.path.join(test_path, 'TRAIN/dataset_TRAIN/datasetDoc.json'), + 'problem': pathlib.Path( + os.path.abspath(os.path.join(test_path, 'TRAIN/problem_TRAIN/problemDoc.json'))).as_uri() + }, + 'TEST': { + 'dataset': os.path.join(test_path, 'TEST/dataset_TEST/datasetDoc.json'), + 'problem': pathlib.Path( + os.path.abspath(os.path.join(test_path, 'TEST/problem_TEST/problemDoc.json'))).as_uri() + }, + 'SCORE': { + 'dataset': os.path.join(test_path, score_dataset_relative_path), + 'problem': pathlib.Path(os.path.abspath(score_problem_relative_path)).as_uri() + }, + } + return test_paths + + +if __name__ == '__main__': + # Creating parser + parser = argparse.ArgumentParser(description="Test from command line") + configure_parser(parser) + arguments = parser.parse_args() + + # Getting test root path + test_path = arguments.test_path + + # Getting test paths train/test/score + test_paths = get_paths(test_path) + + # Getting problem id + test_id = get_problem_id(test_paths['TEST']['problem']) + + print_name('Starting Test: ' + test_id) + run(test_paths, None) + print_name('Finishing Test: ' + test_id) diff --git a/axolotl/tests/data/.gitignore b/axolotl/tests/data/.gitignore new file mode 100644 index 0000000..94d5afd --- /dev/null +++ b/axolotl/tests/data/.gitignore @@ -0,0 +1,10 @@ +*.pyc +__pycache__ +.DS_Store +.ipynb_checkpoints +.cache +.idea +*.egg-info +.mypy_cache +dist +build diff --git a/axolotl/tests/data/.gitlab-ci.yml b/axolotl/tests/data/.gitlab-ci.yml new file mode 100644 index 0000000..499ae55 --- /dev/null +++ b/axolotl/tests/data/.gitlab-ci.yml @@ -0,0 +1,42 @@ +build_summing_image: + stage: build + + image: docker:stable + + services: + - docker:dind + + before_script: + - docker info + + script: + - docker login -u gitlab-ci-token -p "$CI_JOB_TOKEN" "$CI_REGISTRY" + - docker build --cache-from="$CI_REGISTRY_IMAGE/summing:latest" -t 
"$CI_REGISTRY_IMAGE/summing:latest" docker/summing + - docker push "$CI_REGISTRY_IMAGE/summing:latest" + + only: + - master + +style_check: + stage: build + + image: registry.gitlab.com/datadrivendiscovery/images/testing:ubuntu-bionic-python36 + + script: + - pycodestyle primitives/test_primitives + +type_check: + stage: build + + image: registry.gitlab.com/datadrivendiscovery/images/testing:ubuntu-bionic-python36 + + variables: + DEPENDENCY_REF: devel + + script: + - cd primitives + - git clone https://gitlab.com/datadrivendiscovery/d3m.git + - cd d3m + - git checkout ${DEPENDENCY_REF} + - cd .. + - MYPYPATH=d3m mypy test_primitives diff --git a/axolotl/tests/data/README.md b/axolotl/tests/data/README.md new file mode 100644 index 0000000..9efeea5 --- /dev/null +++ b/axolotl/tests/data/README.md @@ -0,0 +1,10 @@ +# Data used for tests + +This repository contains data used for tests across multiple other repositories. + +## About Data Driven Discovery Program + +DARPA Data Driven Discovery (D3M) Program is researching ways to get machines to build +machine learning pipelines automatically. It is split into three layers: +TA1 (primitives), TA2 (systems which combine primitives automatically into pipelines +and executes them), and TA3 (end-users interfaces). diff --git a/axolotl/tests/data/add.sh b/axolotl/tests/data/add.sh new file mode 100755 index 0000000..fd92052 --- /dev/null +++ b/axolotl/tests/data/add.sh @@ -0,0 +1,20 @@ +#!/bin/bash -e + +# Assumption is that this repository is cloned into "d3m-test-data" directory +# which is a sibling of "d3m-primitives" directory. + +for PRIMITIVE in d3m.primitives.regression.monomial.Test \ + d3m.primitives.operator.increment.Test \ + d3m.primitives.operator.sum.Test \ + d3m.primitives.data_generation.random.Test \ + d3m.primitives.operator.primitive_sum.Test \ + d3m.primitives.operator.null.TransformerTest \ + d3m.primitives.operator.null.UnsupervisedLearnerTest \ + d3m.primitives.classification.random_classifier.Test \ + d3m.primitives.evaluation.compute_scores.Test ; do + echo $PRIMITIVE + python -m d3m primitive describe -i 4 $PRIMITIVE > primitive.json + pushd ../d3m-primitives + ./add.py ../d3m-tests-data/primitive.json + popd +done diff --git a/axolotl/tests/data/datasets/audio_dataset_1/datasetDoc.json b/axolotl/tests/data/datasets/audio_dataset_1/datasetDoc.json new file mode 100644 index 0000000..ff57fa7 --- /dev/null +++ b/axolotl/tests/data/datasets/audio_dataset_1/datasetDoc.json @@ -0,0 +1,82 @@ +{ + "about": { + "datasetID": "audio_dataset_1", + "datasetName": "Audio dataset to be used for tests", + "license": "CC0", + "datasetSchemaVersion": "4.0.0", + "redacted": false, + "datasetVersion": "4.0.0", + "digest": "4eaa4ee8ce18dc066d400d756105aab1ce92895593d09c8be23e08fdd89640e1" + }, + "dataResources": [ + { + "resID": "0", + "resPath": "media/", + "resType": "audio", + "resFormat": { + "audio/mpeg": [ + "mp3" + ] + }, + "isCollection": true + }, + { + "resID": "learningData", + "resPath": "tables/learningData.csv", + "resType": "table", + "resFormat": { + "text/csv": [ + "csv" + ] + }, + "isCollection": false, + "columnsCount": 5, + "columns": [ + { + "colIndex": 0, + "colName": "d3mIndex", + "colType": "integer", + "role": [ + "index" + ] + }, + { + "colIndex": 1, + "colName": "audio_file", + "colType": "string", + "role": [ + "attribute" + ], + "refersTo": { + "resID": "0", + "resObject": "item" + } + }, + { + "colIndex": 2, + "colName": "start", + "colType": "real", + "role": [ + "boundaryIndicator" + ] + }, + { + "colIndex": 
3, + "colName": "end", + "colType": "real", + "role": [ + "boundaryIndicator" + ] + }, + { + "colIndex": 4, + "colName": "class", + "colType": "categorical", + "role": [ + "suggestedTarget" + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/axolotl/tests/data/datasets/audio_dataset_1/media/test_audio.mp3 b/axolotl/tests/data/datasets/audio_dataset_1/media/test_audio.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..a18ba6c7549ff77be1184072e5c28d2e3148ff6d GIT binary patch literal 1271 zcmV{L^A{c00000BUDpUMF0Q*4*&oIOks9rH8(CdF)lGMFaQ7m0000000000 z|NB6|0000000000000000000007-6UZvX%Q4*&oF1poj9$^cPOQBhG*QBhG*QBhG* zQBhG*QGI=VeSLj>eSLj>eSLj>eSLj>eW<9YsHmu@sHmu@sHmu@sHmu@sMOTd)YR0} z)YR0})YR0})YR0})c^ng|NsC0|NsC0|NsC0|NsC0{{R3008C+aV>LG}I5_|S00000 z00000001Nd1^@s600004${RAOMgRZ+0095{P{aTL3Va!LG%L9*_V41H;3|{;&|+Ni`r41_S|t0H9DH4JaDYG8YL33Z*i` zVUW;lz{ik=l)1{a>~k=o0Iz_^f&!Q0;t}Zg{D?&lh)2KqsBy|~|C)z7n&1DS$IVI0 z|NrGO6QoA}@~jG#5d}d}5`YW@&?}4!5F-EkQp5yD2tJ@xC`49hJ|Ns8K|NsB~vpx>Vvqw4|AH^17u>t^N!Gy+jl^?o(HHO+_<_4r;LkJjP zwUfqR7zrj|=pGqEesZ7N|F7_!AOH6M|4-?%F~K~==R+7Q z%!?{8A_7e?TSn+IV!IqOSVy)qDfKhl0y)b6`%=UagUDDb91ee?K`RBK!O!ei2QmQv zGt4qEI12>_LO^PHHjxRNU;q7I|NsC0|NsA}#33^{84Z%dI)zBWQ3@o*8XUOX`|J4V zlp&bKtq#<}c??X#;2NmFP7C8F0oWeB~Fu;)t0Q{y+MCiwb%p3_6 zVd+dgEq~Gf>;G5(|NsC0|JVMw7!SZl7zzLa|NBzJD1%85D;N%cV!=;^qei~)02l?p zAeb`YD3~|k&6t5=ZklA?nwlG>Y?y%{h?o|D$Cz!QT9^?b1(**2I2Z%~8W;({86!%o zh5!aD>3GpTw}1IR|Ns5J|NsB}wEzGU0+CLqVlWttMgk2qe}ihqRLdeJ&COMld+|9- zONKWF<1#OB-@&k8Ho&}!H?U&>Y!((q9XdU?f#CyY0{Q@!i|{te>~oKoTH5JGU1Cja|V#5e#GC3|_fi2xB42c|QaFaQdk zq_ltd%ZdzS!{iW9Qh}Dl7Z(_6fn5_|JwjV0RsdC z1q1~N1qBHd4GRqv6ciK`6ciK`6ciK`6ciK`6ciK`6ciK`6ciK`6ciK`6ciK`6ciK` z6ciK`6ciK`6#u{g5eNX`0G0y*A^-vr0Rs^M|HJ?v000360RaI40RaF20000000033 z0|W&I2M7rY3;)CbwGaRT0Rsa91Oo*H1Oxy80et}i0{{dO1ri}KK?W01VFw~{6f%L4 zp$H>FvB4Em(eNZQVse5B3Kka_85$)fCnzZ@G&MFiI5|2)L`6nNNJ&alR8>}2SXo+Q zWMyV&XlZJ5bai%jczJq*goTEOh>41lla!T~mzbHFqNAjxrl+W>su$;>FV+F^Yr!h_xSnx|HJ?v0RRI50RaI40RaI4 z00000000330|W&I2M7rY3;)CbwGjXU0RjXB0|W;J1Oxy90(Srb0s|2Q1tBp85dFX%F@%+)z;V8+1ldccA(59|% zwm%JSwJ6Ib?wRRWcb^Qd>~5Kuwmy~b`j!5l;hS#^pM71r)jxA9JCFxv_q|N6>p7%90PYX?M z7bMqfr`sEK8T79y{{V%F9BqZ0^{ov*##cIz`i9&-w7u=vMJ=LwZ2Fo)xHVcmem%f( zT*aOH&5t{Z(_5WBK3(u>H)DQTbvnC!L1a*A^6ChRb6i#TiLGtWc~bVScJs!ZkR<(S zS+cpK6nZ7bs*Ddx;=EPj2(964;Q~Yep!WQr2U= zlXBy&SB~f!=N9>UHAg0x!K&l|ipQn_9V5`G1{W@cyN7btc(+ zoOP>I$o8jKCa8+oqP4927k7PbvaCCUccQiSERSw`wuA8Y4-d`e86%UN{ord`UeuRI z(&U;n2XAE+Ko=}S2hZa-Yt(e2=U3i$69`8|76?mQo0*8Wn%^7>a# zFETt7r3QSgmUmCuPu7_(i*+FX0G6MveLoMw8)=8h2kBQW{tx+w{YM{4k7Uuat$oju zzp!qVx=Z@kTj3o_Zx7@=Zr@cVzMi%C4%ZmPzJCg_H|*VaF#X-P{yRz1a#mxfSwdXM z`Re1wQWe}IVf0$&^-mexv+$CC-zL7E{{V())JtI7@&4@%`!C(MEp4Cp4NAi!YM9AO zsq$pLD7u&UW&DjmHA{(GJ0$##eRRLF-OF$-fAQLk{>zu5Obc)PcC}PaUuxd$`5xh} zE*FK9@->BZV{yEll72?M<+uHpuN8i4Z~S(u+_OZsi6%}?XRb!Ab zUe6-ver)SGJD6I3g`KYxztdMTy zmY*cn%O-cNaS1fb{{SrwSDI^$+q~ALs2+mX(Dr4jcUMrV?hLuEI@aoE&SW%~5=_gD z=8b!ma^9_Uvj!suxP4wDa#(GyiBTtE0{gocq4H!8Fq0>y>op(-3N6C8Egq+_$?#&hN`j>*%;_ci=!SsFWJ^eDVtezoxcytp6o+E4rF(*3sM z{(H&)0DT%cnKnM>xBZ|k*p1g;t!qR3Kv{(!>3^+!2)EZC^WIPU=+uJx=l*^q{{X&? z9Ly|z@_%RxGT-s{`qYc@{>Q!F>t6yb_1hc? 
z{+IgJwZF&hCBF6VKU)0ej{fWZekK0^zKu*TuFC`VnSbxIRIW+-JO2O+=SaZ+08Ag9 zMBg4Xc5EBbkF9=qHl=nDf27O*0DYQbU){5R$IQR?*{Wd^^j;s1`XqfXe^FD9j~Wwo zy&ssb&MB_0*Zy9m{{X(tMJ4s(ul#*Y{{VfOn=+5mMfmIC{`bT5r%CbK!`px3_EY)80GIFO{xt)9Z}979{WyM~<6oM; z+PA0vUZ(#5zRe%nm!tk(rvCuG%|2k|KS!ka)8Xyk@%%qZEAxQr*PrwCH~sc#r_3%> z^b7WpS6u*Z{^75KJWr3en)}Q4mX=TWN5r??{`oCP&jpY2dkqMkkK zGTUhL4KbZ3gY4tH)}f8YRM)Tgqv3JCw2iD{$H)&%_OBE0-loM;Cob7{t>&l%?bwVxK@~aR3z;q3|3z{dc$MkkN81vw(`HFVQLy&x_rf7gI}?l zSL|)zoBJr3-&${B$ie>r3giAD{{X=~d^s+cJ>{W-XB{@F{{RZ~y~%thkIOjjkIDOZ z9_So)s`n^k`AvNf@r(9<)czgKD6!L|bPbH6_OA%k^t+pR7+gZpxau0B%y%$b6$33^ zk{os7s<7H?GRU@5S!z<TA{thEECV3)UKHOSqh6*vEwJAdU@sC&RB2zL_#3NPcF;a*=p)>ONjP zR)vn6JQK#qzjcA9D8_2c)|91AT*&(0Lymi09xX}aCN{3a!v6pe3o3}E+IwKvgXumf z)NV8wtcM_W&2?T4v%gkCQ;PF(7-da!L7wIt8!E4oqCTGR&%{ke(UqR|LY;OM2v z_HLK zu)Oe1uzkB@=jl+7hA>@p5&o6TYW_VIbB{7k{ZSEvy#%n=0nwK{%&bZ;DOJic*W#9_2xHV+ku|v;FwzF=i zW!SMGILfhK_f0__wLPStJhC!>b%`RLtZJRqVe`i{MotpCGMzqnxU=T(+A~~xtzeXi zh}?ea*UuV+%_A`?SL!S6{{R}>B=L5Lp}-1r~ioYp&hXu#$w~pOQLbbH`Dz208rT)-Z#791r^w0JUt-n}Sn{c)^Yo8$jtQIlJ=#Hr1BOTf29v$#$ zv58l1F+XSE^(xA5Q`y4d;Y&!8SuR)_*@bH=)O^CW zAwt7737vC>q=G#6r$_R(D8||s?`ECMTCrmw3a;nosT(Gk(4Mr18xXQ?YGnFa-7W|f zVMSYnh3YFVV`P&@n|O=i9j3BboGwqbd8WPlJlRLNl~niXUraNp?NqOAW4Ke42a47; zryZHWR|`&Bw0wJY@Lm{ZDiCAoMSU@M;k1%s-dOyot^Ttk&-#oHK>ehi%FjU!D`0Fj z;5uK7v};W`sJQv-(fp zc!^Vdu~$BUxcHCdllEzI>CI(J@fXCG3|84oFK?LCT35m|8piE$oPc?fy(hz747AgB z86=3~`^+oHl%Y-8YI~ETR%@Di97dnw9~N9N^I|{u(XPWq{?^n86UX}3U3Gb<>F986 z`1)5nb@4LWNMU1TW1h;MJwHjy@8+$58n6^4)nU0}y|^ z9dv#b@r0V!EfWm-t}5kC#a>5c986^{Se>1`VOyS+uWy32i4Df#$i-^d$eTu755}=a zqbGH8{{WY|o8{uAGsLP1kv}m}wd4bnG=KNds?_GVD*Dr%ep;nIMA+n0|A zul;gA*1tPXTKYLIW8xzx7j%6L-xi=-WmT61E;{>HC#if?)aIF%+UEncc`ozoT}Q+H zJ`Gb@5nMP#MiE@rJHE44vrxIx!{Sd9>dXS%lb>T4iw|fQd*S!JMi8cby+WA%MQF( zVwc(%hU39Nqz_SEKjN>BliRx_E?9dS^G#dgo!+>_NxShir93?9d}b9fl<6x$YJCf> zd{NRM+#_rs_p6e;__+Wl%9jJ{UI}-o8?3QBn$3HHs(G&X;UJGTS%p`j?mCyoDFU6d z{yN`xqI0YN6YwI3T=tt^NzLILgwP!m+M_ImomCJ@mNZj zS@Kj`p9X)!Bk)-bGKgxyh_qG>$`$JmG|`C8nBr2{HiAKUY9Q&{*^I;E>o1ZXXmWn z0(Gg5=aN?Ximu-S^%#1YKPvqRlfb%oZk&B7@O(Sb?mqYagIc8e88h?0{uKV9(=Yi{ zGkhG>3Z7*AtMqzLg*q3PyTALQsK3I^9nS-g_iD8!$L7V?!W;OqPX7SPt2e_N#g890 zewF%JH^I#kW#xbI(N$mIj)F%k{{W7PH5vJ;zYgx<&Ub!Q1l|a^Ud{Pe>2!Yr{5aoz zNB!_A(tgkW7-h*({{Vann=zl6AMl0TdN<`v{{VzW;d?jbU#5fhfAFfW%m@AODvjsB zp9re)jQ;?*D6k)z6L@lH&IoVHs>`Cu7f|1oev?c4J@{sCnNSh_;HqAx2|BeZ#d6aM(EIc?O={zMCBERc_tzgi)&k}-{~`BvBb zEw2c&fS`ZhLY*JP?+Z!L5Bumz^gDNxoi% zMO@stw~tNJnT)DP{{U*dN8&fcW2Wy!B=-4f-YW41sI1^k@>i00lUP|xJ zWE;xzS0cRX)n|Ly_HfjxN*cXN9v!%v`YqEdDfg{iNt!rD%#(`LgG@$W-6&D*O4?fw zy~wUFZBu)le3Wg<;(5-ot!g%C#wcW9Ju8m5_>HaGxdI8g`q$7SNwu2Xqd=_2urK@# zZ80QUujN}q6E(i49Pp5r!=dDM{{R&=rN{1O{Odv=j2hf)#4~<%+)dyZqvy#Vl|lai z2%HPYlE0O2^``n0^&K}k1M$OMkN#NA`P75d1~gZa!>wG$7*R6{Dt@g`vDmY$Z}YR${ItB_*H5ANg8u+WRgOL(`qw|9*qJ8B zy>`P%AKC54HOmTeiqV|zv~Q}pZtKRLba;6=$E{$eiiCG%xA9!QyKeTbeq-0xu-LPI z5&Y|#Rh{h7+^x&*s~+8@c=Fw2!_CcWr;V*7#_71Poqu|aF(ZC;Z6}v#9%FuW5}JJp zjn`A@J3kib%)HIbPdAAzB>w)YhusD5<`r;AI7Jd9TBN!d$;z2 z+;AHrtU+%aoV9rEzr;ggpq_E=nl9J|AhBfle#c9uVV^RAkeYb1A5B~tJ{A;~m`3IUP zu4wGeiEfWN@mJn8&97Q0uM%GqJ*ez5PrNErqKZXzC0^AubWu{khV%VdsWgAQv{7BS zZO%H!q~k>O~Y*l@CIW#)t6KU&loi%_FuXS3i29UbInIvuSK|UON8(kR1O2 jO8IwC{{WAvf8IaNiYwQ_{4DvL!{bFdD5AXs$v^+uw`G(z literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_1/media/cifar10_bird_1.png b/axolotl/tests/data/datasets/image_dataset_1/media/cifar10_bird_1.png new file mode 100644 index 0000000000000000000000000000000000000000..3b736445a441e56a87bd13db88b55fcb44afa4bd GIT binary patch literal 2276 
zcmVV1+V~S6E((Iz}AAb))-^0eVzjVuvy$ZZq^h4o^3zRw$@^d zDW%4kO&Z1^l>}=M#sFaLv$Rc_&4IpA<^~jFym|E|MhNkJ-*FtJR2YVwbH{O})4Ag~ zjcBqgm0BVMH~cn$ZwEyPVT=I)0eHUex*lURU1eIEqk~(V^E_W0h4N}$6gi{RD2Xs) zjDrPZw6S0!Qvsg~E)gthCuiHjo zj0Qh&gpdS-HdfhZeS_z0ZX^K!O31SULDZ^*4CBt|dO|SnbUOe1*C$Hrs1*>Z!`Q=M z<4(MHcoamTsLS*7^K_LVLJ=Yez?yH72moP>t+5XtKI(M0in1n@AMEcIb;&$-w0qR; zb||;lqQLbJ_76QTq>MB{9fqxoizoBNqO2t#1Qx);i=+5oiHtGY>hWZPC^@`!^y_=? zYbieZ=pR3O`=`6Z-T7jkrmMP=l(!a(YL!e|t+0`W1$lh@U8%&s?|(60t`NdE9e8o5 zC?E?6Axs*jje&057KNQAmmMAxNypd8GRc}+t+Vyya&dZcI-Mm^6n^iuSEQ^(S-yJw zomXFZt-OCfUl#!Q8^^cSqQCt0&(>hAA&S~B9p0*wyj-SrmP%RYNUz~UVFvd`fiLt;ra~Xa5=u1%Qx*%j%{F8NfD(;vV2#?irk%jcAm zMuLbUUrX!@8;-UR=0t5JWBR z;;)ts&zBoHemJ270>bm|$4=4hRErhNM);6_@{mz|3 zc5Cav5hq*6*LMJV+=&l;;# z6GnkjW;$KCu1g4k7yOtagtRfD7FD{;sFuZA7gA{=8 zcsShQoUc|Z#&{S8v-xB&=pP)sG?^^ZH0||z8~fQvL_WCpez{&Rmy5~hI*cNTP>pCs zNR1$?sx)6)1Kn<$bLx6-7zSD?qmApjp65|YrBVb_DUA?<5NwR$oZpz-uWDYZ&hjg;#mkAlEjAcV%9m^*BH zXJCxUv&>pR2pNkgfN^{mGul`Ul(>!u7GWTug+j)FO0sl18}IJ!9v>h3e#jWpS}Uap zAzEu8Wh-iJ4Yt~y_PWTWl!OqBaGoaG0sx{KGow*06z4pSV?v0O5@BL2k{T3)IBs8$ z^6_{)7z_>%55v$eib4qCx=wFvyT7#^MxixUH6rvP#gs7?w-5onN;73N_2Z+aC$sTx$)jajdY3II4ozT?HMfFR|% zE<#8vErpm~U1w<$_?}cXAZe{x%yYeW@bXJLTd^}&?SC*!e{vK>d9QO|WdajUB8laq&xI)m-raM12|I*-o2K0Ci~ z85<1yN}JQuhvdBvez(&ri}dOBXq+Ugx~?}rURs;7s>b624g6tNs{JSzSZA~ z!jKW}2Z7XPGM!;!L!U*SgBZQMo>1=Y?G1g;J3oIyJx35*td@D6ms)Ft5Tz6oYQZ#( z%GZT4svWsVD^ZBWeD$(76*-(? zLdvqLYe5KIEms)ZAoTjZepQOI$B%1K9^bh`nS&_9giy{MW0X=_YmG6+7;nCRQc0<` zl3i*c5$QP2=WY(k7)RhU^-6*Ptb0LB;*vDOk101_fj&4@r% z2oV5@3;-e$kpV>_szaa05e%UrQMJYd>lWQyRr@g=`(9Mh*~&G9V4ZcrId9Q`RRjS= z(CNsCqzZ^M0ZauT;i2ybtE#FB z005|})Atiyh_AB&q+G>0Z^#f45uvdz&d7$YTNpzLP%Ecn-}n8GV;+uuoVDAR&2?-o zygm~Kd~W>-6#!8b1z^5B2{96p_nr`>j(McVj}JEZk00)qL%-YZoV8(jVywM-`*yip zuGVV{03t$!063AuqShEuHO3%f2wqiGbiP=WoD<^H)6>xG@9*wUPfwqoA5YIOV%sd2 z%hl?1yV<23X7za@{m%Wj)&uJyIrkT``vEV zwB2HHc5&fC@F6s_*2QS8gWaA1R6s=u2v}{?G&yJQy)kC;8Z+mdYc0dj6VYzBo5hBR zViU>GdVM)xEKXJ{-$duV3xSydpw!}h^xjYOPUB=|I}F1#%#_k@w{y-7!$3sV8s}`= zwjl%pC~53wU5OP90Z|oJfGRSX|5f|Ogx~>@h(wi$RMe)F5OKTR645x0>-9Rucy@Ls z!n4^-M40P-yCb5Uvl?U-n3PS(!p_+-C1Y$Um5f0$000Q8YK*DOuPW-gF2)D|^Z9%n zM{8|L6#zvPfLPczVJtcNMnKV!ur%J!yLNTD+H7`X8u!}+8eFfhm8ue|Dj4F*H2F1z zKtupArOGt<&jK^X#28Z=4O(EZ#v0O_>#Hx{-KJyz!(YEUU;0%?mLT}XxoC_*0BbD( z*#DFN|Gol%h+6x4g8hDP4LV25<+5%4#l=b2%=*LOe6?_O^mV9V*3RZ*J{mGC;=NZD z``NA&YZJ^X5~8Y1Y2m#O-d$a;=8LXrTki;jRb+i})`j2R|MMT?K4GXKwnI9Sp;C(X zfm!TC(ZmV9rXCTI!jw>I2nq0H(f#JvU*6tc#wHqTNMY#HkP{(2ZTJ6r+8+<)=9Gg$ z5jF^!8IV-fI_IplpIc8blZTD5px~UjzCM5d_3h>5dD}MLTBwr7(OPQ&745bkcYW&G zu$(F8C#Q)Jg&7b4z*=inb-ILeZt_U26%h#_I66O@-`>7CIa&B%iAYL4zHCRXaXuHo z!*Tfe!%s3CR;zFl_=j))xVX46rV-JJd0R?BL~AWGPrGT-5+NYw_wV1nxjOH<84^i_ z!ecFERPjw5Q#$m+{m&2M=J}*E1_n-x)gp!nsi1<22q4;0sq%iUt%_)c{&m5Yd$K zG-nne0|J^$?vF+l##B^rsA8KiSXW{&4;7ONonS z-U6VAAd&slmkCi7Sk;nL8M~&rI6IfhyJ0H={h@z%`)2=izx&VM<*?bS-zjLVg<%-$ z_Bb3yU0hs4yy#Pk9RL8cAmOXgR0OS6RZsw~o9D-)@$=|Pvb)W8zaL_p%?YZ^ot5kwdW?XV?(a*k%A<93 zclWU057EazTh-(K-DL+y)7LWQa_q-cb#?JZZHV21rItFPii!XtS0(~02&g(O2vq>! 
z{&3JLg!=r;FT+@Ve%dMv!k!SnSf5m5pZCW;Yd_@Jt>(++>hkT!k01ME??d2P5J*{w zNVqyZcEPzx?I9Aq|)7 z%jN213Q871L}o_xR#gQQg;BXb?kktJotNVP2D~!_=#a7s0Ep`2_Aq8Atf%XXvC4Pf z|Ly7VkxLdUk zR;guFgG2_cbZooa6q+GetG;zxo;g9BDBUD zLxj%9tE($6JoH0|(K%;~X`84jH#awF%wK=~tG@5M`8?O6s@9s%>4^yc1z49F7-%X@ Q!~g&Q07*qoM6N<$f=zJ8B>(^b literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_1/media/mnist_0_2.png b/axolotl/tests/data/datasets/image_dataset_1/media/mnist_0_2.png new file mode 100644 index 0000000000000000000000000000000000000000..ee5c090c403288c3ffc6ed7e28b059c462598bda GIT binary patch literal 289 zcmV++0p9+JP)V{+x}@v-E8wl-ur+-Y!4xsrVdk40UBMj1 zj*nn_0)n<)*^(z(B=>uyp}25hl}Idl2};f*xC0O6lFvL6TgB>`Lz$yARNeufEOM-k zFh&}ZICv+;Gd39&&9|-jSPA#z$d7s5H(}lsl!r8Yz_I&H+nFO6szzk^&p(%wWs(-z n(Gw5jlx{M*OL%52mVOy`a05AWEP)U=ng{RyG2`3=9kmPbLL^zIc#}wQe}8SmA|+9D_wV1oe^2^hN{KsN{rmUt-(!K8fh4s0_TRsM z{~iwD#E>#y^XTv2zkk29a-)i~FfcGM1O_rNT;Ke+=L0rZQXc>SMHgOxYfJz;00000 LNkvXXu0mjfX33L> literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00001.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00001.png new file mode 100644 index 0000000000000000000000000000000000000000..5e05ec1c660edbc2a57b9ea70610ab7b67327c7f GIT binary patch literal 312 zcmV-80muG{P)mpt}&AA2D6$1mqqm9>2 zFfcGMq>0{RzzJ6V>x40xut;yfC&_?El2H|pM}{^5*wtc@FGyHcx*4j>DyK_TRsIamk7N{r%br zhn&LMzrX8nY0UWjcLzTXxquJ94~pY5^zYxl^KltE{rB(R>Nw;K?*9J02A9THzkeU$ z!6o_k_wPtta`XTH{VR{l#wUNj_uvR{28Z8&@1Toe#wRw(8^`b|qbdLZXP7%gEMun; P00000NkvXXu0mjf_s?aZ literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00004.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00004.png new file mode 100644 index 0000000000000000000000000000000000000000..bd8a5cb91baef5e043add84e1ad13d51ba18a3cb GIT binary patch literal 269 zcmV+o0rLKdP)u0EZ(^4mTPX-nJ8w zIXVvbN&3YVC1x4Yjo*mq7CXgK5o5n_;dZG<(hf;AmSH87s7*1He*4uP)U86^er{CS ToK*_l00000NkvXXu0mjf>B4Tn literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00005.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00005.png new file mode 100644 index 0000000000000000000000000000000000000000..e6bcd6142531d44f53366ceb12037c1167b86744 GIT binary patch literal 328 zcmV-O0k{5%P)iV;8p|F40000cJdy(XcqF;x7#K`(x~TT=pFcUcBpJ{o znNTE|7#WedEGUxy{^N8I1|UI_iILF`r_#TF{`}FxrE>-Y149}v$s71wz;NsD-~Y-u ubTVB1|NkE@V;N?l@X#c$UB_!H9RL95A|tKJqgg5d0000n6<&|gpz1|lNy10-$2Vi74Z zFi6^=B(p_V(kPskUc~!lV d{UzQX;}gl4a(M~kWUBxG002ovPDHLkV1iDvkl_FT literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00008.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00008.png new file mode 100644 index 0000000000000000000000000000000000000000..10e4d0897969521190e8db3d107dbb983edcf8f9 GIT binary patch literal 172 zcmV;d08{^oP)4l&Q51rNpdrW_+Ac`W5=MVQixD(LOJhrmZqm}`;?ff0(%u%KG!)qGkcx{A zw;>1`yySgPx1pu(@vPyuo$tp39|sm@+8uzzT&ov{&h9e!C_C{%_x7@Mw*qkZ*59`A z#q0p|qWz!upM$1R1}JQNW$f@G?5)Dmjh<4L7V}n`}gmk6IMfnUi|y_@87?7ZdheAzWjUe@87>(><&2p z@6x}2|CF#xhMfEN@83U79B!Am`0wAp)o?BgvRt&D4g&+jQEb{YuKoJ=55-OS$bv9` lmay|M7(_CB#2(Xh1ONnAVP#++TN(fW002ovPDHLkV1hyViKYMm literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00011.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00011.png new file mode 100644 index 0000000000000000000000000000000000000000..6b0aae9d17a8970982b65dfd8ec031d7b57fcc5c GIT binary patch literal 212 zcmV;_04x8AP)VX8on$Wn 
O0000DI%$JsWukN6mBKm9beQfWcij~T2C0x>jCMZ zB&j^uos{PCKb=n6apJ)kyMf(C5tiY3+>Y9Io*-#wsRMXf?Xa?mH=veowZdFb=6qww z0;lr#UtM!O7TiBNyZ;Vw(qvDSIR#j=L$wruJxS7Uzwb$^m@vM%7!1wBl7D9G_Z8`q zB=v3sMf0;G7>P>Jtw;E%PMdHA@LbmesLs^INF0Xee$7sXkCAX_Ur1HvbtSia*`VW} Z=@ZHTf7}X`SZ@FT002ovPDHLkV1h&RjP(Ej literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00013.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00013.png new file mode 100644 index 0000000000000000000000000000000000000000..3ac16af5d1ce35ce78e9cd9f44243c3f7e8da1d0 GIT binary patch literal 306 zcmV-20nPr2P)LBzC|#+VrNh4C;>g=PPP71>i1n?b{h%~hKwVT>&g(_A%eWxSzm zVHTeYTF&t^2*UfeudR5}<^H(mo&(HHUi9K*H*c&vP^v9HuR1In)Y-1%&?j~1%pw(C zVXoL*bFrt-emc2X_?wE`=Hbf$SMtF75I}hyr?%sY5)KL07*qoM6N<$ Eg8JKwyZ`_I literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00014.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00014.png new file mode 100644 index 0000000000000000000000000000000000000000..7642af9fe572db420f4cde763e30c115e506c0ab GIT binary patch literal 165 zcmV;W09yZvP)>XJh+{wa6ALIB-H=_%4!pF TXCE_f00000NkvXXu0mjfSaUzs literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00015.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00015.png new file mode 100644 index 0000000000000000000000000000000000000000..df66f30485d379feae37addfd01d8217b6c4caf4 GIT binary patch literal 286 zcmV+(0pb3MP)E(c;t>2pn7+YK$NnUBsY}M!}&)3d+h) zpg|b*zvLpt`*NQz;F<2_cOUL^q@_tLa&IK^`+6*RUY6?DKLFpu2(0(U<0-BWzHq)$ literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00016.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00016.png new file mode 100644 index 0000000000000000000000000000000000000000..d1fa66a2b3f701fcd16c4a450902650c5be23ee6 GIT binary patch literal 307 zcmV-30nGl1P)m?a4)J4?64~(g?UpT{|6Ri-dFD;K5Cen8r$2R=a;|@uGcYhPFf{$U4;N#C zF<9@cU|?WiU|7Sz!H1!fg9}2d|NEDLq4UQ#2z~DwgDQq34E+9$U}H$Kb1;0ybS?wj zP74MG5$76p3@LWyq<{Y|%sv0`-^T?E42&>gPBCM1Ck9R}1_p+|PZ$<#eT(oeXXmwl z|Ni~^@Z{IOf3u9xjNAF|`{qf`HZlx1{@vk0x9k5sLk0#02A0k*FVOWdF#J2m!oa}5 zvH9P32TTv#_&0B3Q^M6h#|^Q0ulf7WpFdZ7V3Qg|005qRS+IPKP4EB!002ovPDHLk FV1m`dh|T~2 literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00017.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00017.png new file mode 100644 index 0000000000000000000000000000000000000000..b4dfbe4ad8861c2784e155813d6a718ff2e6378d GIT binary patch literal 304 zcmV-00nh%4P)C{rHa5v$Aq;HyLwLi#d0@(E{|jKSn9jh!F!>;+ zr1L*b23}zX1_rIaFd-Hs=_d@IAruTCf|8LD%CP^8Df$0Dgl5fHjA^Ij(@X{i1_stS zS4FURCHq?@pJDvXi_%!+7#Om@{r&r)i4CjV-~a&ctTdXYn|~Pq0000Uqcj9)}O#bWG>KPOK9`4fWOu~)zT`TOUS7IsMni;%y6 z7T}O$V9@>hM-7R~gd(ZK#K?eC^83F7zi^qVx%mYSw=yvBKm1dS(>XK$EWs(~^7XSn zE;)s_UxRSTaZdTO5~rbAe~+-^k~4eKD1%cYRsaA9)I%@c*pLkX00001CI~g2Zk4Q;5W~{qsi!NhbqCy+1-y9KnQ0^2|4Z3Gfs# zFoYpV-e+LoYZQbEsG6a=kGb~GpFb&3j?^uLoeT^N3=I4?{`@&F3}GAoK}fPNFfcHD zJgUN=EBls#fi0aO6iE}rOUL6G7%VSXS@F~`FfcIOc#mrBlD~id{{8#+KZHs~@d`ul z|NsC0|Ns9FLRp{~3j=rlZ~a)ufDR1*{Q2|hOeB;w8T-ht`5aAgk#F?1u$_#=+u zAO!gEf`NgdjRjp83e2DW`SXVxha`jPi$8ztaY-^noomLaljZ;b*oI&`3__5+}o=!NI{LFruNUr9q)T(9kH?^9Lkcg2tBS{z6*~2@Y+y+frLj)*vQM z4v9fvNbvFI=dr= zErq17f1tIzb_pl|9#k|@zwE-yhGS!9Qy3H9DP#amHlX93`a`C{HuO^?gWO{b4k)Jp zHVXg{xd)-@`>AM| Z`3C9ucqvSd`S}0<002ovPDHLkV1g3qj-3Di literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00022.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00022.png 
new file mode 100644 index 0000000000000000000000000000000000000000..4301a7dee082f737919d220bebcb2c8fd57f2c39 GIT binary patch literal 255 zcmV6FvJ~P?)PXf2X;vY2F8g$O>xOFIQ(tGDaY{aF%M2T zlV2G)qaDbWA`002ovPDHLk FV1i*0Yhm;e5K?~2Pg|Nj1M z#U3FHd*$!1sW{|RuKoSn ziOWLozkmN)qY5#>Swaj92d?4L`QY#05L|NFum9FDqKUzcwcus=`5%`~hKH{WamrB_ Z005z%KDSH0(N_Qf002ovPDHLkV1f}qX`lcA literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00024.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00024.png new file mode 100644 index 0000000000000000000000000000000000000000..65b7f2f4e572e96d3a8fcf1635dc60658bc7eb90 GIT binary patch literal 261 zcmV+g0s8)lP)oGh2k=G(mxbQ6k}79al16} z$G@&`w%b@VY2Ntv@BK_w76oiNuj{Kae0s#dz`(%3jni7;XaCmW za4iGF+xrX$a7r>TFg(X2`TrGGNmVm5U8(*DqN4JP{7^NpY4(2k_wUD>fB*iyeE+Wt z#Vg_y!WkGCp8vmkk%57MfngDf#=?L9{%v-^5zPu0wo6hud(1o2}@BML$ytYCyBY8N|ut;A}plx9mSC4Lm?ENmrzKu{0~RuV-m1dIFt z@v+8E#7bC;;6V$MxWQNJRF`|WGXq!pXRUbqpO@aeg$&G{y#rt>iC8zv`c;0ko#}!; zz@st7gaAcJQg54{>bab8UDw_CotPMzPJK^#;BD1fxPPw!tXbJ&72sja#tKbx^>HhxG{rD8l9x>RfJ8+pfZTEFV)I4`Hls@y2J dvseBreF29el6>5*%$)!L002ovPDHLkV1g8UjBWq` literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00026.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00026.png new file mode 100644 index 0000000000000000000000000000000000000000..62a4035fd4134988533601da551ddf5c282c7460 GIT binary patch literal 263 zcmV+i0r>ujP)&u2f=RVEFf8zbmGs z;JSbRS1vsK|6d^%*D^3itvz?{{y$$9Y?2HN3=9mh|HQGl77CPbYGYtvKXZu_r=-N+ zHrzUs{~9Cl(Cjo~IEz!G{=dV>TvVNc|IgsIvg;KuZk-Ibu%rTV005XKGL}6m_yzy~ N002ovPDHLkV1nY(X^Q{= literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00027.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00027.png new file mode 100644 index 0000000000000000000000000000000000000000..81a6d59d6bc873aab83ae4bfd46c57604236c062 GIT binary patch literal 337 zcmV-X0j~auP)M%YZ2Hj|Cr^oWIQ*x8%d`422evY5h# z;OqjB^vE;7seJ3ZN)v{&U|Ci_`915lAxU~E8RT-X8M>CF=3FFh=U}b&AW6U%&PR(m2D jE0UzGayj)&6Mu;>%r=FM3hJ=W(B8+er>nlD61_p-p-xwI?o%sI~Rb#++qF0kBRdLAq{`_~x z6NjAlr+*J{>GS^m?=~*`d_VpBr-4IG_Ts-6!5EqdY2Aw>Hl76@(3j+hg zt)DHcAUs(+28M??9H4mm@85O2Sf#l9Ui|(0R~y5%3=9km3|++x3=9m1zc8Fx@Bl;S zI=G8hGGpk}5@ujv5R08J-^q5&@edYztq;gCFc_WwtA$0*;;=+nAm;A*hIpOc$&-d^D`ujHlizNFDh+k$fqkDycfq{XICqtNl h;nb@CSac2o004*wSn~#PD002ovPDHLkV1l_*c&h*a literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00031.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00031.png new file mode 100644 index 0000000000000000000000000000000000000000..1d2a89c49be77f2b2769bc7354534adcfdef86ac GIT binary patch literal 299 zcmV+`0o4A9P)NdHU=Q?JySyWeQ4IV|zySF&0+gIzNF=P0F=dQfmtvORiK`R_mZD#D`hF6-BzUWV7btFm4QF+=pc69ml x*I9)3>o>SrGFGO00DK;pfj0Yqm$lQ*Ti;PoaC@vgN%jB$002ovPDHLkV1g|RfL{Or literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00032.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00032.png new file mode 100644 index 0000000000000000000000000000000000000000..0ae3434b1ffa6dca045d5b7bd0f59e50d709cfcb GIT binary patch literal 262 zcmV+h0r~!kP)iY9_&Y>C(t+@{|6Cyi51=ujP)fHGE{gWGi|NdReg~dJ6C;tBZ`}gnPG^`$BtKsju8$i=9oWU zvT*4$dh{_Dmz>7yznu~|0T^S$%|x<*cB@4hpTZ9yk}znMLOFZh z*8t;Q?|o?oPdm?f{+zQ1{x^=jnYGu5LzAStlYn$Eo|zJqA*ATY?IWXy|1mh%1f|0j3fOoS|*6nG< zIS*XyIR@FANBaP5pYKw8$s2-a=v~5B&Q2^-nVY4n44UTcs&C#N13_Xf9-?wmTesAAw2gCP^U$psi`Ox=#E{`~Yq7TzWm4g_-~W002ovPDHLkV1k+gf0qCN literal 0 KcmV+b0RR6000031 diff --git 
a/axolotl/tests/data/datasets/image_dataset_2/media/img_00035.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00035.png new file mode 100644 index 0000000000000000000000000000000000000000..7e76e156c02adcca9c5f0f49df3be224065a8556 GIT binary patch literal 191 zcmV;w06_nVP)ZjNQkHXQy+4+U_Zn0he-o-0jC zQq+~9OJ0`@^mN+=sfse<(+I9SY^QWZOD`+RLaop>kiVX0xBD&(6oAyy7J#Nc{XVr@ t<}7^g)umO|RC?-fg0F2z-nYvg<6e@+I28JW03-ka002ovPDHLkV1kfcM-Ko1 literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00036.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00036.png new file mode 100644 index 0000000000000000000000000000000000000000..8361b7a83f1545d1a3a833491c4b9489593a8a2a GIT binary patch literal 271 zcmV+q0r38bP)O?9AMCCbFYZxqr?~kSa|a`^ VXR#cmpnm`W002ovPDHLkV1k>~aG3xA literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00037.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00037.png new file mode 100644 index 0000000000000000000000000000000000000000..be26c274358c0ccd4a08225f12ca9ab02eba1716 GIT binary patch literal 318 zcmV-E0m1%>P)mbk9nob&9jTEw)7-0hr?e2V>Lu{LyOx zsC#=B%f@up-vPiF(>e5}Hc0IO{ju!@|F=s3l99ZrKKpnJV6_Cy?C;=aIauU{e4Il6 z@Rh)DF1gdJDJLC=h6X8;6nkqC-skw$U5O0I9YykLHzokn4`Q_v9{m678zuB~Px}4# Q+W-In07*qoM6N<$f>D=^NdN!< literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00038.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00038.png new file mode 100644 index 0000000000000000000000000000000000000000..c80501c30181cac30efae32d4711e7206489c53e GIT binary patch literal 255 zcmVl&tuN02_qSbgq~E3FR}JG5Bsp)yc&QW3e3k z*NxK+{Qv%$!Z}R%T#GIlfXrb*lN4llbeqAOgMoozEt;ezhfpbl%tHnq#xLjBA?f6{ zS96ipU}s=pVEF%Z*S33ik@SeXgoMDufB#TyWnf@nVBov;uf14ISXl1qKVb|>2Crxi z1_lNOhV$4YVE{!E-JhQr8E{Lk|Bpu!FA%b3CPg_}007|BJQV~WMT7tV002ovPDHLk FV1gJaYmfi{ literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00039.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00039.png new file mode 100644 index 0000000000000000000000000000000000000000..7b4dbf24ee1b33799f1d84bda523b79519501e13 GIT binary patch literal 282 zcmV+#0pJ!s7zCoxO5ht{(v84Ixxqj&8W;%@$#RXl>7X#0#pz3M zV>qcAF6BQBj&fgLBA)u&b53}q|Be@ZcU)E)LF~dC*t8sWynqXb6&66n(a)%b&_f3iZ*PS(5=Yr1Iyg zrbnwbfP3Zd=&79;KZX7a$w?KW%fuX6y#rXk18{Dc!e@Td&Df~$>J`Rkxr0AUQpq^K gBI)+QbK`#U1JC+pkBU8@LjV8(07*qoM6N<$f~ACd{r~^~ literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00040.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00040.png new file mode 100644 index 0000000000000000000000000000000000000000..6cf9b8989a1675ce9f4b866e340e569ecfcb266f GIT binary patch literal 171 zcmV;c095~pP)7P)?;&nU$xDy$ zIfwzLB%k>k+)AVW`Qp|Y!tfZkB*UlIxQ)Gj54T+L{~0KJ6k~-M*5KBO4gheCF1AUV R91j2h002ovPDHLkV1i5mUL literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00043.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00043.png new file mode 100644 index 0000000000000000000000000000000000000000..14d71a3dc959f94451263e479ab4f14906bee906 GIT binary patch literal 233 zcmVD*Nj8n10>v(8M+YYBamz-HynybFVm<@S7lGf~$GIPU>eQGXTJ3GHT9 z@ZCs=JuxxWq5045#N;8k-Eo~jtLwhrJtB`W4$CfXY>RV?Ix002^M`-7P literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00044.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00044.png new file mode 100644 index 
0000000000000000000000000000000000000000..2f93cb48690dac6cf18d1cddb0c84bb5202d4f72 GIT binary patch literal 225 zcmV<703QE|P)RF^8dJ^bzkeX`{%K7Noeb&fwhRmm3=G`r=S^^0WBg|kZa4h@8-!D? z`fn3Xxt71DMR4o;gHx{X?*m&D0Tv{-`v3o$n%Cc4$Ewq=@AT(?fBzxe%ZMtkA+KQ@ zaNZQBi#`6H#wo`z<4+(?xwL;*amtCF#uG+s|1QQUxA^aB95RJ#TK@Lo3P0l))7@~I bNg4nEQ;j}Wvcy%o00000NkvXXu0mjf=I3GT literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00045.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00045.png new file mode 100644 index 0000000000000000000000000000000000000000..471cf527870e403803bf6bf6b9f4b4fb6d374989 GIT binary patch literal 284 zcmV+%0ptFOP)+yrlHI6ocZ%=h7JRR z!NWgW1kmM{{CQ!^z`($^`OhC4bd838{`fF3Fz6irgGDm(&l3d(hUDj8?)*8;0~2I{ z(g6&w9T{?EICW|jj(tJbY4hjLpFe+2h;bi#u8H9hspbcN{`7G(1pPUKAqNAzfB*L4 zlnnZFM+K+kg+B>MJSG%53EkiCaB8gldmN{n!i_IdI3>^j`G`wO@x;$RF}Nf{|NQC0 iX=v!56Wll@F#!O&cv_!@7rVm%0000A_|=WI*WY7TsGxpYS1oi+tSxW<{rmUt-{*i! zKa|lW7yrJTEv{1W;ak$Jesnqg-^&;n7#J7?wD(?-#Bj>5YkUk03=CXTekEf%wDaGq zbT$TtSr0?8_<~vI@V^}rjju5r0|luc{{3^pA;-Y5=HI^@98yfP{wD4Fmw{a}`d>2x z@0EuR*yKds{gY*2F#mE`1dC+UzuRmK3=Ap%@(~unS*i>(e=#sHZ2rZMMUsKxGy?+z zgA@w`7RkTA?lCYhsFg82$6}}K%ge>Z?f-c(0000$+;BSR``lkvICU~GoIQ^)7sH=>^hIz> zGBD`jlT^hg$$(AL_0F+0R7n;D)8)UwH0i~X*bPx))j9s}*WcG7*rgemN7=+IjoGb+eq3x_cKq=h};wXrVxQK&63JQ`2XNOMh&|PqF(@j+L2XybCqhOU# zrGsDtaVgx94nd>om(8I!ecW;H_wc^JB+GI?C50tJwCDLENxgaX#UHmg4FGFC8HFSr z06+%7`YZrEH5I^rIBo;z-Y3TvY|B|XC&lN5bfD9t@n~+}7s<{+*3i2k#p_BTb~5oZ z4*(y%e(X&B0@}vP27=ADVJZONk`vkY%NB=}MA^B2Kl*Znu*uoq|M&0TzkmNO!bO-6l3&j^CkhD( z_l`k;AI;cGUIvC~P$3s*3dRhiGktsRAiB| zzklCdHxv;Oak~ET@7I?OD9VGLLZj;6-@hNS(GA;xkj%kwuf}_*{GU7@MU;Y&^FfcHLV#u-Vd&uyACocm714BHz9NVgYM;-oIGhmbCZ25Oa zyx^Y<0|NsCgD;xC<^OJ2Nql-JfMgAv2;G_VxdNRSXOa3=F(&TP#uJ7XSMD_wV07_6!UR0>}UUEk%+tS@GxZRRtcyTxJFa zhPi+LZf8T1T>Up)mm>bQ|Iiz zn^5>DI^Qr%V!^4?|L@;sWIl?@8;gF<#i7T|qEwCdB{%002ovPDHLkV1n7K Bl*<4B literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00055.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00055.png new file mode 100644 index 0000000000000000000000000000000000000000..66983ceeec0e5015cc8a15bd13c1820f63d6c07a GIT binary patch literal 313 zcmV-90mlA`P)LiZ~m_5W7zfg@82W5 znEr78`*)qu@xOoHdSUS-XUwa=e{sk$FfjQ3{rmTgH+DJhg@6D4osJ~Hf?#q*#V~yM z!;W2J^WVQ~)pqT~Dp&jVZxJ5@c1gQ;e>d?EQ_ literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00056.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00056.png new file mode 100644 index 0000000000000000000000000000000000000000..3ef6d460047b31138f339281093ddc26006eb751 GIT binary patch literal 326 zcmV-M0lEH(P)d0&KRg_7vMCIv>Z>aPn8b9=f`mP#k1$wD1Wi_lz&;gqmr86z007xT4sM( Y-^9|7IP9$C`v3p{07*qoM6N<$f}MSlC;$Ke literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00057.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00057.png new file mode 100644 index 0000000000000000000000000000000000000000..615240ae675699fecd3b33e1c0bedc5b57d50714 GIT binary patch literal 253 zcmV12CF5^EeRXY?Zz4>i5!t&6D(!{(HS(D%0`2>>{A>ms11*lB_)r}?BJ zm+go%XRuS5`PFZgx^?E#Q3g(I==M9BKct_zGia+1=L$@{Gsv=%00000NkvXXu0mjf DcDimQ literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00058.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00058.png new file mode 100644 index 0000000000000000000000000000000000000000..4459f1abd070d7189002c359d667fd553e383417 GIT binary patch literal 236 
zcmV}97&zytUeqlB0000{`Z{%ob&JRMud!nq2Z;!e{V1_FfcGOFfcIu`~PgoR0akH!w3cj25nsi1_p+Y zLNLW^|DuDBMhK&pA&DPoY<&E8DFfVC23C$t1qOzrkB#RrF#P(-aNI0;0S;d?=sraW wAyjbr5tM;VlHt{vJ-AF|V33f(EjOS50Hla%5by>d?EnA(07*qoM6N<$f^)=x%>V!Z literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00061.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00061.png new file mode 100644 index 0000000000000000000000000000000000000000..4befbf492afd74984cf76450f9e72b998910cf94 GIT binary patch literal 254 zcmV>M_B4_aVoh-wqfAjgV$O$a?x1B-t z-Fs~M7$*FCZOZWYUpZC_|GX||@cXx(35#6t^M9KvTEF}=WH5-rkX-ri-@kwV{{8>5 z=S3k*j1k6=bz)FRm3ndgKf{x*YgkOyeg9qyn`;;t7#JAqe;4ACV+j6t6sH{Lx_1^h zC2RjJ!YRja;kyxT$v6ENLh!)iWa7Xr$$g&}x2Yrp0IT9dwnAEUssI2007*qoM6N<$ Ef&;sC82|tP literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00062.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00062.png new file mode 100644 index 0000000000000000000000000000000000000000..2dcf216236e8eb776ba5be3635ca50a47aea5c4f GIT binary patch literal 280 zcmV+z0q6dSP)>dwuNH=?n+&_cj9{^t~~fZqm4fc zkH=D6sypyWH7RqY;MwtZl$4_9(6QTIKs~?TfZ?uvp{W literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00063.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00063.png new file mode 100644 index 0000000000000000000000000000000000000000..7944c5cb27fea752a2f90b326970183927c661ee GIT binary patch literal 345 zcmV-f0jB8cVWXm0*w}=SDwQC50kM$4HiFlSads^>uA3h_3r}&FnP)zjIq<(L*ET!*#cFLys!gNy=YdJmXJdZCw%#R^ zBvWGoTS~AyZs%rZQQUf~V>k@O?Q+!eCiMrVpjkYY{Ef7&bxi%8s;LRCq;fjfk!c?) zWd~g8b}5C2W|0+#8`sP#0J8w>d0`la)$gle7)C$e_a*fdTr2>Kr8fmlCrLV4xG#HG znF4V1w2XHWB+2v(@*SDy#On-k{?!K{cfAdBBLi@y#})u>>7kyS&j4)RW#J_PRzA81 r{^;n~z4;z%oyjW2cv>k;!N0CAFuwKI?ukHHW&if0$q7IIUjbo>pZ%AQD#y0s|3`fYGxXm-6;#QPfB&2zERGZZ zcJQLeF|YY|7wWf)f8PvHH7@%1P7cbP`|k>>T*=>mnNViIpMQ<0lE?n7VPs%nP|2S3 z^Y6b)FdJE5(ziJM*%M7T85kHCc6~m@B`c~NL+77=|Nht6)}I*_y}m*Qh!VBq}yPZC2C22%bVLYT^gCK<`_00674 VT)z#-3Wxvz002ovPDHLkV1mKGmNftX literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00065.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00065.png new file mode 100644 index 0000000000000000000000000000000000000000..68099ee6731db437ae9c4c9c1000d204e1508952 GIT binary patch literal 235 zcmVgR9NN}K`g#n^SforluKfM?_wV05=*HR$7Tf%1cyOKZ|E0BOQ1uAh z|MmCpzimQXn8q=;KKc9o#&lN!Z0@uA`|ocF3l7JtJ^uIiaX1c12E}bZ|DDDq$-vIQREj lPmTZ%m$IpFJ*z3**MFCOq0q2l!Cl?rCk9WOgA6ufC3qOltYBb}@xdl3!ocvFfq{YNGd5#y{QGLmz`&p!fL$^Tmvb1* z-~YoE9}Eo3{#_EsDS7`-22MGK`zvwFQ566H`-L(aC3c#J00000NkvXXu0mjfmkDT2 literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00068.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00068.png new file mode 100644 index 0000000000000000000000000000000000000000..ec1ad06372ed071efcb089e72411b47e828f1ef4 GIT binary patch literal 220 zcmV<203-j2P)!}^8sS+7T1Ofg54b!wt{!zxd3+|B1pX|mQu%CE8 WhFE{#y&(4h0000_XC@D!Dm4On|%Hy#(-0=cA;D4w$5h~kQ;GDd&z7}l|G&Pl77*1P)AAWw# zv)u{Bu;+na-Jr@X$__}2`efJLHhM18D9bv(&h`h5!Hn07*qoM6N<$ Ef-y&j>Hq)$ literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00070.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00070.png new file mode 100644 index 0000000000000000000000000000000000000000..4948dae5c03770d8350e1352e23a5abc03ffdf5c GIT binary patch literal 254 zcmVR&QxV3f8zkk<)F{QZLj{f`m_wV1of9F61Sm07JMkUq^41e!4a7tt7YJc_j@87@Q 
zd>FWr{{C%6k>i~G_wU~a$J$&O82tYJJ-~%3*|R0WMuClifx-UO-^Cg@+{?N5?_wSt za@=?SUgp9f$Kd$)oi#2w>z|)2aLGCR{Aq$qj_bwWg}CHQ5B=SQOO9dL-xr3s!2xA;^50000ISh|kz{I5mpB{-=slvh&|#0i2R&|L;WMquTlZ2p&l^z=F(K e#E#ps^a21k7#cU$HjKXj0000roQAgl z-6euc&idJ37X}6f5esEZIj%i_w+RW_FS_{mu>ppp``_zAf}8*T{rmUt6m&Vsmw&Sv z=KuYBup{*ghNS!7O#-1Go_BCD`20YZ2_Wh{ER z85kH$|Ni--i_1~Z|NmW!#AQJ+In`o$|NY;|`5l|KrayoF{QdLC7n_{jn?HXXF8}#+ zh!cyX&*i(?Oksci{As}DB8HT|f2ZJ-)c^D6j~6aUrB{Fc9EWk3&?N5_FfhEvCCQ+` zz;FiB!0=fN3=9k^_x^syVq4c6KTa{r+kal$V3Yjw=g9THIJ}t%iO*DQ0mQ(2`Olvd foepf+tm5res7A z0|UbY98P`q_wV0h8XN!qJ&Yk`G3WQ+z1#R1!an@HB!ez__3z&~%nS?+3}-Q2Tlwd2 zJ2L|V0|SE|ToUFWnP|q}6aF$VFfcIufop(CuGD<%_Y#LgxljGw4P#sVy(Enynf>># z29#;~815C65Mp3pV9;MD^{O1ZBy00}saGM#PzCwU{C#xK)5>bq-@lKoFkEyL8d-ne zVahQmZvOc9@87?_O8Bt)#(VSM>5*YrrKk)5Rbg$KhyS_l00000NkvXXu0mjfSWt^) literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00076.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00076.png new file mode 100644 index 0000000000000000000000000000000000000000..0e51f475f720c7f75a3a64f338bd447052bf868e GIT binary patch literal 301 zcmV+|0n+}7P)`)7+oj%Dt@ ze+T$+$o2gDcUl&Q#_WIp{#E0U6FT$nNH{ZgIpr(={#4>}Y0bX_skr2PKm9DjCC700 z-xG8pCOGRkgIEokBqGXmFESk4aAQlsfeQ=*DGUq@J;=(qXZ`#4@71Bdw+|gU{qN)3 zw`eY!d+Z+stevZdW-J2(gQ6~k-v1e=vuO?hfOT8#EyRmi00000NkvXXu0mjfI+%sT literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00077.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00077.png new file mode 100644 index 0000000000000000000000000000000000000000..e15b89556ef713651617b02d1cc7c6e6aca8c5e2 GIT binary patch literal 196 zcmV;#06YJQP)oBN{(Vvoha926nT$)${>|^-?kFNit_|mBxcCT{PLz;C y2mk*r!6}#d_qQ^d5W-GxO!Z8dLPSc^2mk=l#xo{+1}caE0000NklN||m_m>X`E?G3P6(C; ztBIut6f1&T7)n-zLSwu2DueAwaiP%DsNDuvI1(xDpg0m+!%>oLq}bH#!k^{|0P{at TE=o}f00000NkvXXu0mjfUqV*m literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00079.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00079.png new file mode 100644 index 0000000000000000000000000000000000000000..b71c8d51844a86fff2f840421127ad4a0decb291 GIT binary patch literal 265 zcmV+k0rvihP)h}nZ72Wy`TOV3pT9ona*P!ai9dh-{8dJkvd?|- z=g;3i>-zOy{P{BhRnGJ6pFe*tZRz4-c=_kgB@w773xxXr|8~op8xI*6L`DAp>Ux7_ zqaD<_=l))b!R5(qe=msOlFR(`2dA9>r$5he$uaEz^IQ*yl)(P~_cU?o^ZWC+3zyuS z7k}1s;gGX``{$1x4mrlZe{bS4lcDy{b9Y>F(s%!0#33B0{@IH)Y2X0>9-3a0!G!)8 P00000NkvXXu0mjfM*@VN literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00080.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00080.png new file mode 100644 index 0000000000000000000000000000000000000000..91e9a1421084fbb363bfffbe89c268500b21dc16 GIT binary patch literal 320 zcmV-G0l)r6vap>3hldeRJLEaDVsQbAbPyc=c9h!{bFO>yy2QC%QIR<>(-a<-4+l<%Dej zXeGl{BvjkLIRJPogCoOPnhnzc5POusfLUQJ1vqu`%M5)Z{i{lcWY>fV*Bevld()B$ zn?PZIXMGiy106Z#B_#u+P?6!YhAI@%j{rQ!5*p0lKBV%k!4Kq>JXFr#Oqmt{0O^su z>OmVXGCp;HRGmTAsIe#H&F=Ll{WD9+tv*R1-8HpYn4|H{llNTvVnU1a&*lej1Y#EO STNe@l00001|kMtb^kw0GrfKev9!_9r)tNtDda3ldII5 z0ArVfoJHR`kOp=#sYW4h^%*TINGaNO{~I;l7NT(FO2NlF(z~=KDL>&lsfl%3zm#X79d> z!6FsY*en0;Wic=?s66=l*A7F{)Lwvrfq{X+;Lo34tS)9?U|`_B^yfMoPTAx?f9-K= zy#97K_1ic)s8VPIfj{P~GNkl_ldSuub9 zK!fkkpFe+FQ6;aVNS@|ImJ9v+_wS)BQw9dRiaCG({{OoN#m@f>4ExUuF}Qm>YW`JWKYzZM8hap1I($di`Nj#|=N53u*YcR*GFSirRPTc*HCgu? 
P00000NkvXXu0mjf%4e5M literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00083.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00083.png new file mode 100644 index 0000000000000000000000000000000000000000..aac35fbce99abb80edf0b02b363499a6208512e7 GIT binary patch literal 195 zcmV;!06hPRP)SdWDX}2gOk_RC7BrTIdFIQ4jwoj2q8YYvEYwC^yWJhnWjw#Iw`2RNzr<~^BKO#6K7yoO3s3b9?%Q4RV`}gzOlwZ~ClXvlsyv>x@k@ z@^2#p14G2$UMzCjPk-<-FfjNbOTt5Fl0@%k1_lP+uOBcqa-BTP#lXP8aCjb)AWWy1 z$>DDd4BS(+9${+aIrcY-M8=GzkmP!Ely&^ko@=e@1Kt!KmYyv_wQ{*WC=zF28K63yS~n05RPGx%VAj` zg2Q+=izj~<;Ig{+@81i;I3yeY{{8!Cp&oY03C}Nm{`>bqH5*o04F=oN+kgLV7sO#S zgGA}qzwHXRWEnEP{avYqQ`YSD-{nj=Wz&BA#SBI`IPy1(fq{X60jvHs4C1&Y85s2N zNj}7s^pk?n#ti!y7#JW?1_LttKQCfnU|=YaVdVIYVHoG(zkmP!{rmT%A0vhw0|N^y iE30v%ATxG{kN^NZ=uC>f;m{iZ0000n1kTZB3J8CcB! z{#^zUVu8tNhA}Xle9Q1%bcHqp!+A7~xBuQ*suUw?^5pN|k3qbsa%vw}Ns91zt6Ts5 z`!^5W#)7|ZoEaDx7*hWJy(@q&DgOTN>r?H?Yd>Jo$iQHB0_w?+_L!0kk_BJ>zU}+_ zHy?`}0|SGtyYc+Ld&ICyGVo7(a|M?iL-OCxNx0-Z-u}(RCC7F9?_peqa;*M)UKE$c u%)iHcaLL(x`m2UZj^Xv+bHX?!u>kHC_)UkTp${%hKeob2!Y#&N zOOoVWWk0*wzNNaFAnum4!l4FCA5KZv-jGe_bFp6<99QuS}>V&^A@c-|lnQ&o71_lNO1|3Bx*C*wj=>rDr!0XzFA{wZ0dE8(sQ3mei?i(MY@BMC=X}gzq`#}~%j%?soMjJ&EjSI0j-=-4Zu=L z&cfI!izYgU03&KE11s@BbQIvVat%*w>EID!`3Ha|rQj(fN%6S@_{_w@>_5~GR1R@D TyTG9800000NkvXXu0mjfDcgx_ literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00091.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00091.png new file mode 100644 index 0000000000000000000000000000000000000000..67cdb4588433a803e49769a09dae392d7b55691a GIT binary patch literal 274 zcmV+t0qy>YP)IAcQeE9q4&!0bk{@g~B@~(RJ=g%LgFfcI4Ffcq? zfNY_U5rVz?&*vOmew*;_PcaTDcDIi|T4iuZR{r_32A7!*|NmYT#UaQ0?eBS9a$IZv zT#Uh`Z^fTKxa71i{P}2#LymvhpEGv2%v|<)zbr1dta!c=hZFMB Y026Op`9v7)!vFvP07*qoM6N<$f?-m2_5c6? 
literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00092.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00092.png new file mode 100644 index 0000000000000000000000000000000000000000..680e9c9b923c43abbf2c5ff80d69fa71560dd444 GIT binary patch literal 231 zcmVV!M??QBff!hjRP*_e_SzT%OTW5XkP_!AyU hWPmAo3!ieT003(sE&s0q`sDxs002ovPDHLkV1hNLV8;Le literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00093.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00093.png new file mode 100644 index 0000000000000000000000000000000000000000..dacf0fe177eafa8caf153924b246fceb9f518042 GIT binary patch literal 305 zcmV-10nYx3P)rE-poW^n8Wf^Mni_0# z3k*c4APK>5Hbg@A^3`C$9QXqg7*1owsBF8&Zx|+Q1V9P)TPVT1#g0(v97~ z)5mqRCsp8Rw+ob}3{%K=&Sy>2^xi9U_cON7|BHM9qz!KP)Or)@00000NkvXXu0mjf DlBt8q literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00094.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00094.png new file mode 100644 index 0000000000000000000000000000000000000000..88e1755348cf7538c0dbd3d3dc68b6831892f158 GIT binary patch literal 301 zcmV+|0n+}7P)VOXVglx%iiV{%o*dU|`GJzwytX&9mP;E@#2!O{u$o{`~#ZgiVTpfkENv-~SVk_)N%>O6>pt zoxvgLv_b&YBqYm)SE0!v=`>v=utxt44%ck{e@h|u@1GECa)DpoD>4-QxrkNc`9E*n zW`6jifJ5@{pFe*tVYkxu)89WU5_oVph^hbpe7Ib)oU(k~00000NkvXXu0mjfzF&}r literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00095.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00095.png new file mode 100644 index 0000000000000000000000000000000000000000..e02a6d4c95d126b848a9fe3b1ba03247c6301fb1 GIT binary patch literal 300 zcmV+{0n`48P)>HQ3{j{lZvM`^%W_ z0F9u0GMQ(D@%Ftk07=PD*s(!;kTDR-#GoUIQRlBD%_KDOph36!dO*?zHWH$Mvp yZymt?pHHMc^PpDPxLbQRo8AooKc^c8{jH|xTy!VgMUB@00000C{8(pPk+2|N-p?& z5~rN*$G=5LTqYDr9v+fqQSD^d@*Jnmv;Qnm_{ch? t4F5~w)F}Dy?`tufvS*&8VtmoD~27002ovPDHLkV1j6mbWH#N literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/image_dataset_2/media/img_00097.png b/axolotl/tests/data/datasets/image_dataset_2/media/img_00097.png new file mode 100644 index 0000000000000000000000000000000000000000..d798e157788ed5f0aa614fe1e5b1e769ba7f1960 GIT binary patch literal 316 zcmV-C0mJ@@P)Ur^Q zh@;5J*&tll;qxbh>eav`s zRmrvV;0*$TGm)LtBJ~PJ0HAF4FmptaEgScw-3w{XvSx2kMT4~NSr=E};vWZn#rG6V zz{QLVT+_!gu&vmomgB=YmAo%ZE>)8ASEIQ@k$l^9h9S2vcA{yE{yd)d=>udLa@+ss&tn0sl7IgE`TM#-7Ms&}FaP=Tr_%uqG%rn1MMo_(AVF3`AY*T2Wn&;MAVY6(d2D57bRaS?Ff%POFflbC zEg)!gbZ|N^FL!r$E_P{TWp8X@ZZ2&a>AUJq9WMgzaF(6}cZ9Om`WMyGwdT(xJJu)#YF)<)! 
zVRLj}aC2*Ea6K^~V`y@3ZDC(=a9?j`W^-k9JuNaIbZByAVPtbXHXv+oZ);&_WnpAr zbZByAVPtbXF(7knX=7z%Uvy}4WnpA90WMyM%ZDDj}Jux6@ZggdGY+++% zWIZq-Vr+GCVR>I;Z*6d4bUiR2V{dMAbaG*7Ze?U&X>N3KVLdP)VrFt-ZDn&kGazDL zaCvfJZE0jZG9Y4KVPs)&bUiU3VqaouVRJn&AY^HBWn*+bF(7wkX=iA3Vm&b+Z*XO9 zUuSP{Juo15Woc(N2qGBq$DYh`(9ZggL5X>L6-H6U|iWo~6-b#y&6 zFd%7ebaG)|a%E<6WpijfFd%YcUujFd%VoZDDvlHaQ@1aC3BJa6L32X>eb1VRUJ4 zJuxmcFd$)ZJux~lE-)|vfB*myN@a+I0H4}9hOJ+(-_+yU%Lx75?&%v38Dztz{p>d> zrBG)jH02rN|F$ZsTc4m;QY1cXZjOv=KUnc!A{*x(@9j@F#1gKJ0394>raxL%Rkx8n zeZ@H^d3S?i&ba$O=<;nMcMc0Fa*6F~o#0>HL)CRD$o&xW=Y}UX4%Zl45{8S#69n9yyxyh3 zh$e^O8`~pColi&zi}RtV%jwPqmq*_SO&Xd4@IH!4}_N*3prK-7*|*6>}I zx^PE!gy?Ror2~vvF#7Z^o~uvJ7PnolE=aeMl3yq8E_;I6V}AL0yD*FKw(m%h`S8X` zGF{68O-h^Ilcu-(e$u^Ne=+}(j&Zljsf-nLk}x}^J*qzllfU@Qs^e6wLWp$AXzXXU z5Ow~gC4%FNA?X;d9_(V34cCE*#KoZq8KIW#Ly$=#9ITQHq2{Y!hprTbx~|CT#BE^% zeNnVi)gkNw3HSKYh0^Jr0m7OjtF?VCCb=Gb(9NGf5k?9l41I={{gLg8F;gr35|(Ti zvaWI%_UnFy6|{{n57^?Px)*qp8hF$jr=kY97vuscT+x}>6hIJ*el{tZgFFk#8lRqk z}X#-FgIAi-(^89Jg~8B?v|_eRKG!bKW*g*yKZ#oq};ESB@&({QC#qsscQ zx2P&_p;!&rDBC%y?TZ?%Ip2ZgGg61-*nj0yzgAS(v zm85L)!-xqKBQt_!0qhc)U0p7nOIJWc!29zllBcyH5yaa|W71Js_?xaX^y2aOKRM-c zp*&z8QT#+lj&7{-J=cmIaDA`gX)z10&v$OYt1Y9lb;U|Hya()|to$r@4;eR%>3vA< z?&aY#9s)GWw%OT8Ztu(+V4H`QQ|L2fYJlwSBGSQLgj(IzZ*+aB+WRk9!|DBi7aon{ zV90DXdX)i7cLSA0*b`Yujs{xug8h?#AK6I)|@B~G-NZm z+sgsCFm_2p`j@1;kmi!I_kMH=)v@lAV$FVW1*Ju32!hmzS6dEztnNP`yEBM zRk&;Ll?FJI{b`XwC^Cp3yQ(&FZ#>C7QWU1D9@p+W>Q^QB#71u0LTt?Po>)9!EMIbH zu{C?atnAi~hzU#c|82@EK%45>Ehs&f`?p-+$k-_(2=M zIhi+F$|{7zX9pC~oRk!4%54j%f6nve-a9Z5L?D`Z&I5x8Z z^RhA)<@Q%~1kk1*@;<5raS?CKi<4FIf4)Z*al(Rd5CIELn%K++L5Mrojz!HU@2z$0 zWuTjLreT5thrVvjjSK-hr0`o|5+wV0=4fVBZ2MQG zQIC(qN4yQy!pblxJ;k{gU`>N zwx@28J82xrZU9b%&C1b9{4u}Z5lP6{JMC_19Y7u_J%4jR*1U7!3f;kgi0IY~_?5bpPj zT2`l>!&F?8r?N*a@WAG`Z=I?uR4O)JH1@@y$A}NrR}sjR{P(Zf9-|t{61wBf*8EF$ z{KWPdCh^o1aJfq+N#QTdR<-$ISQiWTAFMze> zgv2z{@0iNfuwyONSRh~iMIq!aGg7z>3_n_q9qm-mB4ZpSBwUyIiYVl^`}WU2{Ac{O zkAGGodpyTx;+3{oQpT8rps^fVGS7jZgh#40bc3oF{rLP^rXDa|m7~(Tu6qza%7J=1 z(gY{;KDw^`#9j_$1{23$`7rMqqtp;TcyaA%KOtFuGWBj1r#{I`xVVdxsE(?xd>PmF zElL-~hXLL9y$VnGi8DQTn9wc4PlxED7@0bf6={Zd!4g~n`{$7DM$JJnud-Q@XAP!e zO>Xz&vb^&Zw~|+lBP5&pkXSw9)z1fjLnn048~Dt1+L8cu60_vajvNZH+dpcIuBI== zOYel_FJ!-MT?lc89vtJ{P=D`}0qW3|D}@%6WSx;Vd|^}lU_)Z|>{uVTdCAm<{NC;Q zUjrb$S>YKiRDKJWl8Ee-6Z0M zC7Y8UvpfwcQSrmVC8xe&@zVVXmEr2f0`w5L)_-RX`g*xYtP%l7Ww?)iUo+@)r9jeA zaWAm~U{3GjK+}Um=P8F@ahGr{bhb^%IcG^EH}aD&7q*eGu6HyYvK+YX`;dO=<_6ur z*Lqxt6Duk~OQnX23%_{Y9p!>O0;%?Ks5Z_e0VCHKE|YMXA{BOMjBHAAnKl( zwV$HD-@CGuMp#{OZ@9AJZFCZigy9=~?a~0h1$u zKJ&BMB7li@*)c;1>aqn(3h7n=Mi0TwCsb7wq6yd_dGy>qlcT?& zqXLBQp=N%Bgnio2g-d)J!k55b=>`Jqk5emQ8V~JzX;2bBxeRBEtEc3@(vwkl@oZE- z0pM2CS(n4rs+xRw!wr{AR?L!0GfoUB2&Z+|komQ^mC)*#@RBuu=bMd;wMcBVFKIY6 z&NbQZT2jj{)_3h}*HTFixOLM-9EmKq$;Hv|^}n#zwZm%gngR=(nRf>VnV7hh&S%9k zvz5#l1`LgD>iEKtb|N0Z8b^uO?&DWUx~_Xfhv6|1%i(13caUKPqWF{!?e;Csr!c?4 z{1(xfWx^9Dh^=3&JLoG3fOgb-d~3^ps4-#l!AyMoU+1$5w@{6HP|Mv00>^6ilYAEn zfSnH(f?nI%iel{}&*?YTJ8HLTHe!UxrOKxL_cqTqh3DhqT2?~4NE7~G{Ji{l_aihE zCzeU9ZPJT>X=65Pyp`U!Ts=0vsIdH5a|01~qOI4M<3#j= z)Ssea%&>1gzx{FPhC=#^>w1^yNR(NN*mv%8)wx6lolP}&OI3R^H3w02ldMi$+W=3m0r^oTHod(JLl=mHUhfKur=1tbNTh8kHuxs+ z2bk~dgQASIyDWfJ9M|Ba7PNVNSV>^`1O&`otJ_bj^73M}aC(ydw9EQqmFB>QX)VT$ zqYq&t)rU-&@O35!zP33yL<)XT;C(Yf_*32OID78OZMIk9wKy}OQAm?d5@Fa}MMy-# zMRs=~&9%a&J%{F_6SWj=N%r!olTJZQfen^U`B{qp;MMdfp#lpTfOj?{m(}LkPT6|# z&#Hnb){RfAnai}I5{qV->$^tC4@nvzMgs;yYJ`y;As&d13J2!kT)<(p;ly(ZAV0NC zs1|ODU9JXObrxqmxb8W*1UxgYAK*H)GPA7W@wl@G!(%qP=(?b1oEIDfd3LNwmJzX- zgF=hXz;6rmLNKy(04SpYXD|oY+1gk2*u}0Z6##fiI&=DPxp{iQKZCl|3}_TeKbKaW 
zFy&u6PK&O%B_%z5TYCqQmd!S6^=vDUl@Qp+fzg=gD}JuqT+{5;x0Y}5#8wA~u^{Q0 z28YOaQs?g5;JU1Vm+BAj+QK zjG%ElXDV?4GvM!UmJA({(F;!R^T7d0tQMe1>+=R}7QsjNb6?&)6}>4yiLLDP@l>xX zD-M}_r{{s;-cSp#HkCb1)5%?T?-kbo_vk!Uh4Xn(wUuUFp%bnO4$GXpImiUVk*m%~ zyyklI9EDp?o?%1;t~Bz8ABenGw7+4HV>7?zi9QoGemTUf4Of`NVy#aAlbosT;va<@ z-D;UbVLD%@B%n~M-y+so=fvKrhMRPbhrhyEZh)#vch#S$V_fA0>RNz@{}AxG_wP{^hpp zL={??T3CvNCT!O0$EChKQN{B@_WDD$YNwb%Ox5ylL;u?v($GDWrr7{@svKoJzYuGt zmt0RgJa5jJFz(!piuj1DmLe$Tfs005dnnjvgS|Gltfhq;AV^Aw;u3)9#; zrpiSt+^sCTot#a%A-+o<^KyGmODCxz5 zG!@DK`8Mj!wTym`a?Vh3TY(X_>3@sf4Zf|i%gN$&8e@>wK?R4UjVIz1Mzqk)>0%|v zS_}BS%pc&S?AnC5K(3JHd?*vr+YXX4JwLmilmGw)PeGbQJRy@QgrE7O{1A_%Fgh`V z*AcBa{rCY8f6~%)h3_<$U3lc{GRp0x{BF3{{i0u2 zBPm)hxwQ>5MWw&31Gx@C>P6w{t?RCgs_N^8U~bJpVz4bOwb$2RfMk_#XX>#xYJ|0| z7*xO1adK6hBQKI4vRrW)Q(Q1?H^0D)xsYC3pa&VlqZbt`HsXDQ4o{bjCv$c97KG}? zHF&&6nO5XPNRTzRC!*d)`~jB8g2)88r1Awt`p4SMBB81wuEbXLp&)6H;LZ)&Ar;B1!K00JfEH<6M-3D4>7iRQhexq+Xm0U(2i2}xJ-pZ z^#+=*lwk}Fjxf`CEerQdbFDbEw4X$F*RY21dVk_bur!%4cVV zL-FI7#oL;tF*-85%N4!057MGX!BG$I&k~Jd;rdP&wKVOE%fOxcQ| zI9ZqY-FmP(q!>yKq^^VtbU%gco7^!yaZbjfz2a(oC14UTJw#Mi1Mg@i9h9^J(s>tQ z>$&j97|S%SCdv6A|Lu~0@Esgh*9r@!5UKmdyU7Se$e1D$7Tu11f>X0U(fr;%AJyv^ zj{s@@S=jWaOdw_z9u0k%?Cq94QktWbFq{=BkKSoyn2<+7{vQR`@Of3G1>ivb!}3{v ziARS@$G=>*ZHu1&iE53xE&#XUv2X^E?$F7lxbow`ns?FuMOKL(A6<+OgcaO}h?Q^=- L`($*nQG2$~>;TkHfR-qL7F)ylX+ zEF@D7b&|M=v=&akuMxQY~_kumexU5745 zmsY4moMJa)^G{u9GHl)Ywl&qcrOqsR>1#X37?3*1UD98m3Pba0Kw|a4-p&=Rk*@}M zj)c0i5cA3id!FMG^UzJ;bM3dwxJ2KQ*1k9N(^fPY2>@zzP$iWVn{h;b_C>q5&5Gp*>_{&>+mc&s- z-HOg5dxuNMa)eRdu_Rc8*<7Sg?)%bWfS~V=$uOq)tihmzz{Po8pkqDdhXA7M6w*|c zMbxETcvz{wf|=jQ$f61K2W|0@f~=IGK0?Y$**+dL zDYgSk^5v?O0C%?tKlwlL<&2R|jXna26AB$F;}BbWQ5h6+pYIhw%5%SEykD&0a11o+ zxjpYmh`^OlU2z4sxC#hgVk8j@9%My_B7Gvuq}GEbT%-7+9FgIh4m9?iq$eH;(opVp z(&&dB`E<}6vf_^{s0+507gZCBoX2U1|36xp7yLc;E*%W9Wm0&6nRC?Y@Dc(^94IQ z+T3NPUeECfNn=S{iS4@$nH35c8gUdBHX8_wN6*L*J29t3Pk#UsgP(*lrq0^Uy3={= z{0wu@rjHO1<4n$(5Zb+wi1uzyedP(!8uiPjDwS7AL1A(0$e)i@M1|%N;Awbm?e^mE zUic1y`U-#{8SQbW?nWJ^c9z!@`(s+M3a zpmyi-G|PgL1-_-C%0!6N2wpL9DV88IP_54dAC^nA9`1gtS2|c-8?!A zCHGed{OrkZeIialpx_xJ6Z5+Z=-B8b8?+{_D*IJz3&MtT_=8!+XowMuPtq zjYJ*x-n^FJT1obrD$Zt4p8_PmW6AxEtZb65*1^B`&P1M|sYU;1HZKNb0yEoYu{v@* zHaqLWVaagh|1A&H!48H`Qfj?hs>@c^jD4zAR=kq|A7l&2!&=rb@Go7xT;Is=kbaCC zj#yq@uOvkq=lDLe_d~{kaF}^eHoB_aFu1q724q;M0l~nZY|=)pkN);fB^v zEyFWbxBNOh$*_}1!njA3&9YXw;Iv^`i7fuU0Q&_8;1C3L;jj$oG`m6;JT8?HaEM=C z1{(}gX(ywq0h}d7ncy#%bas@~+YCH9 zK0S)O3tGCQthu^%oa>cTD(WJ^tT7*!zq8ZFE2}R%q#QqVaVs=v z2=8?t?2qoJkqx^Ft&EMqdNX%M$dQT{!^oHYnanC$?Xi+Jw=%7IERC%>vI*kq}OMN-g; zIprs4wgecNcJF150!TdKxfTAcx}MwqaMl9w=^QhK|5F9goFwzY=u!6^%5!SZb%Er1 zSpi(K^38k7hfwDKw0?^XJbh+&)AnWyhN8}ukbRKVQDuZ=(hS55 zz8-WYQWIj)gCwr!%gndVY7gT*yO$O*!*pELesL$z>47+?@s<;0}0bD z&J#Mni*2c|8Igi6rvDCut;>%C8d(N;J8}1839#X^>D32S;hu?W)Ji{#@@(s*Zq%+ND5Q@(6S+O z)s!*~Ny>Jn+J?JhcI(}_OaijZ#r4c=n6RX#EUd5QqrX@(n*}M~G@1x>2`%8M1Kiyi z#MSN14+Mi7R_X^5qqf|zUf1)uH|=n-dkUGVyz7Af@s&y)@6^&b0`NU5HhW(}#f52> z<&f1%DdX{$d)MQyi?XiRVYXD*zQJ0^ulagab(2@>Kz$zN>o1#bDn;5rVyD!Od{W~T z;)Uqs&qzmSP})-+6mv|-`c=>5VAN7-y^gf+At!)JbJ*&M+oEOd$bqDlKl1Um-!hie6` z;Ac0sb891kMW=iP&1X0_rsZws@62<@e8f@%ioPlY{po>JfRYm?*~;sm-^o1DaQrLp zguks8WL9EKXBfp%lM-%;E*#MDVWMl?AX03Yz3&XvM7AWS&L!nTqGs>yWjeZ}Q3J3- z2c&U@j4pxLJ9e&<_tvj>Nb?^5Jh7gY%4*73%p#|%)b9G$6$5hJIw8!Gr7CPMy@y?7 zv=0jH^e7l<8t5=0xGued~ zQjBfM(SAfe``3|Xoxu+rZ_2GABo<-%DF@6)cC;H~^C3rZlu|gbeAPtGe!f*MqiQ7! 
zzL^p=wY34if@qO3X&=XekVCua<1u9z&t78Gs+FuS_QE^JI=l*VyEtm8eW#O?a8i45 zK>8k#$}!KKm3iAFI%^&vhAE9ihMp~nh(l<}?y= zV-LKX2%p+FbKM#|*|yqwggb1Tw9lsC$**N-#Wz5Q+{k;NDoG}Rm8=kWu4E0CJH*Q>>nCn@A0R}wED?H<5rZISa$EG{D61L@Px6W zA8Ony0LRIHt z3UkG39#)Iwfef#&o1QeTptsKH+Dl8x!tuzB2!+ROXh{_IAS7bSy0L<7M5=@c2vjMD zt=?zP;BzmE(FxfB&@R@bfgCrVj22l%JR)8H>n@wH@&X1_Mk!k@dpX~rC<}@%DFzDIAp^&dKnaA#?2u3H*3APugLfhG5jX! zeh-r6)0C0Bcc{W+4(YC~uS|lHASW_ylI8E4@)xC*JBE-_NO$h=G#y)?A6E3I;4agw z&U|!KS-*xxe;Vd>9Ih0Tk99M@5G&+rbSWD{j?7Bb*T-x_O?s7jW~h zpo|C#TCD&b@*oMgQ+kl_5kwg$TgVe$wSdyZCfh5lRB68JL*}Lfw_arKT?0(te<4stR=Qp9Spz#001#T zp6Nms|1&m#x`Fm_fg(3k$K~9-7m2CcBuvK15#K|q_6U}OG~c3mcxPIcbWPDw0002C z0iOtI5VuxjHaZ1GV-(x&to;@Q-Eppofm9%oE-WKC(L3yM92@O}bos+z0=^vFp-|}3 zX9}Ql>sr9zL%|ydCqx~|9~y6xlKaKWW!`fN}E~NlyfDaC+|EiPe7ss5eBwPS`9+R-6#p3 zX4oC-l|v&G6GcsirO|7A-P8^zikxyHeGvcv1pq;t4N0g$Y?(|6zcf5vf#xavdw};h zC^uWH_m1T*mih{p{T46}WZ;Gpd@FCUhEwPNAI3xQ_bR;!k&6VwQeo zfcX3{`L&KJF0Hp2&;ayH2R$HvQMg=QI&lLn?(2b7qnY9SH? zZ9GRBe+=U04;LWaXpSfl<|AHPzrQ#)pqf+oC&m3MW@-!4R?STQ^z|^0QRemH8?_Rn z4a$H1b?4MIp;hJ{o?GjEBiwE9S?w|KX_Mrwpe> zh=s#vBv|R9wg<&BsINCL>apu?@ye7GI@-kacZaU=Z)Gb`(VN3n(umEhenBFknrG^- zBUR%IY~{wgM?Zcm4GkMQ-~wv7096N86s8&#YWk2l0q7k_^FS}<#yH=y)0zng2WJWk zp`rZH4W%rAp|7!zr58{klAQm}v%L|PhH)RHlVP%iFH#Uuf5U_6(H9km7Nd~AsKBx_ z*bLDFlZGI(+{7v9;L%r+Q8L=!>*?-2`^|Peztn0_!;j9=p76fUT z+pWH_JA>X#7_X;w_V|)zIV^;Kh)-muAwY!j$Xj;jkg_eMqlQEvvqpU?wr`2`0?#5f z(11uJ(}ZNvoI=O2ram78o>V2))45vT_DRHme@%I%8BmQ91<)0a^e5?sS5?iKw===a zq;IGWUpDuX>^fp~bZ^hxiv{o!H!7&0L7J1l2MoD@O5KxqDv4|F1iZ>sgp*i`tYN`? zUA3r>#_MpAI)dvy4Z;Xm!NV0%ltHj-9jNn2EQnv?ap7NIR>$>r$fE||m}j~Qr*wTF z^s;+MGK?`{C$!|tthS>wU%YQuHP*}(#@DJXn&B!Y9h*QC$S$HlqBk>x``oj`2f$Eh z!~{~-Uwh`JXj)tN>IV5F$jkkkuik2U{-g-s}7q4f@kVLd_+IK^7t}R zg_ouqNqtb(^=WesYBLYFnlfrhE{Nre#XKPA(P6~%VxpCH2cn4*mHiz-)-X^%ZeC6V z#j$RI{Ed#G(C~+M+DRov-Dqx&$mU=1BRa3jPyii9^3PQ3R2sP?zFe{x= zK_qt$DGx|I@$9DB3@Y}PgOdjEmAfS0(>MG38GNDvryu|T0Sf`2OKA|dEsyCSs!Bv| zEPmAWXlHGw96kML!b~v!=76n$NKmtVh(Qjjwr5Ks{}Ih`G)OTW_PYt=pa}GdAS_8( zd{k6c28x_FnXQN|Xu?&|$I%lB`R9CEx}Cs;DSV=FU>tJ6qo0S)aE0eP=iQ=@3?Q47 z%(%Wqi9G#0Y2>3mZcTX#4o92Xv5n@ymmyqoFJ&qaX|6#xXKTQ<>K$dMPW3o0iaXlz>*z(Jc% zNvJ_=nM?`4IQI7V#GB?^BD!&S#(NjZ%1gnWlCG?wpgtFb2L-Pq`Yex*Yr0JeWUS$`tJUDC~7v5eHLPpp!F^8{JM+$|+@Ek(GltKLLOz|ZT^(Fe&t zrs~P7tQZ7ku5aLGWedzffJ=9_F@WBo3?1}yG_evY52$bRAQ1K#sV{2A#UC;%qz-{=2pv+PNEQxHz&XL+_K>!*n?(s|^cEgUa;Fy;v8ZT# zt>l)8;Vu}_UEU_^AKb|Wk3-wqxe3-Kb(zW*Uq+q;rXJKG@KCf$p`ZIKHpJ=dj74c2 zoI`7h>*KYO4%xqaS_j~JMdJ^lzR(N1a6~8Cj)84vm+O*YndkMkPeEO4LnoMz> z{IgcKPy*0`LY)wBEVu%*HLSg}%P?oZ+hGqdJ1zrRVsy;&@c+siXz3YG7suy^{#$^s zDo9V+N90UWxy}4zkUG8`lUYwJ#zMt(ON7q>)=Z9L5y{9TG}5EV?@r6W-PT@)ZH1|0 z;YVN@t{dU6cYd$*58P)uA%EIUPZ)}{LG=j%tP{(3tLQwAtg`4(I5zwxQN=G5u>Jol zN^IjNF=f|sp_sZ8Uu<1lB~m$muPXNXUENvhXD!|SE8o|2Yca|}J{&AiJe~SPHdh&bsAeHT5zHv~fJ^4C)pMZe-`JwG z6;uoe{DvzbU6%2Sr$!nqrxdd29q2$wy_pS|`Nf;rlY-{0)5Qo}s~G!Ct43)reQ7^n zyxvu{ew)xOPBk=38UJSP7stw|6mU^}bttRq+7W9}Q9KWv#oBkWC~d`AW7E_nsVa)A zs7lMm!$&LM*WM?% zI2S9%shy;w{fQh3@zHb=sk=Vi>fLY?xcFtES6_>D|5gDXdv|#rWRCJ=aEAY@uu;w8 zXGty(qys|f9?AUNN{C#U#T@YRGW)r4S(PDv<9X^kNKd++ujm!6FH`#In$g&+l{Qb0 z;HK>clp}U5l2^(ci$gl++du9~bHwF|Y3%!C=Nr*utpjH+<$hq=7(qqY+U@ z)-=S9aIE7H# zL+OI;XnZbDCxfre6sJ=g$xAZjP(Xg0lNzv9ji5%>k7kvI#tHmy`XE~#hf2Vsha>7;Z`wj6LyhKE-S_*l3xgrj)J~*=x(v(FWQ%%ynNdvj zOrLBrrnuB}x|de}C{$9y=yi4ZMj?CdkA(z1G; za+k+4QK&TbDhLj$T>C?)Y&{lw5yVIl`*)XmBWXJ@x#2zT zqVGva1~vLSXj8iDFD~es&iwDMY9^k=m*Hzm{L?~;uGalXY|g8?oH+NuSaIaGd%oHs z<|&kf{{Jw>n_*R!vF)^uC)k_GsEe~lBdt9OtTSCGkFX^H4UG zL8^a7W|%^a^~PxqSG;$^_CWX0l#N;XnRVH5jJm71V_KctPgFgo!E*{myoFFrMNlabEM7%*r+{m=g+?bIsg2z)jpM=k~N6cuf>h}p=$q?L!n3)mT{#Y!w6nM 
z50t3oTB17mvsa+gJo>>GLPHC{KYlV5yu3fVgx-U8{k2` zK?0W-*#cItd=V&Z%Qeqv*I6COcy!hNmbsHd#RHD$+G42Bt|ecrz=R0&@kGe@d=dz-~9|Pl|qOeIdt^m;naQ z<)bBC-t)I@FiEA2$R*{+8fbxToFI-wF=cigQn`6cmQo1I7l&P19Y}+T`*cSQ<13^$ zj`c!e>G(iUw2L@*?a=5`DdQt!Qrn=M?3=w3Uj@~Msv3wy^7o6tESf?c9%o|m2 z84vKAQvd1WgJ*jRBXcZ+KcuSlwC}5?M)p~t_^9K%8}yZ9>*zP2aTPqK9Ro+-+{krahwac#%ib3u6}J_N9VR^NcIhlj&TO4jl2TaD-L*Aa#cjU$w9I^0>#@lOBi z2JnHw?Mjt*c{d_kF70o>&`e6hn%rG}JZEpyDxpwqW$znHYfKq4g`y`Pl=tmmz@6G$ zWWK+*Rf8%28Sp7Fm4*kbW)~&<*h9h1umbd&1RIBS)HT%E;vH|-iod-7Lvv#+3>&c) zAZ8AJFKDHcDo1As6B~y5o(#bN005^!n~+K24vCb&oBseBFqJQtI!yQHiLa)tA%BWE z-9%)poxWB_g9BjsOcQ1R5Jo2I_6pYoxBR~@{E(^}5u2MYVQXwBsxs596uRNuEj>I( zsuP-I6C|fzb(A7mJF|Yst2qHn7vPt0&CQ}6wJl_B! zY+MF5!Nyb(&oGLZPY%W10WP1+&Mv3&;@>0*PSHH$Qq9;0-P~p+MR0pX@Y8Z==%R39c&G4JO-Hm1 zv2>0U2;9fKBqlGtfRC3>p5yR&Pq!&ph3?ZTVL@eSRK{3J zxh130AJiVmX3)~80M;e|PjOs4P0jL_PFbre9O0cL8on+sU9uVNu)DQi?oGcT(oqh% zY^lS3f^m7uo|7X`5KIKc=nIkkxqvpNUj^B*ROQ$dZN{He#NDaQ&HM%f+s;dea{2FW zGd9&J_jkptegC;Ozda!KSgE{EF}qotGsUf8Pxu zCl44Fk7xOp*)OnBbJ!DV{XZH*_Tnu3e(mQT;)v2UFu#@kn3?xP`P_#GrvGp%6ET6&9S?05>ht9>qa*i>#qhYI+;Kdk~1!8ov~$q z29EuQZlGRMb;wWb$iATAe94x0J(g&sUYqQNePQGMF20Ar6y(PGK0rtC&KS>TU+IZw z9SiZ7pFgsyuq%`!N)A9Hx>jy9uJf^NR{A5PbpJm0!TgW)9Bcp#d!6V4#JQe#;+Mx#)83DZ z{c1*4GGFbxUJQAq101IE%Tz*BYFmnliddtS8PnZ$MvkfEs1hw#QC|HMciL4BYYQlx zEy;-4ie7Zy0Dl7BX6Z#nBAw@CdeO&!(AwXebesRp00K8PK;0X#2SY_Z$E63AH8~@pkRJ^1K*I`;?GAVS9ajie|*R z4By+vVCZ-dy+2|~%%y>aww8$G^L&pDv_9i+f65KTm_#7?Imeq4&MAqs_xcq#-?u#s z^+P8|3-y3D5=T^fO(K2AgiZL6vT{-A; zVmidUd*G~?Bs-W6e;k4OT7_o@BpsH(u#j&V$suT?*ricob^r-VpnSZVh)CTZhmdI* zem0bOLAdjjC4^4O*s>`+VL+UcPo_|>pS_*c)N2yf!Wfswlk{>&ncb8S%f+(~IB>OE zB1EVtwdoFPg_#)EV{U9qw{U9Y8IOjs`q@7f565R2qlx4d9T0LWwZy6Rj=Bp_Y1bv8 zQ6M#5nl!#%O=l{Ac_Ld1FjB}n6LVw-|Dl=vGOQ;c_Qa$C*q4h4;XQ7n+LB7ci|G)A z2Q_~5BI`t%Ftm@HWy$U4UUf$R@?L?KNn~tJ82|b!VX<7n$y9)^A-IgIgrAJocan@u zyCq!EuZ+?rgpuO|zrBhc+^rH*0w$D}eXFo*{87Q65=(|vkC2J&e0yvTWF+pO1k@gSP#t{0%fLkT;+{(Lpk?4pzJsVi+;=t_f~P;(`yd7 z1mjaBIMg`0R0&iFSFENq-KJZur#Y76VzNLN|9+oe00nUY`&;h!_X6ijEbR%6HFmRA z_K6se_u_{Pd%g>~ydWAg3?%sBtUxgJz?h&7Y+r|#BBl*5b;;hR8}hJk6vFuG%6~;G zoHP;;#X^Mlbc2#m5kcD6Zyl)oZSsGaFw+jFO)4+r%vgsO zNGxk#H9ySWkW-8ufrakWn|b0i+Si8b_y*=!^PLhBNe%%N_)j`_)y_rNw5- zE${46!+D}KHh(7ssnOi>Mnhi|sMnKn`kMP?K)^#dJnev>Oa*Nx?)iqvtFdvL`O6@SWJ^{lc$8+&JZgKBmUvm<2kq4Z0U5fb=O=r0X_iQ-Q-G+(2~u5LGNem@Cm{ z=oH1tlo~xQAol$v1U2x3IBN>4N98YvW}^s3RhLXi@!G?>fIubS$H~m*Iy&n)ofG_5 z?m7Ht-ajoD-*8L{OQd4e=JH}8Ub>`}h@Yu(zuz7e;@r8{H#d5Lo6O8|D2ob^LRzUxSnq2R>& zAAvmUr3wOk$LMa0pAs*($_WacXTYv`yB5qyo+>A4`N`5u**SOIL{ zh6pr)uQz@}1H8ZulUt^Aftm@n)X#HXi;sz9N6y6%N+EUQ_K9O>EAJ}nxOCV@=}R>a z3iC>hon;?xSfMP8zR|~UeSDarO1ZS_KG-6132%OrrVlm66iB@E_npC%;4^zOvfZtCXGJMDKD% zic5f$dy;&Ov!`_jF-;^d*xKT&TAsb?4Ci*hIhg3SqS{Hl>up3c$vO5K>vEs&t-gkW z%O&R`Tg0pm#!K9b_*(qd#|GuercI02xX@+{=SJZ(#O|S(%Zd>-u<{n5t}st|C|vXU z%8@)sGjHDHJt>`N%gIVOc6AXXxidJ~!zx(-005dno6||*4<=IrBmV#`>kN_sV{dOY zcECaWkR-+8=lXg=*3ju=iCV6phedAjz#lqdcvoS%jo5Yus%B}4p#<_NzmBA&Y;m;2 z^xTD%WGBhqPFjem$zHhu`J|iaG0+X85)x@nS$^c52G~s061*lwH8gfy(bitP)q;g! 
z*sTXrHd}8vy6!XDwwm}jljw;#jyq-}3h!vXfB*mo{6U-bN#PGBQw0$`M*$^)^$bHw zX7#-?G*s9Uz-OJ7+)h-l4P!`phu|ensRJaVrPlVq73K%hlV+CzX(}AFOT(#XxJk2u zM^sFpmW&1x)u)V$9O#j;kH=Q@(dJ%z!C15NjGj+Zb>nWbo`bwqKo4~JHRuiOY`iTd zaW_X@&M;w>nt;hKwK$#Bk zSIaS&T!ah&{=gy5@Gh`Uy|}qfr&MYbMGMl`BcDLCNAwvvV96u)rT@Sr=Cpw~Nn%xx z&DTzQ9H!}=`&^y-#1Yq{6#DcV*kM)Nz8gsj{ z{LouKZcI6TZ_hSC`k|<*dZ{R%!MpuD%TDAuG+?dE?R2cT%ho1BwR%%24trWcOy+`J3}5!by-N5Z*q+@?-YqFNP@}UK2XQ`fzI;4&k{PQ zEu0n8irz7~K(7?hZ;>JZf$;J_edyMfnpa|vaYwp7RtJU~;iL)MTHN^hM`9QU+?iK6 zn6@Ek`EiZ59h6hnnGnsx5F5k5f5RMvnF06>W)ONw@AMPNp7~(|HGo7uQDaZ7bq|20Je;NdKH0}7JXgm$z`T;RwrXfFft;Tn-N-wS)O{!E+#7k?pg4#`} z1^{)bg`JiHCgGS+6O^a64)tx5Ig;|vwnQOYj0_2x{Y`kX2OhhWuku%VjzX2wbirKG zwtdhh*Onm;A@)#NEk73>!DXutLr~KT&5VX{QvZHhjN8o!Et0*5jU8|Bg=dZW@$)!o zBgxYGNPu4BQTVIxyzc)Bp$q#r`h~cYa1$dxbCHyAJ!;;E+=rHL#GP>*x02jB_0igK zKfxde{S@+_xqb&svHfSaG)yRZ>c}WmKw!=Ht(H(B%HRpU!u@9jArq^bF?Ghkpd`&% zT5fFX7A#`oV=!Ia6`*Pl)&jLIq?V5+$`TrB zv|u)qErv*}N?Mpz%*7z8`XYc_sPBezS``YDxyPz1G6jl}b53%V3mn0PWW#64kyPQS z`5*pt=SZ2hyl}-*Yhxp`=`H+mI&Sq#p4E~khM!to%dsoF4dvQ&Fuj@Z*uhmGL6 znQ_yKpSZ3AHm&9Y{SF{!jyTn~&wEk_v+9%3O*@Ka<6*Cgtn%;CZI6}1JlrfU#6#N> z3AVfqyrYEX8Kc{D*uto;&(An&hULS2u_Mz>%;TC8NAh(ACcW^;K8c%)VkT$4o8Nw0 zt63#^+3LO|+B8?zXexNilI_+mwJ&p{ST|l`&Db$2FhgPGl0y692%x``6ySJ{j*^(@ zEB8*bMd+U=4#E8t0i*vDX@xBxr>UYv&P4Q}a_%e__wbj*{eK&pdn(2bkIXZXVs`Ha zJO0gQY9))s0hXusnOH*Z%hu=jVjqMM zYUFLZoo^J2X|tDKXRm!~$jWzyy|*V;Mt}JYd>->O-l%I1cAD~}ATIA)y{s)9<6VHC z87+~_i2T|54}8>vIrVV`$5Z=5FdciL5>{*a)qW6E^p zajE1&Yc_hI+D2Zf--m0Rc>vG|TvsC*2Rd*i8b}7~_Kt1)CjP9I2j(Q-je5It->6{H zp=fAs8ewoxI*vYYdr2Rq#Qo-DqU!!OG5<6GB%Rb&;SVk3SMV^!dDb7ntfy)c%l%x7 zMB|>YOdq-kB>-3^^#a@30`d`>t%cF^;VW?+gAqglYcHK5TAYVhs>@I|P)T3=j)S7q zt%EY2D@f4UzeS-G>Ihilq_A{B*4j^5li$PqL)i|)n#HiURjP)t{H~*_GtfYU-~a#t zi2yXqHpsY{xCJs{PxRmO5ve7>L#>qKf3yZE}W{UR-j>^=%pm* zJQ96M#^_GNYbAe{K>F88-i1w|A*|1U&Slk_~JyLv^}M z>2do}4`eB)?7p;e%$dL4jmd$+oB`JRfdfU2L+dZQO2`KOx_J!I-3L1a0Xb6bMQ;ZIjxRwfU9oSLqU>Bt`IWR zYnFLU3c5&jaeqL72_cSUoGFv_(7<+~Tfcq)2cfWQ6l`roKHAs5ILM=0Nb=zf_2w)o zSINNVqEr*mg8F7H4ow39^_~C#27N)AIZ5FUCQ||{{{lHrRFGQNv5A2deagV6SuH#j zPR+M1bkIQVQm!M;QnbCgtE5ny%DUxAbpj*!B`2EnMHPW znGWD-IOBU@E(~)cB=&nfV(g z^oec{>bUN#90IX6MG=shq)vAGz*eQEyrV6+j$cT~FMBeg;TZt*i8FXAwdKWEaKWpH zQ(PFIh=}ULe&6_n+lx)hQI_kDFeOa)rzFfdC5|}7a$jZOuZ%{#@oJu6{7rxzniO~H zO#{<)u7XC79tata!gw!~<5owAhTbe>)%;RVUncRLNX@Y5AIRT*>ZBjHzHc%GT`}RA zb@5F(tRha);yRPNq63NNgq=J|*8Su04^&qFW{2t_YI`n?qnUc(coX$9tr0e5>g0;O zcLJD2b&0Io4Q1;J#nzX|#2g>ygAR$QLk2WH^)Kl9OcHAuudXQV0mC#%V=j(b6Do9$ z(2&mz@A-?>X?cWMeYpg~_?KFa<7u}l*3MQf8RXyzFoaA0>ZV2SZ{gM5m3C<85cws#v4 z;bg>)JEAM84Z1xeAx7kt5t$xc_4EUlB>RgBgjBsc zUW~evpTNzg^l>*9#(!pFceW#S8bSBxhk13B2L!8?6a-598d*$U1ihjB_4P^gwl1)|pF=z29TWI;2KD6BWKQvZqvB$8MKvSY+eR z6zX@*r6RTDy7jTQw;K1k7@a=Eb>wCBy#%6)L_rgP3s6!WYD&bJu5P_`#WTG;u>83E z#EE3<<7;UbpPO3eKk!XZ6~sj{g+XsFH$hr7dZ_trxHO|f zai=JHo!Q((MBVc`C-)}Gkv7q_UJXEHvgx@J2zy^U8#^~Uz%Hhn9PEhoO<5YC(<|mf z0BVjnQ|Pe&n~4XOSHOY1#yU<2t%~wsn(zjM#~u8q1HYEw5LU8P`DJp3+(6<@qY}PlkbUTus9508B|KU*DXZU(UCrk7lEhmTijk3p zctIcL1}s6E!Vc~AsDUvGs}2|{B1E2nk~u^%4wy8_D~9;4Tzi;eY9r>w$rH)G&RL>$ z`KLbIqc+TxiASO_b@|a&HziTTB$~y&F(nV)K+k3`=&&^xK^Nwv!WUc?uyPNo*=+Op^0<}IJR@A zV9Ohd#NdqFvgh>qd<<9^Z_DrU4b5f!NMVN%m{kpY9`6PHul%ADVOtSkYam^+8rIcd z^+(&T5Dip~zH@7D7*{*mCce5H5~4Fn-FLuPDOv$2If!lD^p92nxZKLc z9h7jXN+p^~v&xOS_{Nb%uAP-)7D=E0@s5E*8$1r)#tR0+k3Er^Rp|n{61Clvr;M<-l9l>D)XF3I`S4s1KW z1haT*4X0BYFBgS4Cb+C6`xpZi_>_;ZZE zSAL>b$GJc>$?=~G>?;~uMS(jN{`YQF+A7I@Mu#Q^rz>Vyy$;OV{Qu5u*ehlL`XF0t z)@X+#cor#7iSYhham5}-zJaj-001#To>)Q_|1DFso-`YesKIzr?5=>J2gd3=uiheU z7y2)?_bzS*JbTX1gl#TnYRA`s0003d0iJhh5VtG;D-bss0iaJ!(0UwD;CDa(+Zi~Y 
z7xQ9x;Gu^0r+dZo@gkCS{fo;O-5au0&YxnIcq10ziN`AU$ATGotnVvQ0D%92J>JiX zkb3Fwla?>@rbpnyhV=ON*OVOm?Xy$6ml0?PRACm^=|^FADAU0m_5oGg=k!D2Dr*$VAhXqB(2S@q#ZK*~D0jvoNdElBii zpoN~ek&g$xDdD*k0J%WAPf|);zW+YNc;T#btwd0kV0JIg4dtsCGe%oG0q|_QChM_Q zLLH7&{tGs8h{jg>T^+pIRq35b+in{cN;J+MdU3Mf#U@a_rKC;=DA?$$R9q6z7hib3 zmWS(V5%xT1zTfoZT;1I+vZc%FyxCB$lMQTxG@|Zkdoin(q|*9!gL<`AaaYFuF&Q$L zZn1v0E25j{E$pB)DS)7wf}>!sLqQnd-~oUnK7_~(#(%72@#PLJ+zEejb)Xn3M-cTXrK{tTpSpCFj-U!;e>(ceh46w-ygrM513X;K-ZBBZ71XQ|$eqg?|KS0C*o&O|lbR7l1S>3Wi~rpD zf44{)z35?$C;=K6JRXMw*yx?w4K>(P@ku{3p^O#g8 zE2n~h>#|5TWSjD=p`mqi{+W`)BQ@HHA%Jn=JS;zbuQVxUk}bXn7ZCLHL?HOMOJv+; zfZf2e&lWVDCmv9?zu&9CngUD+>g(V;F$m#qvog1*I3q9_U(PG_o#=+R&Qv?(_yunh z0|TKlc3ugW4CET>>Qi+PQU{gh;{w1}FPjhu&w#+ukfdUT0#%+8eYg!h*6S z(C5+_&bC}7D$4?Z@q?90D~oa^3?m8m%4KI-j^fnjMdqy=&D-N7-;F=|IQzh%3*Hi| zkkz6*Jwl|^DQ;=WFUnCI)NSF6yq6~$GfOLM8Dq3rDWH*solJdnil1CKj2Ha`b{V|j z&l{8vC#;54N$pIuMcO?3_mDAnP+ryblZl=WY7_1^D zYON1(f>uAHwjif|VR6Nq10Y@3e<{gk=M3aIR~PY4z4VVQB>|Ce=Jc0|x)Eb6w&{^X7-a31?-~>Wv;>hb);rl(OY6rNT}9b#<{-gP z6EKlp;pyW#ypPqkqA+z>NMM$juk2e>NEcn)(5So%%0WCE$ou|`687W3*?i0FWnxhg zD8{3>s&ui;|4&wF5clLJdPe@1s1Q?sb23r@DA?FRAjk!2n0oO4Gp}6rLp;0bviDFH!Bi zhunU|yhj?AuO1jkb0PyHtB^gekRqSYL;@dFO+}=_nk1y&nOgE5IROW~waH^D9f)qV zEd>_iZ}{2QtYd_%;AyFi?0e8PdZ9e@`^G?dmkSTV&2IWy=`dUKB-r3wH=SwjEA28u ze^;7#=;y%z4s>(1aJ6Mj=<3<7LhlO1aCb(=e(1!TIamE_bvJDJHA40y`z4jRN(Ml> zm+Ta>BRj*lVkGMl=TWEK;&NM>X!pDLvF_8OA#k$tT-P-^0*KSY(s9HrlO=W)5$&XD z>RCCx0Pg$0(zwwG2gvmBRK5}Deg+{+b*aT6tfwn2ske@bEEAdVHI3#!0uiGkH%FBu zNZhqiK`#Mid;*k4Q9hh(W2p%FsdeFzU&nE_1FOC&l%714q%wv0C{yy}o_{`qbLMKT zOdU}cqtUkmBp?NU6j0Ykt6_r}~c_VU;DB=T#OLBI9=>hA&FP+7Ba(s#}CuiEkG6bt6 z3}wj$z+BMXMl$N_(`EgLRPQryEw}X2)892kly%ZxwhocbET=YLBXjr`jhvh!=!%oJ zt^S?%bVWQ%|G8)(000DlL7Klws6lL*ObE{c*Lnka;H<*T2WNak4V4_9qi3PzpUdrS zVE;+Z?9)DMXM*&8#p~w~(oCdjaOf+#Kb?b8K8XbE!z5)5coovXvn}5f=g>4_)N5WWopSHab)r}+) ztoOYclv=CL;`G!NIU6f^H-UX3SL9v?E_Ei>mkuh1_Uodo&pgaM^q)Ang78-G0Cnc;Ucn;rFU2_JB8JuU`y&5i=-@=#RG!{v8A%_o z%TM=aM(FzT=AS!)p_7ReMLvrU!LR@7nxSMZLoJ}$L9z(D2?RR!FT}H1TFkADC!(8- z?h*?7!zVeET6N%$o<&Xp`PJ&!I#{cEV9KBy`R(w!GN(&gg*nSJ7^_;Mfc@V_H>aY~ z?sHY{>}xM5aRogE0P1;!>w{_XG56fJ)taH`PW^t-io;_Wj2kk_?O@8+JYMf(3R&|l z#zbUm=22b!dL0Tl?Q8IkFrGja?0K1Uay(jLrsaUTwP)mn_Ina$FW$#9_kji);{}fk z=FxVZIFxNb-C=Y}E5n1tW^#I94iV|2ek)p&LcNR<1lPdg(wU};?2Xl@cUoMCgtQvp zXfm3 zyJ15htepl-tTKPDz~Gg9>wmlQ1~Ehs$TlDasRI3#_lByOmbXv~8KilgIGspt*zY@x zwj}7{{Vn8$vtxLeHA0xE8TNLaKTP8I&LvhXk|=je8KhwY2mV#;!9qL9i+&c9RU#KF zxKK6y$4PrzspFUv@v!skBHnwcCnKpEQ+=+Y!OQH0<(L6>wYbO;0000dL7v`17XM8`GLoEl<<>u{s-e`Y%@VKl z9Ek9VCy%a4b?rcNaN=M9009;Op8aVMw^!}$#y$c(T3`Bi0ns`6sczd%SeVHrspFOd zx7=#Wu_i`>W#-HCn)I(KmLOkC`L7|2aYT>TFL+FS#zyPicwWhv6tq3xC+PXm!)HiX zs+nhr%+Taue!N&u!)ACjm3Ak_5+Dr+fA=7|(PkBpFo_kgf@G+{wOC{irl7xay23chF6mu9d$ z4cmp2TN0mY_p+TZi^?@NHt``@5e5RK8sBr%P{(#2*$T{hZ*X0l=EX!{sua;JHeW#a zE5DiNd28`{Z(zPz_us#s;_o5pVjf(lE>>tec~)TVY4g>hAOHXeph249OQ=E+TOsfe zLeLOHKvP7-RMCC@rpF0Ju9aMWa*unt<6aWCP(X5J{=7pXTZ8wK&9x!l-Jm*!_fMIpvq5Qi4BMxMr8>k2x7=Iweu({^wlu24&Qa(!4)&$dEHl1a{P zOfL4h3;9twisZfUPa2StrYPoqW}?~%xnqHFhkB?93Mq*L^1Iv^ryXFe5I!Au!}mA6 z$KdKnjKfIV@zxn~4;YLC7c4C;T7Dit^H9NvpT_5F7YZASEI+;fjGh;hFY>#!wL(wL zsAjy$Nq&RzSZt^jW~VI^F1SpDp^_?V$}ph6N2|$wd_QtUB%3BZyxD5_<~$@xHy4I~ zHcu9coj;sgTB$q(>j(lYmn!O9s`>bXkdX6=(Oltoi|xxC5aug|wToeSkN$chE!^&o z`Vc{$nRY)uSePp!FfZ?BdaL-aI`ZltQfFIsp)SdjA*%?p5U~zCOjKdAO)C}pMvS+( zWgLeztWEs)!&Q7I;TvaZqeFKisMd?@kgS**#fP`{tkv~+e1Wk&$;Ka*6uk6U=eI{5 z&aT+q7*#gJal*$)$g}*tPfnn$9=PSF@9cGAO6|XSd(0c0i>_&?!MVNMv3l#-W7J6` zw}+~E07hsb$~S62NCWWgwV#A&%qGKrYxM3y($I!3R~I(5R$xCp!z zI2QuoiS+w54wKQWJt~Y>(4*5@yU0{Wk`w}9ou3^faVNgkBfnw*b^Ddpxu5>#$|F-@ 
zv6iG%CFx^x_Ma!Q3Aqn$grEHqI#Y#u7wu}IQOy`J3gZMZBU z^M!UMg{|YLV~O7bu{8b+reBO=P+CK}uJ0K;?Z{5@g+Y9iqV>ia{K;ImA2!*1q!eng zH@d(d@hA@ap1*8P?u}$MHLY0^WFUeW=pAsM(AKc(0a8+458Y;7Ufb>(ev*1!R4D|F9HqXw3kr-SoJC}w+;r;iUB()lK$0#pb;5G+9x+fhl%fiopt zb)-(Vv|#85!8+u^Ul@cr#;_fP++P01?}nqDEzlH%NJ>m))3?Iu^#`H7u^D}j{Hqeo z9K)(T+NQ8PQn_2q%;7udY_F}(DM>kAQQ>I&C1K`u8`Q_mt01DJx5d<8k&dC-W%L~^ z`F}cDs2i$z7HO%c&h_!ocAC7TpMKJapUkXO1kf$RKe4}`w5 zqi039jwQirY+PeNgXYhiIS6WCzADbd&9IFvs+g8dIIkZJFqJrs)5hZZ_q?H zz2d``1Z&~Wr{CFm!61hWPoGE5hb$0;0C;8P86$BHVT!Q2EoOb7cQHvk_i8G#KCz*!qBu--O3}5=m~AB@hbZl3ZT~wE za!KNqx4HEamNGRawLU7aUUT$Lb+b)8G|;YuLUE`<+0@)8im`4_tRI#N4&Vi-6;2zr4h-5`U+M zCs7>!A_FJFl`=%b>7KA|nb$hDReT6C`e z#W+U$p)WDtE~t^W{bY{6C}1gVtFb4;dN0sw zq*C`!DOiz#WW&R>;t@dE!(md5 z9Iy!$M<#8(<|~D$*1g(F$ZJ=EEIk&m#?4!i^VF?9 zGh;gG=*DTdId_in?@~_uT!O%!i_qq2RaHHnju0jg32t{iepQnO6bc7mmdfCX%nc`b zt8T=}^~mVn+(-d?Zd<=s^s^N0%jMO%ff4oB>oIj4D(dcnE#@fAI%@3{qaVY~ze1sN zmHnI9)7l*2^p3WC_mS_@c46IPUT?|2wNU%_6@@mp#u!a*7;A=?YA-H2*_d!c>v3Ky z*diTF0MN>SqIc=8nrdQ+p;#qGfYra+Qh>MdY?Pe?0xNjV4)T*^pIAvC zdFj7GQU$kSO@b86N)tK_#gXv?2It?RIp)*pf*VEE4$)JL*nE_TarjbZHqj%Npz}vd zT#Ti36cl>_lelPB`2YX`Y5|`BYC_-NND?tUVz;V%hd^}DlF*nKUdSBRZBaX%Bt`gi zhCWvCOzQ^uMVSpf84;aGJM`oV38XIdvE{hJma}j_%w(d?{A@w<(dz0yU~NmE^_pIs zq7V!klGP23^y=r4j{I7sJ#*zX)Pv}v_hV^Q=6AymaohW0meBik42+7H46NJ&u?w_V z)oAi0MIO6Bw;jD`k_09|nVtQ6XxMa_tl`r6XoJuk zH6xsVO??Ft?_658B+WSxVw2qCvoh4R2g$F)J>7`9iaqdoFX#rG9WvQ=SKMSGFcd?g z(}$NIVjC7R{3YOs@C9c2Cfx0F@}LZ1l$jsGo(|62Rr2YEuZjmwk19p$s3DmuG7aK0 zI~d*l%_BN60U8b$us4nX00h)Qn*&MV3Q}b-QpSH8!0_(h+liw+O`rh25G^wByI%?>lh@Y{U0MUmZ&S<3ZAXQTNZzq%yFLRuB2Ru1i$vwSmXS zmU_z~+~zyLeyv|&4@h2P2{-<@rJ&51#b6D<{o!NSJ>e`tz3;C1UVhXzU3C0J{5h#p zXLrGVcU%%8Rv|}AeOf&xUof`gp(v#lXLqdOY6*F1-oOLLCJ?dimY@R)lQeB3Az;#w z%UFl8h}uH=j*upzJw2Mfy;l4Tu#)hO{$=vA+y(V|GoX{WNLdBpyPN0TxdS`dK2fom z;o8Ze&C77`w1vY+|Nhyo_F6~9eFgzDm?4}m6>{Krx z(zbtjr4!0#(f(lAX(c2DkNm|cjH;uuo1<9QnsMw}lDx)?csqvnuf~Dk_m>$nDE&|i zr8F9$V|+dZxte1ngMxBu!p~l8{JrUoE4ccQLwB%)HPZ|XcT9FUV;%%r8)(N#fw5fr zH#hqvJW-Dypn>S%gqW(*nky-1`m;4yg)l?oOyy!|x=%;EdOJI|GfU)fZx7 zV*Eja1utCDRY^u47vn_7RyLNZ#n~+1cl9>d%DaYTKRZ>{#MO4BL$S9^A{*yK42tq# z8I>{^(SsMQCR@Z!x6lk5vVzJaWTg@n!*}6DqyWgQ!Kg5R!UHKDxR09xs8mKAChnz_ zBd=n#uU-Rd1baluQpNm`iUA|IUzn8&Aox4e{UH^LQoq7Trc5sFw?)$=bDE$0jDGfu ztMmNdWWIOhz?+egaEx~#(6hFV_*oQ`f#)?RNY#Veo8qm5pqhpig)`Q<-?g5~6PB80JbMekU zRPG~82|g0qjpD`Usi3v2pxb}}sI%s1p%y&>phhOMkfUOb?(y08Ew zDZ%FOKu|d|_muj1s>U7Czc1-bD6$mc3`KHf&Qs;9SQBCyIopb2PGiDiJM4po-1lCT z5)U}#k}QaIy4O&?Rjua;`3_OmqLW4o2EQR88xi|Etm=LLI}>U>ww;5+{frhn!Df#X zGYlgNH095_dl49zvhyVTyAYRQ`SJAI!hJAmL%s7!^2TJ^f%Vdir=teAAs-dkt^CZ8 z7jMI2qdd%%cwQ1tEZ$(b$fjNOy2__^M>~AWwxVDY&ET$eev|R0W0?jo`iZ3@=E(zX ze&4n5@#hQcNtj&uNG|!JpLEuBz=Q?154>!wJ>*ZPTstfBI4lBh3lJm|uQXEj7K)?A zqU@kH&b#qW?RjzhMU@<3*t_6m=*sIa@u3=FApPB{we(5iX%2icJyG+;598a{E-m+o zWjz{Hbu=dc003J7pCW2P-#|t5USF^^74|8;k=Zo!vG3ySdK2ypp7LIK5RMCu-76AwIR;ooh#baGO4wD|`R| z2HHWJBuU{8iIl*I|J99n2!*xuIPmEX-zy~XJA;5OzyJxDN+FyKXyHY?d6xmlsRlQw z3Z1<1Q7DWxhRfN)=UuWpuiL$bl1s>blZnc<=Fw`svSLI%>!j8j_gIRoUy%|=Ew6|A zM2RDy;L?LinZ%fd=yWLr@^fh!E{;x|J6u4NZ##cu#N_GInC*jjv3GqU8*yT1apXti zfG;8CA~2h^GINiikzj}6p)fb}eZ*f309&=35bsJHS2>~mGBedIU=?ss4|<0H?@T}^ zEK3w1w4AK{SF!ne(pjMoS(Tb}ADv<#p569i%pqaP9Gh__ijNw3#Z*B2ztDW(!q;Oa zdu23o^VN|OInscIjMWV&>^E^ID=}eZT}K2{3^)q8OJSV-!CV=+pV*lZT06ccVL4bK zzgrYnvia7O*fmzb+-BDaOvr^FHnDC`7#I^cLpEJ}cy`qyGXE>*HBoOjajHXbcX;Es zz*6(u3X!u{?AyCA}JvRLZXO2Z8Hoi^gry0;vE1P_MTH zeh*5@U1OUVI#hiG^WkWV`eWxV;%sxcRk?a}&O2%PG1zsIRP_}fdXYz-Pwg08l{Y}m zZ|i<=0YcPMmU-WPTYDR2Bb=zA>H@E10l)YJlEt_6PdW%NsO*)VJ&8LC(jx4)X1jVt zCT9V{wfv;r5^)Kg-wZ98i0{JIz+%%>5|J41V1=iM5S`t=6qgN{4BK@R0 
zEs4XIXuOzT&IK@QCf`4@)8qEna*X8T)^jSJ^L#M; zZZH^V$F8opl22W35Z)C0hZQ;3lX!u+73vUsv=tgD(RNg?8C6IPw*%B&8t@Nuw(doU z)4>Ua8JmLn)*|!zL?b-BtwcXir78xB_#;7lZ0~Kkf;!G9f$nv-0>F`@>$xsb4{qDYRbK*!TJVX!Jw8mkqC z*e_et7$qPRaJ4%HQ8pKnj0~)uMaQH2NwEnak_}S@F|L~TBESudj7<3`Rcfp*-d=V^ z(8k$=(RBA#8SXe*D3)WAB;?8L73}8Vwcb~(i^b`H&Rx;jehOu5;q(`a6R~(&Zn@gg zFHSjv0#!sCeS9Twq_$~=jHWel7}S%ep4V`CO|(}c3s-_*Ck)NK0002ZL7PQM;SVNL z0xSOvn3dzw&Aii6`Ru)yZ@rShYYZ#v?vaTWT_*i* zfO*?M;uIRh>lAqzCXNmB(3%QH@XC9yCFj$djJ3jv_YF^N#PGB zQw0$`M}Ss^rkg0ou4L$jlvLY2h5Y^7nx)b|lb>cy5l?#?crOsF_c{kbN=OpU#1CD1 zF}Q=YDvz&zj&`-ruuD@e!zG8%-^{V;*Gh(-HM62reJcbh_{Ww44GTiO1o+$-CL6Dy#0-7 zrDqHIU&vG4#F)YDpgnYT@%I(hK{!q1s`$;thR+>2#-a9OVS^nSs;bL4dUr+mYHS(^QAFlr9?-$KGP2i@(|JEBu6~ zrh)>TWx>RB{t3K}Z=L)~G4$jL07OI;zIN5RNuAq&E@XY)xx__*>a>qFPKsT776bJ5T0+<+4@&Se$IWx*{CsOOCj0p+!Rg84r55&_7WgA*~_voO*p zV*#jqyqr*fT_-Kf^buXd68Wz_mLgh4YXgst{?x9J8^+?J888sMK1CeJjiH~&8TF+? z$>9yBgI*OWXaA;-T?KD*g*c|2!vuK+tp)EAcmjt80Y!i!KB$3yT|s-qv}(VFtkVUh z7LoNqc{f( zk5V@)*VZTMoY_e|@$U`dzNlm+VSl*gR)O$MIG0!b4t4UY`AmdQIyBAn@RKDE@6XhE z(oa$Nr$rTS8aJmZ8$?6OjqEjZs@@6*)8+X_%c`J@x3J{t5rsMW+@P4tq+q|IvQuY( zm;O|*=oLiIoHILk7NN;>-dxyj3i@>q4zA zf)N;#zT#r$SA%K5J9MXpu^RU^PlqIlnl|Y&>LQPQ^;aHxQk(ec%{dAU5t3d23O#@Z zYk}7bB-J$g+#-IN?mtdTyb!4Ay;_oHh-8Gdhp^kE8-KTf1}4HZ5q%uF==Od223O%;Dbfn z8BGJN+z`g1uL;M-MH{Q&-)?~GSsmn<^3Wyc~Ko$6ovi&H)ve=#ur3gJa$dURga5Il@skaz4U3udWv%jG6k@S5DrWK zQJ8AOsWbS{ykH3AQf?kmEgoJ0t}TtU%>$vHeT?{)oRZWR))Rj&bh%*!-FL(li+Ttr zQXrG8L3-FE+{b+nJLY4-mt6a$j$U}B__6n>eiUX1104hk$TQwcwY-Pi^>P_vn(dOm z%9nsY*d+-C)P>Vx4D@d+78X#i%KjcLb}3v{BmtsYTL;pRNc{q7A{gr-6pppKHAxI@ z^XLwDGt*;99h-cI6eef_)vHd(z-pi?w$L@$1)L zPj~v^^g3_LU?M!D+_UqxFCfBtmSv#~m62Q@yRxUUb1m%8vwXrKCL=jl7k=;14I=nD zG1zY}36>Ris$i;_OLhp`Wibym#9Y7k*iH5eAi`BLM|*%AU9-X#!Pa(}{j2ktyJAq| zd-;mnM`h%NNr@yhB6ehP*2i7FTHs0+8UlZA9@`0;_p8xzIZqjv zv#40F5=g9QW7i|n+sAJQkT6;owq#l#kwea(yyu@Cd|yGs4shGykxVb-LssFF@*v`x za`-C+@IMy#OIFA+AUj3YnFW{MrJTKvC{=&WnI|Y>ldfx^lNu=ki2+^f zieR$eDf)@5!Zm--^`nZZ@dpgLL$4x-yrLaDK*X#fBLWdWatYC_+N z1mr6h7?^()ZSi8e-$hiB(m!AP!e_EuYu8j3#y^I2eLhroAI zo-8`0`8a@s!V>#@Xi>xmY$))t!qC6xp!3kPiHDGRZkah03G2Vw=P0#{i+|fp{7Y1N z`rXxu^3nRmJ6GAv5lFnHv{plVN-h8sCZ@9V*m7+WU84nda$n3E;A8+I%Z!+UPl03> zGfw?@@rf(NCg3j?EGG>;8)vl39Q?A#tCv(sNI#-(mY9>v_g60RDM1PyA zwi%?|#a?={&$nN_&|^2Wt)3qCq@KETfDqno>sv=$|$pO(k?cN*FRON{0_2 z1v>+7%pTgU{iPhbiI)h0xmX-Cm)CAZ8&(fL!_)9>En}3x#Al_NikZy6t>Tcs6J>(n zM!flDx~=QRiFCm^WYQpJOK#jqU{)$vJ;*Vo@aTPZ>M8|1x=jE80ti8yh)LlOCQ||< z{{dDV4$Oh=opYy--c?o2(Doq3dD!rcDCr}G48aZ{5`40xJ3iKaogcCZIJTH9B zd&pc&a>1MCX0LoV4n~*+A4mQr3@Z=|5T%6i7GT?)ySnQHK}=0>+0=qCimvG-SH=P_ zH_LvIp!rYemX+Sc`-Qo8^|0SQ9t^R_``6vw-E-CIYi)mo<#h617p{~&KigsE1E6?Z zvyi;nUeBP^E1=Jd?5pc93@!B?UrLNJe2_bh+VgVJ0}>u#kF)``aNi6E(FC_efwp09 zgg{V*CTKMV#wE#t?{mL{&=G@`&~#>*8{*3IWUeDxUc5ghT0*3l@ga09kcRGN%75D! 
zXegz_pg=Kuyj#FI$3}S}e}Tgt9cON1Bl8U~jd}Y9ygw*!5y}x@4qLE1f-(>F_1v83 zuDz-*TUI~)Dh-4Pe5qU3Q?K7r!B$OUa6sAPElUXRmP?Oqp$Zgt zl9`^c6L3l18(QbRqD$hvXRLp+poB}gAXM!o$*AN4*y%2SEZH)aEovhw%u`MXHA2iO z4x_Zkjl!stGhc2MkRYpXEm%~1mW0?jUuQ;nK_;)TFb76>{>}cpW=lJ)+!&CfMWDrOvX<+8%Fg2PBZG5f%Embz+OtYs~Q)?Z# zmtk?%Taw7hRtcNiPw=kff+6)3Q?Ht-*2Iqv)(QR)1?~N7u&whG1_ycWrkKbzico@2 zu6g+6%aNzE4DnzO0fHt8dqewgv37bm7Fz8^3b^V@UDW@G%Is78=xj~G4AN^0y5Xs( z+BJDEjZz3YCw|byNtIwC_ICig!OtdbNu1OZBr<}*5VvZ{ENIiAPl5x1b&-C&WX|$^Zpu@wC$_xY z(~SIQvfTT84Mf`zK);aB&p^I*E>h$aob{O<=K)=3Qk(b=*?VgWUH#m%^mEI^3|r|a zcQ^(qm~G>Oa{*l=H46YLkKloN?YY%ZJ{c0nQcf>K!}}iqMpZ;Wb`!sf$`pvf3S!3o zFOHO$z5|V_1ud}yxD>o5+y!6EmCmkJJ5U1Jk2Sd?WLII>LM2D>4J$YExf*J8EZ86D zKH61{)%%?u2_pID2vG2|JdhoVY#5)mmVsmA);qMsh~qaXtVlJz)AS`~3yDI300s%Q z(MDu^W%QRc2Dt}HCR<|!?x=N8vrd*+1Is~Ke#^ucE>Z-E|N(5qy zi}>2lf9|tpvPI@TM;;}OMA}%GECYD@Z~j_#80(Mf*8cCe5lYmvB~+tvBll7+f^po^ zncp<^x?ob%mYkM?;j<;oBOJ>n2O3%^%H65~=31x!^&I{y5z>Eq)Y=$DtdTM)qM*Ur6X8`g@V0x5_c(nfc zkYZ%5zT^^k%>unZ4vA+_OJnbn`SHxy6}{yzrOJamo0tkgl~U*y;->;d*zIFgeDzpS zSt-I@C);60rD8)@Ji>=EgY>l^-frVG6PRtzCAq?sD$%voMo+4i^uiA?DvWZUIBSYm z@}X+Q_F0y)9Br%{h`3ABr{1f+m>e=tydBAx0=Y4)c;Csb%t=-=w(bEZji-#QOLApwM< zV2RJypAC=u$jRGnSu|28X`TdpExv40JU5K}Wn}+e*22vkOS|(vr*V7bl@BCe&2TYo zSh2#UO;npfAg4kyc!l=1*`(pg{3OPaMM}j+O`G6Nz!r~d<2f26)qL#k&0)<@z57a^S%kKpQ6pMRtsCvOHuWY=pdvT0ZyveFbzJ8Gq&$Jq(h znIgBXF#g2v=RA;Do_doN@_;0d0O42IiuSxQ`r|92hq7&=iKWDIiR=%Yv2)Ok!wafx zl-hdAxywRAK*7ztv0qCjJ#x(Uz1j9lfut3gccxYu z*Ty~fjfM_PP{i+sx&b7qH)d|?bbP~hAziS%l-NT*qqeO`i7%8c)^(+VwEOqFbafXyoneu^WQz|b5U@xB0$KyJ950` zGDDkUzj5d`qK@_aXe|uf7zj~m5s@Cb!|Tl9198L)xKiXHRn%sV4!`5QXH=)=#$AuOr}&C<<%5jsZCZ5%>2+_;FrRpjW4xQ zP=~PpC_OpWP_PNZ#+U+$OE(Vb6epFYAc3B|SiDUpof^8N=LPP(as(EN!r~TyC9Y1m z*TAc%*$Jy$uloE^5T!AjF6**58Jj*+o71!EOl)tYvj0B62u^>%{NW~HU#G_*=B(?H zQ+Yigq-05J?UP7#Jo7ho%c{r20hi1J%hTThKOvE~DTw#U2$X$cVYdRi(RQcLU;qFC zSplEPYC_-ev4l9urOXR2FW^Fp+1Urrm_VhwK=(t2H=4)#{mx8{(#9dE ziT#?!)kmN7nUCJZ?$mja5j*m-Ru|-ZHtzKL(*ERQkXEV{TL!!XSrT3{bBv&$K~hba zS5Del(*uG_9LpA~$ZV;-i$UYc1EUopWWzhkZJ8k0*BV;lzz3SOnD{}@r*T!CyL7|- z#iaq6RiY{9wX%)b_%v*}=EdZi#cri;Uew*ZTUZh6ibe3-w*Qd4kmoHgUvnYkR~9-yU+?eFy(oW~JXFle1ebl|NT zM!y>#8+pg0VT>L0BriIYXLEPx4tks|v>dUgg_h$O(MrjgyN8Nz%gY(p)xRH1*Qy4f zuVPLw0*OnZu5?vpYZO&u;%r)b_Y3JpT&M{N~|FJUMt zu=?J+4cY8&DBE_8K3RVgJgz@$;k3TG=C9%?S(|Ps2D4{%fc^_S5wpQ$+41NAl=1tJ z10|WoO|2iRu{gEgfO>>zcv~IW-=^1}>LU%Em)2oD#(=jRmh)QRIvoHDWf zoty^x?rTS_EI|=9xCq1SFUG5v{WcN?vMT;WkW4D&! 
z0CP*qDNCz?I_vWopatvpH-_gA0Z-X_`9#_p0<5%pPJgo2Yd5%7U?WJS7crq?=H&99 zd8f&qBw0JLfb@_Kj=L`D?gcIJ zPKTUE^k9`?Hj-48W!y#0AQ$GQS@x-}D%SkMvvrnEI6}vf)^=iy7H#=THC;X&%|Q-N zbTaZ&=583d-E4R3zP=69DVhypJ4I3y+2|k>r#FZOh_K^Mau}Tp%@C`aQ#$nO_)E@> zlt4f5&UC_}-}WN(FhGp(sRgj1=E%K5i9?aANHqdtwtnwnuP^GF$Mr@}3)X-C_O{!<10?^aF}#se}rYTq|z3X z@{>YLjcZERo%KNSEr2c=?mO*bK3sF3V>j8I8P6eb%>D>xg zZzXma>pI87!Z8s%zdx>^V36j#8gw)~R2HO8#rjkFOE$ z@@C&hLc0>4GKvJCE_8hJ<%4cyT={_dho_s^8ksO!hg^3yHq;+KOJv^T8^mO1y23LkVk0W45SNkkn?K=Q+y`u-sghRrjjmB}0Wr`Sd$=$t@Ck#bBv zD_I(tj|<(+553@SUb@Gp7Uq5=r4p>^pIDD5AoB6aSQiei8&a4jW6l*%7~!-gwB zT$7x0R-w6vUb6|wDaPhy1!5S`B1XcXKY>gbbn#mfhFIfq<8CzPj67U(V^L>+S$l+D zj)|JOcI7r4ljEN6N$#OPF79%ObM!+Te~}Kc=v9`F`Bh2Y5js2+`D}p<$83HzpjghG z1V8>eXijJ6a)2wJ%)WAHdUPd}k%AyK5v7(Oj{gSrd%VW>QYgJk zc1+>RGvcmE0dVjdbR6tco+l&juc5_ZDP)#)VosIq;u<(7XK2F zZeRkD{LvN{_ea2Fq<1(8Z(QiC=OqwujWl6T+a?|V2-rrq@o<)yPt@gd@zEHxn|@a9 zbhi`zuXH3Jj{Mo$kc)%B?FNsR`1`okMX`)Ih9rv8v((X=QgHS+JIDdX7aH-6izc=E zUkDyY8|Y>10000jL7ojl7XMm8zCQC+n>rUJMm*rRxA6LL-Ivx#^=p0*Oo=4M;COcu zyG`13bpQYXJ^`LAX%M$apgk-mVp1?_5zVJ%wyv4POhM)K;8A-tZ#$tS`g}~qMDV6P zX97Nl0;u+j*(mHGp61@QzeMV-F!e+*UcfP8MR6kR^C`}3cj62vYtoW#M9NIRHI;KL zVzSwcY>Tux|H!Q>p`nEzaC-vRS@j&-C{Zhnx zR1D&E&du(pge-)92Iy}NXHAy)dX)w{5$Ach`hk<8K8mr2A8s|Fe!#NIn`D4HS-{^M zN7muWl-vUzy>w8SIq0DP00!MbnlVYJL1>vw3BOFd8ar;c`4-bEv21O(If23FO8{(B zKD#I8U$+-)!HY7)9#ZF~w0rp(yC?XORgktQc3nl~>fE2p@k}F*jzm3KWd=|&Q6@wq!&eHCGkUSK;c-HO@Af;UiZ6$f6S2WY87Q+rcKXOWn^b@ z8c_Yy9u+*#UYeWr4g<=lnM|Qsq?}QF3e9P>0WdlYtau|xKnkILU{mB;{@B({_Nqqt zBU8ENkMGEl&9{%zX|y_GKLZKl{YuvZ3nQ~`BZVdI_db7#-k%iFp5HJ`_ zJ(EVBgA}ST}_FugLR@`0BS-}NA8RBh8<4LTmGPLiy0JM(xe49^c~aGlA01_42Gt) zB{XOaHEfubxPsx^H`HR!tQ z6K@8QDkG!R-Wn_Xh`rv8g>!|)j|svJwX8okMteXoh74j)IG|m#m#taw57x3>h{-8L zo^az&_}&i02cZP4`e!|%5XOC+nV|}diA%`uVe{TjT)#V=wNBM!Uua3#Ryn3cSsxG6+mQ%%bPs>&QvW6Z6& zcU+2sJfPresAbFoFd{O7+UGrmz%Sqgk=jG+>C1N1z6HA0pPbXFUcX5U=lv(;jN`kM zlwicgzYzjfxsSi=Z{^jFAZA&ltUV!b`slHb}Ov*P>t{ zf0SVo6>Z&wRcMDmR^P0(Mugyofr1!1ikOzG0~jR>|E?+cztv%%CU}5lVXM6b_w5K% zoq2>bDI+%c+_E2OoD%!Q4d2~liBnI*IJO$P`vRSx;jEU=X>)W*MxvfVs)`&`bgxh- zq)-=wOfLyMHYf#E{SB_14Nfh~FCpKt|0uXvuD_DzoOk8A83&u57Iv_B<5X+m?y0L>o=jmV_Kc}=(7tQMP{;m4FH17auinl7 zF<(sV*K!2RIUdHF?I&iqx4D&~G6L#c*kGEhVFYO;ObCjlCpe4b3|Y22F2B@y+PhT2 zju1Vj)rV7X@|c}e$NLe=KQtyzM7TYm)@Msq3C|XUjyNaGH~$#>G+r&VBWs_aL852? zMKyjk*ZA-i{C_hOUu5scEjxX#4iu5s6a(^*tl}Ic>YDidH*cO?#4qx%ij@v*8Oe3> z8OYkcv+%Q7Dkgz5S?SD*wf(>^Hf#h7qFecRVX-3#^L@#oBF)%x8it~NxysZ_B^e?0 zvO6I}_6*5IvawYsZ@6DEy@l`RAs&ge+Yy_WJ_+{&NOBkgD#3A+-&x50pqNKAniY`` zKQ(?7R)oAO^SjdFTqRsqW7*AfPQQHn3%&$Y*+nSZ-YGHy|eRj2v? 
z>9ia+3GT-E{JmRMfRK4rlV!C0^jdM8$j*F}6ix>3+v085(`o`Sh@oMVc0c#3x(J7}%>R%}Z-MDR0003O z0iJJZ5Vu?m;#URVt*F)@1FlQ7$jB>kQu$1N6&GXuUHRO(=!Gx~GLdn;OSA<$X+odP z4Un`j!Nwd+Sd$a(HW^1V;i>^c(4gl%9;Ttqt9D{^ZOGDLMJQ(x+wq7GEpt8zsmoPD z|8o~w1w8lIi^+$WT5L0dr6Ud)y@|<`c2iLcAltL{PUieY2i+Nv+XQWgb-D|@i<&r6 z41&@$xm9&{9Ya4m9r;S;5`qTSqaa`!LWUSgw=PjxMuY)GAd}Q&9;l0U0>z%9E4=jk z$=ZQVm!iD;OE`p-s>ItP{R|vXV0yE%2&aKZa5xX=WeGV0nR`z~kuU2nH$sM8`a8{3 z*Rhw~vc<}HL&+JSY=2xQ|rZ3hvu*EfPZ)Po%xS8qq#tnY6oEqfcV z{HS;n%j{&djva!yY-Tn3$^w3yFVO~~6eq+ld1v=BiKus1EZ_gpPlHho`%h@>rgv%p3j5fDBWWzZx7dLl>{vx+f}f>ZR!skfV4FZ?lR9hP@1Ia!5AiEN~gY{B4&p-X{Cqc75-2wLyMl#eN8Q`!!QtZ~u3d1Uu`Ig#m zQ`#0|tk242TDZiNvLdvg)at>@0bRIN*)<3<zu;T#-4h-wgX^;IaB2dl{~lnQ_K_!Hj2vu>}vateFh}Lkzkyh zcS*rr+qH1r-0n3j7A>2=fjr=Y7_b1S_q4m)qz`IM6^cL9*B zNE7UR0004JL7tOE5iBMDPm10j*hPZR(D3jZ3{{9r`usoVLF}J5lqP}!SKu|vR8!|y zqrMI)xi}NXU6T{TVZRGaw$`riNfI_Y)JyY0PVdx)urp>}^z_*M-{;ZUiM6lpY@!Tf z8rYzw9s=M{)BMnJqkvZ5ibJUXS%HRYxReA)X5q8OI7Rrdr9zmj2oAP`OpjYAj_Roi zxBq5WLPyz2qcw%uXl%eBt~n74u=H9+qRZ!C-OTuEIXWFipT0>r(1x5VMyTa_{+ zRTLg7&kDaAi=Bl>U8dA@#DYe@sPlZFYfRZm^oe_PfoS9r(a77! zPuXp7o8nw9YoTVzO=h{p&Xm(Qj!OQ$3y|GwO|=FImZ(QZ?j$3Xi;t=PStFZP-hVvY zw3>wFbyJ?A4SwH15d=^WC7NM}?WlrLK_TO8v_Z_5CYq`vT$KO7{G(ZaI;U8LAE#V; zQUg=~006lGp0ad8-!-#p@FTT2-;8#%k1zL;TFoG@+IFIT1u@3?>_S!cAx5_eCsNi7 z>{`sf+!q%Dbr62-NrPu68se;NG^h=InG{Bj*xJ6m-reTp*>LGd#a#4(Yc!1Xo`1-O zmS#}on^gkUVKwGTvPDyX<~QwrPig$hz)`rGY`tYdNO>TNo-tJ#)y+!p%R5cQ$%jyF zJ2=(T(DoAdKm%g6w#)#fk1bO0D)gS#!U*|&Hf>h6{u8}G0000w0iLvKLf=^AF2oQb z9B=2RET^luJr)L`ba$zgckfg|;K+MtmLcXaQFi>vz~_6oOF`4Ko2+E7Ye4`21D`>f zw@Ii$Y?(|HOh*Aq7(zb;GVC^!q)oZJaM!_AqM8(_W*7=9{*i0iA2_gsNk zlqDgU)Q&2a0M}-|YdR=v~a?8?jm>oB@ zg*5a5Ym6r{2oex^`v;)lxAU=8RrC8f_W%C%oR$( zW5PSI$np$P+q%%^LX zcdpRKl|PDR%^UqbGUMq#Qur!@P=(~2L3f7HXjb;og0I0LsshAm5#43^YUrIY+*hMc zsPaMh$j3U^9CgenagnmDnFh=y@vk@`klZ@4a)(EaRszG(fQNi}FtSI~udJ6jfJyGt z&m1$HeKj}>B=JvbjDuXC3MaZ(h)68C}z!?pIHz0Wpi37Jb(~ zvJ22BPk zIW}}JO5+QuK+E-2JQ0Fplh2TtnII3|9$GXdChnbxC>6KpfA!b+K}-uVEp2|vvQdJr z;kGB;JMZ;3YS4Wl5pK&pH14!!%ZsBYGr_1{!orgXjB*Z!OA&Z1o#w|2T=7WqEYrhz z*S4s5#SyP+ZpGLYkNIiSLUeb;Hah1An~M?hWT9y9$B6a}U|)=DPuZjoH427%33Y{X z#D;}v-4qIKgCP4sZ~UZyAAp28_^|5AU@DvbvHr3Xo)U|Hv}!gi z7Uq~1M*$xGOgkda<8cnb1i>)FTWzRw2S|LxS~x~&uM%(s!|aXL-B7*?CXOU&R=|9K z_n^gW%Bw zo(&AR!1zZ$IYG*#qHHotVY$GZPWcLkveaD;$zS8-$P+Alx;!skY_j>p3?A~l&`%bW zQ?hJe$~uXDUllT~n`+gxaUomI`C5LQ5gM*!e|SZ*fix|tFO&Z-k9p7Ppvv1#k*F79 z&juKO4JCZ@JmsH$384e9iUy-w27%`=$umj4ybCH}IoABPa_e%g_a|>JpwpRmk~!Uh zVDGSo@|9_-#r-3qy@wmIlISdM(%Ki6HpL9q&Ng+(LCXrU!)UsXl_4sZfd)Ry5{;jg zeJR%oQL`4IHl2@svbuTb#u`)KKok~?)wIkLT{V*eQvgNH^MShVUWA-4zM*nQ0P1=+ z1Pkl0d(KIF4;j~*@o0YOoz~vOs~z}`3^muG-c2$d$_{sR#L-~`>*BQSpm||hWkrnh z5XG<00?P-I49PcQ_)q{o^2_bH^x+BHYyt?6FP5X<(;U}xJ#Ge1N8fUQ{Oz22TiV(P zuP!f{RvjtS`nZf^Y=VpWT90R8mV5X`^QbDLP)AW^1G`#cfs%t$yqgc@KmTMth&;6< z+k!}v;oya;t?5$bPQ+#e(Y@y0h8%LJ^cv5HwUK!E-+bC|tZ(P<3mz!-O7{A+fQQUu zo27zjY$Y>VkQAc9(b#mBS;KHfc(R>5({(WfgpjG{aHtQ>guIRG9RjxCl{j5sOgl%Z zt)jE0IEj9xW*b#EZ~_`y&E&CaTNrdA26spgjt^ho#000E_L7MwX z;SPzEz>NO^ZbyI+7HjMOASaQJk>yn?OPdtv)_+dZ+~P+DX6mgxR7(rZGPw{`OYqy| z*2d(jr5xsx?+%RYfqP+^XWuxEhuiu~+;loqS}48#h1WltXPGGEW+$9j?m43)4U3RA z2&;2aY_IVyHW`0QaidU0yIugDgng`d#nR572E5N*9Uk)D< zGI^`an3m|1Z(WXK74JhN?5%T#X}6u>WeM}vK3-06%9C)wxW?V$t%}ltnkdD6G#l}- zx8fi$o6YAYF*xel?Ly#}QPd9i?5yQlfufi_k%yK*b24g^Ctd^w!X@=5M}PCKQS~j~ zQFyig7*Z>n7|+xp#d}+K7fli5z#cNxGg`SxK!?(+|8RvxGgVI7FmQP!FtssR4x~#R zvnQq_QAn|ycr*6}+!0TabG{ieANC1KH_qSO^9eu2HeHxDUYmiHiU=Du>4BT}jGT^u zSr@Z1$bz69;Qux9cc>OcRqIywuKS91b+8kwl7%C3$ z#2rP-t&CrD?pF1VtnM!tMll|{VwCaR-i-xS3mPBf7k0LI4M?%(q_`{M_o5{OG+{OD 
zCHyaVt^t5Z2YajXWYzjJf4lg5>c`zXMh(41b|NL0g(9i4ZP&}4ozc=5-nKX(AfjEm zn7b<|4a?Ax3muYJSV-j&p&yYFOby+buA)X29MsL+HV|bP=-R`OB;E-~JBC!R8e?n+ zh&g8ron4P9nQpBZQdnw0bp1YAtW3L%<03g%RpmeWay9ySZV7tGFG%}xj0Svks&O;3 zt{M6FUX5Iv=$NmQj4&tWdp<*9nBqy-OnB3%^De9YpXH^`vWJQ0ccrVd3cFGIzVp`@ z&agc`K&+G?aFQ9^X62&xG&%rn#2r3M;*1`S>-<}}WBfz_91)r#6L6jx87`0O|JbG| z*bIEQP|dyM7yQj2FGI}PPy>|Wz`G4G?byyoy`bfz_6mgGMN6>w{uOO9Y=g^@KqhiR zc;0>e;WD?nX!K3jsa69TxhF#=pl$iOCPk;e`QjYgf_^tio|<()x$To~R4%s7j?GW+lr4hPMedsKGo0H3A#-YV@U+ z>iV8L=K!>iehZa6O$(VMtqI$gJ6kLl?+kGG$f6cEG75hEg&<~^&Ux|-_wkinrY$9; zu#d9pA3L8|${j$l0YE zEd$FqM3{kfU9J713z@f#^VtBaC-6vdwAT-*oOM&_m1E3vDasG3J6A3&0N>f6T_wD- z&su_>4D+=i9VPRCt`Mb*HFfgEO!|U|UUY6A6L#EZ};*m2uhiCa?kH zz*gH+m4%QugOD7osMfYj`$g^I?(0E%xiu4cgsul{1SIJ)i!}HEPLi~qUmEk-=A95x zHg9B?AW8T6*dgSZ>_fC7KmY&&5<#CDLKgoejpSPZgrK+*uxNVI(41mqIi2L`W{0xU|uL_weV;*G;{Afg7C1M#y6c4EECr9*nX3}SRzb>1_#HLaBuc1Np zXNd~kZ@v(v(gkz|o2&ercAc+sN>hbK;hFS=O!|gIqKfxsBIgs4b7{vYL7S{+NLcH} zb4HIzK#`gyzMX@OM3gsOqL93uXC)pP9_1?K1mKn%Yv!=xuGXC0zWAKh=)8-fw#C=3 zOZt{@SU_SB{D$owd#b=?LS`^zC>dw^R%_LBd{%j5(TegDYJr|qDEYjX1tg<-@^V8=X^p<=FyPvhP0Suz)D^y>|^ z1}8HA?qGlb00cTgn>vw2+sm_uf|pF;kQ6Hx>ePV+8N;cYeaWqY9+IKUmH)qUOK%ScQ`Qqaamg6K?Vep;Zz~ila|3M5CWQje=IqhA;x1 zGTU8#YEjPQ5uK#Xb@AVp8^Mjp;pRx<#d~Tc72?j}mB8M#sg@AR7;5*trc-XdFQZq(IE9r-FySK`2g)@)^P6H6FoA^uj)SI!3KyfO;wYMZ z^L#c3<5Yvmo26QVKsALVP<8EdiC>on%Dq{In;BJ*%D&e~vlaoe9~PF$|JtyS7uxWr zt5vwm=l@xU+yi_gx(&Qe|FTl1)mp-A!XyUr4QkD&ivJC6>DSW1j}asMoJ`6sDzJ~U z+OwB$AQ>h%Q&{bntia@4wSXQ1fvoV?R)ly>_M^+osJ!o*L5o+YqbQmq#H{U;V^3jc zTg;O{V~}St?~KP5gKwjMh-k*RYEdr&~rPVC;tlSfd;53t1NufpqPm?~?4s{EU`4#)}WJJA)F z<=AhYr`=u&r=+kSco!CY6SJd9Fw3RCZaPVlkAE`DME>wE(~h7xBc*pv)QiMfN-AkR zV+t|RCzibyaCcxv1EJcJa4xDFK5Z<;r>*B|`jE`sOz&;_r(Ij1pTT}Z6{t!)&l}s` z++ZLjfE`=i$A9)qyotyxn2AQ6!y-+%^!^tGE7$N|7Bopfv*|F4-k#aD-Lp(_5zIJLWxh3w1JA9dwc13>4WPKfmcs9weq?tw(Fw7zt$ za{K?6U5(U?{4yYjsim}AwgNy-NRIHwH=)aq$Dnv7t)ow%bq-R^eeb_hytCvBn<1ZQ zC6iRi>UaIz>%qCuNoN#P1oWiTVZ0hr|o5I>u;r&op|CcK(ot3o|*TGwsK zUKz~Z^DDf-R;wsuNqJ&5OVw2}gJBj;UX2Zm(6#tAvj075IKaYDKQUHul2m-L8ci=| zl@1`eXy|x~8Pk@K$82Pea0g(>0cYYw$Y7Nwy}^E%akbBtnb!1X6a{*^1S`H~1lQ|k zMjb#uw*BE$taK*;010tHn}18;2t%SD1M@)qP!!QIU&5KN?Y#Iw9!UF?ViPz-$Tf7b zb_Q5{b#@;}dcE3x!pn!(3&)B}_w_>w5=~(UH1w&Gb?Kx^QjvT00^&H$wld~Lh}G+O zlSvNIT5R!x`gKJ`?(pX zMPfiHN>$oXZxQZBN(_=T=7V@DIfFJaRLnx|J5Q3&Q0@z^c^&)fjllQiiaPId>rW2? z60pGciqem|g0%m^?Uq)E7~P-5Mb>6~i`*P>6$t%^6?hEj2!SNU%4D@N^kgh~jNZDw z6;4S*^@6j}k}iE?SAwZ)B`>`q>>fq$G~f)ScQz^}r=o$jI4`7{#G7NB=jYyY=9^(s zC)e_XB!W)ltBces#4!l;=V>DkK$Z? 
zPH$olImmvEE7EYx_;`kh0--ap2rjcJOS1RLT^gxI4^0S{9)_I6EYQe0nREeuttaIvN6#RABHIJI zO=93DL1j=6sX)qwVk<{Qm=T`B4$4D_15hIm-21MU-8@OxQeYH-!*lPlCVBj!jlckGd2@gclqDP*J>#5@ z5gE=bpvCDdz6pQJp+eg8!QgT3ciX3Q`P8o;QWglr# z`B;re5e^CHoS;eLqgKzdB4FKkwm?U>2H&I%*snnTPJ0OwmYd`Qs7)@2^}pwy8bJ*D zI2Z6yXKX=L(&e0SQVFk`Zy_`ajc65*qajr;c**CwJ6%9Ou5+IuoTgfTFgf-Zo% z;3`j~cYvU=&r2$USVgIXUQ*z6sd@3s!E_N$6)vPG%L~37V z$uB3%q)RRNv4ZzkPJQK72&87>EhBEyIOh8ch*YL>l(vW6qgzn5TxCNk5jpC~W16GW z^f|Da1AC^ISG_oSnvtO*M&z}f8vD7NmU8;SGGUyUY}>th3`T-eG8FExU6*CVf~QsM zkVDZ~=F1pTRI~gkUr?>7O$JghFtONcLSz{koz7qgynot!+samu5V_LG=B-K1`;&n( z=D$!lDcXVqfg+!|B9OAm^wPf+-z+8H#w=LW@@{3_3Cm-AQ#sth1ow(#7eN0wjc)3< zr6S*}^_RC7R=_&f03=YE{CIrgn27CbNeY2ZI z<@$f$z*I@CXVl6J1R%PIQ|N+CHxYEm+(@eQ++wed=CM}uHP_p!0e_ZC5$*+Bszj9C z3r~%9C^_nC-r=~9 zM4Jq!WZd`k&l2~#v^wGHQnp;sdAo%8gbBsQe$;{|*o3@EnFJHjLp$<(JI9LHN?H3_ zrg|&yD%wi+s-;7hYqZHZ3GK(BM5sP^t~=U@zdcXfwO*tHNQ zmsa(!=9e~TW7gupKoExYy{5DU$+(MV)EYnN3z)sI@Kk!wb3PIy>U;bo0x*nsLT+C8 zv!h!IlQ;6+r4gKI1V9%PZ(P`yHhSD=n_|oMN9F`dg);o$V+RZ5I+W}baKBc`rK3uJ zGYrd9O{86>*zXF6ARI)A7H7c`H-Zwc6MC6xUJd7zTDf#2vvx7MnYVJI=1ZA_TGP*$ zMQ};+y@Zv20q3579ZguoWhKtG#_9O8-OJ=hI!tQ*-cb2tpm*FL`we^Ekw?X$&GDUk z*Mr#w8NX>Kjwi$lF2(JhbhWx}`p~Em0etRtTu>!n0y#cn9o4)8=EVq%y$S453RIAEqdP@u{dI9pq=MH0P}5dbS3UIw zvGjihQ|EnksB4?`cd??7a1f`q@S{BivWiu7;y(VzwtX)b8H!07j%e?U%KlGT8y2w5yn z48ym8V12-hW-pBrQwSfkSk0$QQ6)D8m;@|!u!A)QogNeJiTd4FfnZm+z)y}0*Y{~V zBMYak5I0FMhp6u=L5;_Sb$=J%Z1vBdTTXkVK_&rs?4E83J}PhgicLg`m{U!c{-FAr z3V8@sIxoqUq4-PMO;cPdr6yKzZ?NCqIvUr$pUIo$AmTLaz~Qs zxRZ~x&HjYOFz_{o@YV4~RqdEsi(xPprWL?kDmu0hpXNdkwJuAMXId7n@OPHUB1>(X zJP^^<_|dY9RF$5F?Zr4y2W%efxP`2j@E5c#KoFT5u535II9TF<0005w0iT{~Lf`mH zBKc`ypC!0w+axKcWx_jq^Va2rBY~-S4gf}_&U@G6@R!C($fuZSyorzcbc+@^9Z(s3 z;KlFD3iH3i`fxBKAT4q+wA~}3dV1_gkeVkHLBWr%Cn~?pWg53Z{PU8L4kCcaTc5Buo{?~$`8HiF^Q=pn2v~LGG zgQAi=M&DJyRmZH1*FLQ73)PmdrI#Jt;Em*VIpE&}saS56Y@ zKOpmiAyh6RDq4ZhwevJH-(*7y=t$)>K%p+LM&2ibewDWJ{rJEaA!b}!z<;N2>Gsg` zi3(!xzIv&vwu%n>bvW^@2tX#FiXO5ofOW-YETP!x`2jQ_&EzQV)TeZMWI0jF^z_8U z!mY&>TH$~I00f~yn}JE;4<=IuJXZ@Z4jI59CgzLcCPQnLrOX3%pi&Y(G3_WUQg}90 z$#IY^6ZS%j29!V%+fN)>;Ze`QVQuFI59L^)8JHvF|J#>?3`^ELdSvIwWGu=erE=0m zD^C6{+WCo8rVV{yl~?W-Xc%Xa9qb7AG|S04LRiFZmO|=13Ic`0{vMHzdnA?7wF7M_)w^3a1^p17M16A{GtC_Z$ASE;B) z)5KPC<2;kF?Ycpa6hOptnl=)VS#jDBvNuf{+U;D|b5m68;i^tXOXhy8>3x=3G^n4c zw+ZZ~DU(TN$zZ;b738u;cCbz^aG-tC0IKQF6vi%_t zlH0>R2SM3;HqgdFMf%e-u1n)f=yqxOcJX z|4{=rvP6$OH^eFUW$Snd7pl3_X}nx!-SX21odkMb9ia3k`Ua!g+73ogZiX z*OJEE%{HkCXNGPC6`kX5+vL}PT`ykH70u~;wS+-^$g!bRvrq&b*~$zJ&|o?3BlkZ$ zh;o$ij@~3WF{&3yE6fVDKaec<6D9UJ05?o;<%=pVfh*J8iymIpk{e{ecE^)1_om&(4Y$Kvq(GL=1Q>%qAiKJUA{8CG z^(ViL@SKDmn4cv>8GrPNmCCEv*GloU~uIEBVu}7?+&pXqz z`YMufR5mqvM3YR7DECM6mdf^dFoG}v_KPr)DzW~32jb^DA+v__nP$+}A7@EeCPgs~ zk*^JoUUMOwa)CV~h|*%*$;8%Vm28k4ERO4es|xwNm1z}VKiV>N9 z;5y8E!G3MyH_{6CxDmJ^2yy}i!glYxpq1%0?eAmhaRZncru#6!6C?|=ME>rC*v}Z+ zAO_fJyS8Swh#JfM)y_OTgOls1r12H%Wbw}F5OuFsJ;V^h8mlFAXx(vz%giMtzi((N zG?&((KKV=QG7^|LaM|vx{Ri~=Q%Ef}aumXV=C9=>0?#t*v;U4~ktxzVSC{NPo*H;@ z0001J0iU31Lf?N~)Us^21%E6)MHEX)U5U3ycF$3GwE=O~1ez_bj1{pgb)}xf2}ZWH zlthGGT7IB+zB1X=#PyxkOb$LzE8W!v$y!vXr&ciCZ?&ujwILi|w=5>x3jJFppyl8e z*P8Xf000F@L7SpU;SVNL0xSOvRKBH<^tMTqN{)IsQAx8b7?^8MPXR*~VG%3KhU7ob zjx73Wbxh{=GN={f4Q`ZV$N*S$UN#|?e@8k^MmOIfF6oCsz1n@4*#^y54ecE1F)h(H z>Z+~Co0EX=ieb+8^eC{98>$SikJYva*LFvlMigiAFFdrdu z?45e50n?T2DrI?0a3MEJS7&0v*_>mD<8*4v@4#Sxv5UW)Ljma4Orh(Z?zy~f7K`g- zVL*cSDQwqAjlSY^0YvPF15_xDTS8zUX$rwR7)jvNY{$C9!o2566mT*5qUxTmJww2$gFE;yF(!&Uqn3lEDY@)eXXQ}eZS-C)^k8N9HIhQgR@h~Ow_rgt~b@9#Kj7kKI zncvYri~p+I(Ze?fF{zHh*93_$YzT@*w{h?p>%q_;e3!wv>^9a?B=%JA&OL7AX9hXW 
z#JR!tL*>M48mwY9TjBVp!r>&I6-i4Z2RLVY^=V`ijmp^fQDYr)k6j2>t2i6bZ!RT1 zW$V*jH24B*Vx_EF$?e=?t4(@Wj`gahc*3+TFJSJ>aqHK^1{&;fz3!0=QD}Je>qpFM z=3QC8L7)JzsoK9@xFzXY#uK*qKSor#r6?y$LpJsDrOgg2aFh#b9(wQq;R~5Sdyf6p zow)Z&WP~Nf>J=}ncxY}N5~({onrD90x1=wZ-(Wf&+rTm2c{$7LYovcJZX|aWE-zDW zY&S4R>kB|Mff19pOmg>6xjrqTRQ^zPO;=yc{D65e1PQ74+7{>Ctj9S=32`J z9OO(x}ucw(KIVyz*S!TPS7>1<)yMp$*9;8qbLU^t-puCXxa zTV(Dok?Z<~kOKzf5VuJBi3s7s{HjO0SBb6^=@jQ^N2%_6Db75${9Jj9S_$7KF*ZM0 zQh)8B&R)1pm|@ZQ8zUh;p02E#dTI-yWfN|nJ{tXd5#3~H!Cq_m4tWkSOJ1g!9{cI` z1nuh~CU$({Z2K9|5hz%SZ6%a=36*L;RSphe@n*mj=7dsZ?w61~iRKoE@OT1XMaE<; zsNQ(2_FfKx)LX`x(dvC}Ib-#36qpaeIvN7BE!3Y};iIxd+IyX$q@U>o~aRp0^6b9SPpj z#}%5$Dv;;o!0hi3qUZc-rmpUCYcANT)~H1;%4xX${qJk_$8M-_Kv?|ArY+}?F+|&) z%A==4&RcJCoiYSYOo7bc1&}8()v+9(YFP!+cBtE!5-&@VpG&9zKwG zj;l_vrmuN{0000}L7T%#;SVNL0yF;s2}{5jU?wXlI>;FT&kK5Ps4xl9Ec;Lx^*wY# z5CGieJ2M5fnLS^GZVH`2%$&*QebXwM`7Jo1t7fJIkFvWCXGkzQB;013`Po8(L3 z2t&wx55++Ia0su(H|O^Wg=W^pqA*kMM-kCD@N4KJyD`sQizyc8icKS)P4o{mpa205 z_e}pv5t(({U|gR=;+1)Jqf%c=q(?J4TWe166!f(|8p#kw2bZ7VjNB5&r#=&}ifu{% z0zst!wPrs4tLbI`Mam*D^xHWOCDWj9ln`Xbiw{aeJv^aJ51o!(xeRsLu0zEh1+)a* zJNMt?-M~#}#o7puMpX}Mmp~}#BR0D-@-gzPYQua!YxXwsita}HHYz^aKz`)8-H43% zV{3!|aMKFckB@%Eh@5+~XeV5y+lZ$M9Jn%}TW7$?NCGu6+nD+(hwr< zE{B-b4|_)@jnvjFHaHiYzI}>-rnUd3n!gh(*g*Z~Bfr!{@l1B4cJQF=Z!KwRx)|wf zw}Tq|VQQ=**DJU4#+&>>$pta(ealg1L{hHPXCVd_>Qj|_l%BjmqU1mvL#bmiGHi^p zFw=YYp=cAU{=qLi-PVn3oaHmxfTP?&3mhr!S>j@d;Ev|Pi7%N}3s67r4 zv^TfrEbhPc%@&er$shcCX-kbkv6LUee5=PF)w-&? zygBu)SAE}?V-Ta+Z56DnACtGc(TOx14tC6s>nscG9%R)jSr(#zKijD5Ya*1}0oG*! zOk|e8tga~bf(!wDajP0lNbv8GG2+%N$|~n2aW%Y4auLGqEG4a_S|(}h+QY~|##vsf zlX-(s4C{Yr1toOtz$E!wuj@FcUML{vZCKU`f2W&^Y^c_}94zObpf5^aWx z(TzkLrtN8Rj63Lr-JB867W&FkY{V*R>}UDvbaQ?ewE;o^E$9Vao$6= zG+<->h;Je3auz7>7g-5mKF*Up9-TaED>s5~sd86Wk1#cM>+MRj?iY5P-0tti$SuEC z;Vi19BQO@2kwPvvWgr_6$Yzgpd7WPqNQ8Bmi4NY?iueyIV~%I?K=1*{+%`y^$EC#inceBu*k4yy}8L0RKK|?^Gr* zrm$2>UY}E)6;-gzFA5~b_@{UJ&MqUu%Y4!tt%K8{^(GPnlMlR2_Y1MV&p@(iVWfAD z77_@KR{M-!ZsYDhu7~ zB5U3BjxA*w!fCd5OO6hW=#5+-a1bfZHH@kJY{__Cr;JyUbM6DVW&6GF$H`b)-A>sL zfD%aXp3AF%&vrut(rJza&s zb4^W@UFt7=r#f!x)2OMlg@*axLLMqy#=W?q*a4z&lL-Od68Z5gddRQe>t6(GMpWvX=ggHTDdq}l7i+=MLFe$B&%ndP&sj$YB@ zbuJZd+K}r_l3oG@^`d9~$b4IuMPv1)#J0P}iUlZI7&71x3T}rwa&K(kQi??FiuAWK zdS}cHw>PcMqc7r)G?&da<-;G$Eq!!h8){Gl3?_YFegjgJm$0m_)-Jr_K-dl7M>PPz zb2iB!tqb_oI}iboH+@I1m$X!~O>HpQJls*{vDALrOaZ))qw~*_MYW4N@0l3a_{Njt zxKiQjG~dr~9Tw#%f74#*O;m-;AP*Ptqb6dAk{lP;P6}Nj*BWAp&;;%F9N$4eCvbRj zeThl(7R%=wALSab^Id8nbD~0YlbSlbbkHdAKSwj=JU-U#5cq{a0xcbdCu8 z4p7p7wpxt)tOiwDy4huV_z0_L1cw--s0ZMntCr~EhVHU zzS3Vk3ML@mA-0ys?LyXt;jPJD*SQXnR$QQ;)%kL8LR?w}-V9j<-WTXS$M-lZ@ph=C zUr+=7kB&?wQS2W=mR5|WXr>j{2X1qs8rl*XHbg{|p~-K0OHWiW=3;8NfJdeG+`4=s zRSuLW4W9qOMnRGq{hfkb%p@K@f}pB`qK^6WwA$a67R+|X+JzZIy09~cH;Uj|xejkM z22(L>bEpY{SrHE(RIsYICWW(u*z{}>hD+w;RkMn91P(W1M<6tA;|4{Xdy`x7DDWkn zJjwBsfZ!2*O<3E=uww+qLaH_y>&7$8Wg}`)FWb9*088xo84pRu^`x?4`?_YI=YRXZ~qZ+cwMS8 zr7>wQ^iB7o`ZzNkXC3$W`VAju5`1GX6cr{HkkPImhVtW&_p_62rWiFc1z>PxU+xCF+AZpn>YD*uc9muUfID-`45#;;A`>>-I;gE91yjGCB;5B9>*HY-Eev;MGB1P z%qszh`kaZkQ2+o2b3vL1N#PGBQvy5x1dO}E-9ZFgO{d7q%!X5Nz=vYxZAGj{Bp!(a z@pzw}=!l@daF)p&31XtELjp3Y>1%T2m3VkgOKj zvD8whJY^1|Xqt*t%N{HEQey280@vx3l8zm`L3F!afMyOv0};?UUe>7`I3Ytf;)QKq zJ%%N26dqIbpL5j&hyF-LNO`Ee8)LP>&H|<3o1e$p89)`^rtwaqH`cBf;*Ww3?lWPK zE2S|ASu(A=khRY`xH#q*_#{RmGOMq4O}wWp+^y zS1qsrHB4_Bc5jO zS6arS*OK`3L)|`z0D9$YKnF!UQ95zizKgv0>_RhHoY5z2is5q^DW@VFZI8ED{AqYp zeS;tp{D5Jsj8hopqFUZiN-oAiRvnY|N@J1UWh#jk?L zmIz+C8~G)`>SNrVY13FRXm0SPqLs*O}34L3<)@ECCyA;FW<&B2RQ-48k*2YzR! 
zRJ&%$Q##>Vk?__>1X-7aJ2*>nOCg@cK3iEe{X1({kj4|cCg9McH$g1djoqV0W+fQy z0-COYEF@*)R*}(tP_yisq4Tr*I9}9!(&Ws6u*ybO-{Fbyjq@Rju_F5yL*owW`7H*V z3Jom~40PkRqplq2JsL1Y|qmK3EM9vcq87s0X~x1Fj@oK!;*7Y5zZQp%@#ai zbxRBk_K)gGN`Jkx^bG`Y!?BFl2=U<|T%`;idhu%~3Ep140e9mJuSnt3Yz9@>9-Ung zEZN;28Rbru$J;(ZI+v2S%wnVi{m(6#eHT~j`5eS4pWr~`)8mz30xNEbZ!NqSGZ)rA zlTinNVYtuLy{%^z`wE9gSD|q`Su_X|u?+s%fq&6fT~VD|gq7JWxt_Q=Wel|VL{1$O z5$~6Q>pBqJgR0bs7xmhOE;ZgxDZ18ZT`lv?d;2Fa^s3jhN;Z~G|3a0k@)>$|;OSe6 zFv3w(uom9In3-cj6B*!+b@6`jBqDfih-~SZkcI*_bRK*`-s{o6NFP!uEwflq%zW)due=vH3`}AT zbGC%y>w>kKsT3)Jm{>nd5Q1)oU~AJ_{vZ?Z>*{V{T-f4&f;_hSCo3G*iz9D&zwF*<(^O@8sNjF-##c(K{a{kfNFV1rYiBXbCmb#9 zCDEm42~hl)&|(hk)=^scBwJN;{4&yLnxupSQG&aeTvM7%6k1bKHZYDb5wH7TaLGM` zSX2vKFD_=wuPx4u6QXf#Pk#nN>(S5r*)2Tn9H~)9zWb5E@q6=A4qq`|Ffm^(;XKUd z*K20d<=qpVJ4DkZdrJBFo|_`HW)^iT7X91GO@)E*NP{9>A3@wZZ()0+sVu|NvE8Kv zleaciG@T+K%Uf1vj0xesH+bQZ3T)QAIXu3 z#20BEV88A9boX~RX@3EsunvgW4a2ygBQ1*4V)aZfQa}64kQ55NnVVrHG(7MO*aU>( z0005@L7pZ;6#qm7m+S%8ex_b}UXLJi%p#Vjo{xNX-1<$4W^Tg%2^*?~k5l2UvQ0dC zwar{fxGeS8RY+!C~puzBAHUA#df3Fnzq!(XO#K2)!4y5+aF zOF}-jzTV9IuHX&hCt?U4xU6a-gR^fkL2 zs%Lb!XLjcHt2ZDkoJy{}51OF6Zp{G}S-=eT^QMV}^gptGwk*e>=)k!_M3}V!YOIoA zB@~~pOL5Rc+?14U6gdJNWr?9SQ4A8&n_hQms}?F5G4>rWwSQ~G*C;%YaGue^9b8F1 z)R`F^yteZXYypUbJTZ?_XBbHft}K8h2H~Mvd4lvAQA$|kVg7D>A{IOkjagl%Nn&pk z7LaDp#UXae*hc6&PhZ3-^UcTlk|WHsp-6K{9dQ-e@J`K8ZzmuBXo`yT+7d*;EjN!i zLo#O~0~kPx(&apGwm ziGNu~8s(SL;Hm|syd4yN(gT~5oc-Z=|C}`U95yb1@Cqiuxq$!x09FB>MQISHMD0K| z&=KsLr2gT#e~Tt(LV5LYm~#4*jJ5#gJp&cvw$(c)V%}6H;tak@s|UP>VnNLj2b|Pna#)WSlouZU-J}Tklk@e^k@$| z4H@zExNij!7qs6vmGn#=O|1y?TO=E~a$mvkW7?84v<&aDnObXkO>riKlt8xS#u%cs z(#+$Ka5^)~Wb55CT~SRH#cW=G`q(mmqz9)bS`|6ow@#s^%d?8_bZCEtmL*pN>KgKKpDJBAg^R}W zsl2o7iYdF*=49oZr2W(7flb{_f%q0-ozhM6gjlEc)JN;l24R%n6)9#eZwqrZzAY*j zt5bJjYbLA1#R7=`#;gp8v64RL(E9E1&;6HIrPji;5WtMNG`EJfs}4&s^(1;t3TACT z`cbdoT6UOdBBw}b_#+W`|4pgYIw8+YE=^S#T8-bbQDmR;#1 zYb$&9%`4T?ba<6hSTRg#P}EvAE!0D->!aWVIp^?X0XE@?`6QbSaS-+)DEKJ{T!V~L zoiN%ye$PzB(Y~zrSt7Ob!DL{DJrYZ}KPKcsFao;^YMPiYE^F!JLgsOdW`+v61!6Bu z9hV^HHp~yp38yn-#t!f!$Gc}*^>_MIlZLvpc0noMm8GgZQAIbf)=oC@_8oqW-1KQe z?S|>0o;q<8)q5+7>gDTPvSmxx1b_n7K1lLqNd?~xoip`({Yto5k1D|^)cTAo)YpWwnbgadf+Do7MzYfLSV+mtwm+<~0SHQ+7cdHmNP8-b3 z?#16qx2pC^;NR?@`Y|z?_KnTI7ys;O1aGOxJwTtQ!dzdR-_tQ^*@rTsQzX-;`6^n? 
zDCvFdnW1w5i}og z`P#GnvO=G07mbwKthmGK=w2-b#><8qWr^({&i!A5+Jb!OfLA@~w-o0_0;+hOZwL^H z+Crv2hnyXB+%d1o%PP}up6kRRy6ZGKO4ZzF3Hj>#w;3xIneFU?mg=7b7%o{=q5xe( z9Uj%@Jgxib8wMT_K5tJC*oJdAPf|4V-D>NNT31*T6s5%T(R&6%PR%_=)k(_pNU_C+ zw1I+Gr60ZeAZo2?UHw~pn30-HoL+dH8*lkD~st( zQ+?c!T=A^F(d_&u=iN1u}x~q zUT@1+sW#Z`9^d5M3^(9yzz((9Z6X)0cBtBMdtztjUK@c+|Me=`M5_g&E3Q`6-}>1v z4uh*i`6RZTXbS1pKaE0PaMiPV&t>h;ODX&-Qk#YedYB*a&RNujM}D)Ss}wj1@0Otl zY!pjHc*4>BGU6=~1$>}JU|@vOUn4~pR`QNy*M0j-hxpWeiJcxlGw8u=F=#~Fp`s7X zK!jwyq(uO9YMj&9*Z!#dc6A*Sd{jkZ_}Yq0?i_7ULHGWZJ=Z(+P0s2DFm$a} zuD0AUiV!}g6hB_2Aq6k|8vm3s8KQCyE@HwnGY*UOx#T>%akHnT#LW@5YJO#AHYPBe zGKX`iKec`+)JEs9D7(B-cseO<0uG{RaGbk{3T_4fKxcK{NOz)MaULaeQY+@qOYHTF zfauZz`#?S?LbW);Po881`Y8npFyc2}z6g5>g{qDIZw}gV@6NiL9(M z7%N?JAj(3dD)GbrpUfvRDbU|F8Br3oriTeqy>&mLmrgjAX$1sJi(EwmUCw$-6FFTOM zG2m14+}~yUI;@Q6f^`9G?i=XJ<&it|fM@P04_2^aryiSgF-oDC{NH)C>=W$Dro)v$ z9btYX;qTJ6pG+@sSx?kjxiI7rCRccpCiD*7j+nhUQ6~AuE7+#YG6^5}g4CV-aV>;# z$24waUgzUKYZ&dN0Gn*hy zdrPhy*ZMb&)kuUx^H%Qr+zo|QVVAPRVj*Wb)46Na8eZxXhSMpdF=Q!khE7%}61Wy# zGs8LJQ{yFQLF{|xsS7GJb9r> zIkf^e;lsWtPul*J;3!i`)yA7>qOKUrA{x)Q|Hw7?YCR9YL?EzYsiuQ?eVcvd@)P1< zGPTn)XDjj4fC8N9$qZ)inR(aSrLHVEcAM{mr-sg3_1NI$WX2>tjc;EB2dRiukUX|3 zF>Y#oHnfF6H29eh#1Q(2JS`70sc8F?D;dg}$$EE619!tn>l{8YG{^$CQpt>z7x@yz zRH={`wlC;No8*`LK^~zs z|8sLZdNkyAATLsV7os}uY)4PuIqkkympf8Vzid!ru)8CO>^ul-2sV6n>|1tnF! zmFeNP3g4GUaiNQca2F7DWlXzqMVsEfV3??@@}1jPlvr1$p2vJrdAgxe0^ z4AQNpSmS>jQz`ex>gSx2VDz(D1ZgHfTc|HPF#;B^j7xtnyJI2cU^*pd?k3E*6cDw* zI*1+AE$sH-?t2Oa`B`4QZ+0_HR9Z9)Fx{lLTF5$A^=B+9!vb30EnjysSN$)RV(6@M zp^DN$YbFT^)7Uhv4uf-`FKiM$uvoysWs?0vMuIJ5L$w8iKc=an^khcac zV!rZ7YLf^){k`%8RSgWOkAab!q>(B3WBxq|Ka6T})us_8O1p1%n920xJpZY~@pGH9 z=91l*B-AejaD~%LybCBIua^^5IRMgGbASK<0e?ZBib53sNdzDqU^I%1gNLaYe}+3# zVQ_JaCqTZkTVAvz&(n%6;8$>@Sj=^eXLwyXIMp~WhhMxzAUY=u4lOHxo=GUlsr@aG#*Q#@GR30H>3iUCoyE`y%1yZx zm5*b*UM9@edZqDbx%AOqguz18NKys1&1Ik{=W60Ka$K=J&HF_FrTL(a46G(Z6Wd^t zo6=3gkop~Ed$GjKDegM&(KgFGnP_HLetIRAs%mB}SnH)�FUIk{x8U&jVNt6s3E@V`F#AcM7V3P(pQkL|JEXn&|tJNBXjs#=!Rc zJpQaRN+>Eds6l9%ObAo|2jZ#$qy;#w|LU@a zSRde3a+rNT#LF4Pv>oFWJIQA_@4cwR$oUDnQN59RJn_mH-eZy zgcIz+0RC3|%uUYZDDF~{Gr^CXy!XaJ<5}^{M+mt(5g1*S%}&L;F<=NOf!2>0s$I^x zAEzte41aR0Pv4IG5;nF^14qw3@V#cxbzK$ZnbGxnfB*ms>p_~$OW_DYl!iPAK#m8Y zfFLj-0#FFg*I8fM(9Au?kI$rH4fTVVPKt1j16&P3r5f2kg-sAy%-1LQqF)(ZC}Y27-kmg@8t_jjJ6b(-2^_WW zE0o@kcz&l|D`bI@-^z-M5R4dq7&KOmRNs5Cd{>y?cSt|;gp=wm1xh}NE+W;Gu*EYy zWq@%=D*?A8v!XenRJy$L1Cp_rSC+Il-BYbB5pV z3O?qwuZ#OG8?q7|4|6dw`v1}fO>&f+YnFBEhq!UM!R#t#x7!q)ib=oVD*QhvWg*{4(YhWM)N{x_g|x0-$W zMBEon?q#8dd7y00RNj?p)^e{5onQ;q%*U6ud2Rx!4xw$(Db&SRYGHm!p0&`f87Po) zjmFt^=q^&5F2L<~}MS;(o~?(x?Kt zq6(}}-jst)5UJ(lj+`UCIc|K}Dk?S2W(HDxrPtqVRKXFEZqu4qjnzN(BZ#DEvX)Jj z-k`8sFRl?Uj&M}Soa2$M^u}scaG52u;;7DBMnUMo6Xb_3i_<^5$r&4^O^3rbQv)~K z$zO$1$Gc9+AF;oGIR>k2-!NSgItfkB$~c3Xl1A;VTXDr)6C955o!=d;=FlxoQffcCI)WwXHHjPep8>dQEtpl#+ZmML7s^%skNg(Gz6X9_fH6h8wyoY1 z5s>6yhD5n+s_My2by(79Ypj<^ckHq;Z;*&ofsduP;w9d3VskC!9i9E#mC&v*i`AvU zxFyGB!((6GoELgnv-k?3&YU&g)8GFp2++I$u5YWqj)W8J+S1adhCxm&1r))EG~jMh zo%wwtOdlNpb8-O@T0uX99RaYTM6?JGEfTWn>}8bUxpI>E7T%Uz z&UA(P+BCaetf{x>Qk(&>OL&DR?WW8Jjw;6$OeWaDLcQzZc4s-qfRMLe5K%j=53~{4 z;ID$&6-Q|}NK8XDs4{(Mf3DrVu`QRW{TsO3q2Bt5ykn|Ci5jRsbWK^=G>g9q|0adj zchL`07W8P4dZX3ab&6U7+HI9EYA@bf7f}- zyHgD8p1wttr_HlZ&kQJBn0k+0`@M z3tUF#Snc7&5;LQ8nM!Di0i6b7KSsmcgO(1-dfq(W-2P}JvbOSPYZYGve_c!k(PU}V z+sr^6Ghdrx&6~OMf{M=otQAU{nxKOMcEYWj2V)X}GX~dD>g~~n?qotaR=x*koWakT zv~v$UXJnzWnns>Ei%b(@13L#>WlBUZF;k&83ZGc?r9Co|*LL|-s0f;KEkNZ)PBxy4 z$TnNHG@Iq-famZ2O7P#=*_IBSVVCSd27@_5{}1hX1Yvqpdp`r6Of1odXWUzJg)9Cm zZL#d~^O=3W3+XP-7f5oSQXn)NT9!@%@xr}VDLpe-3DB*EVw2x`Pv%W{p@n#Q>elzB 
zIZ6{K_Ix@+T3&RoTmdQuL;P(WO-w!gmglrb?h-cj*UG+pnr9Im$a-Qn)n;lKNSklc zcVc!qVyw4f+6t5uII*6PN=$$eOFiIw7`+$DoT#n`c=w!tl}j;+#K84u*+=CVd-Hf% zt3;Hsz4^U065kbqs%oulB z^PjQhZy45W@_J-n)bB;YqV)UHQ$CM^w@SAjE;4sY2vVKE2+?5}$^+(IL9%kn7yJv|*SbGrzcw%frQ7WsNn)&>UY)I={sz{n z&&os|OFGTZnd;@kCrsXyZZl!?fc-9C>qM>p`C8>R1H}C~F{(h{3P~Fu!|zuNT#ENB z)Xy|8WfV^HpU>hHO=Q(=#^vJ~+uOm{4X%MnmG?;2FJ;~a?}zGe$P^bJv>z~q@y~Fs zF?z-e25o)*V%s>3sh{!S$$fJ^h_5hsUS_XvEdqZ4CCq@T;hcx9bsVlEbOCjuu_J2Q zOOo*~9BCZ^(GqFlDKd(8D|NI0ro>O+$@C_B=QmY0%9(Awyc+NU!;fc2+WW|`p{@q< z5p`qMm0!|uGsVqYXR+D~Gt($GLF;UnyflO(J(E|zOw#j72>ZK$Oc~;5(V#W41GVoZ{Db5w-7QP_@i)bR}fD*?!lktj02xVUU$1_}MjxT=G zr(9pEpPBME71@%pZ>JOKn?oTTiS7o`#ure82yr2tE)UaB|B;D~s2%QgrCkm778y(F zt+9*e$I$@?(6O;6zq8x>nsdQgTR#(-M3v;mpE0~;q>L-4Rl@BNXH5r$t4&w-H0$B- z4)$uV0?lJWdNX2XVYrYnc`873mJVv4@SZiz3G!@t9L#lE9H@cM&$$^DqI(+)!37R9 zvE*0niw1k*2ByNC_sGY_9Nc}d$d0wlGCxG=qQU}3Vq>k#Tr^4Qr?M3%lMY%6yUYpA zYDF{r-nM)cto$_{ed$;uZ7oD zN%o@ynr7TAer8@svCQ+d_Q-I?j{Dd`rTsgE8?MLy0BJ-o@J4<0L__jYg;=PY!;(Hr zpx~T^L>n2FQ?FwP;zph4(0|#WiaXl2Dg6mrnszLh;;0owFjzgp|RY1F3_lJJh=5rbz1yl{~%}xJ#9T`LI3Iv!Bbxn^C)sibK?w>teSQi3<-czZi z7bGls86hqa$gadNT(qJ@U_8l1EPi;0cj^YZ6L@@G0J~lX1 z+qYl!9&F`J4I$S8nxck6XG0OWZ*ELXGXMYszd@StN#PEOl)!~Q22~%jw>>}(g%|pa zNS8!N_QHZP+E5ttNPQrO&GE~31lk{th-61rG${Rvh4pEc@PG75c3#?$tTgQumeAUu zNKQ3AZM&~BPcV$y>$^<%z9yS184{$!-*`{{e|LnaMgtAVsrSsCk&0ohNYu!%N+0>6 z_mW~bU12MbZNGCewf{0X#kn}~w7qo^HdIy-j9gNTz9qaoanpBHspEhYk!LT!TvT<_ z4Svam8B|_#Ls>J69g<$;uGh1iC9+SKwrID>)E!4q|0Fr=$OcP(s+a5m&b*4GtE49` zuqm2Y9Z^OY*@p9Xxk-E__a|yOXA62pkK|##l3CY!?8EB{HMi+6Qp?%02#`n9e<#Cw zw-Xb19QvCWVzBXap;fe>9^8jYH*JW8OSq_pokCcqF$h|T_A;yw$aNi&Aec{HO!k#h z>O9xqF)9{-56HWU-ZaVT8f8;^_{MMR35T^)l^=ALA+kquVB}Rt@`@jX?U4W#yYA$t zl92sUEu2HOuT-*H^}cneotQlctQT@$@MRpnXAy+H+Zs>n5us}WG%EeDBC!gU7e)g` z85ujYEwPE*knE2r8^KBlkzwd_+$;vWvnY3?{HJyOH74Of5H2+F7jY0Ewn)10?{#oH($>bp4~^{%J%DAbp|AGg@2?oG&>6`+{M^@&jV{gS z4C9V_>M-EJ2U>U%IomTplr-^V23<`x`mLziC{l~?cJQ>qZX`~`3eX3L;x$Z;8w{LA zxU*VV;f)pi+R8?F1oEokJ-?Nx2Y6GThQWGeJ+2NQuXd;C$+^%UsIsui2tYUBuEL6< z50|L7N+;kC{$rlaJ{O_k==ijqEem6GULWb^`^#v66V0AkgZ!;=CZTNxP(ipQcTQ;> zlU6a~DHNZuLGHAyWif2t8e-iGeY?ykD<_z0Rb;RxjuZn|NEOx4gM!jT3|F6Zoi)&3 zJ|0WD0WwYHl#^ZmZ4;r(NvQY$T|04wRj%L(+9B`R)Y@(Rhi+9-4ou#JU zl2#Pf+ggV!39xPreK&&b7@cOXNOuPyw7GIXeAx-;_M+rd64ms^Ahn3Pee0RO7*G=9 zPb}=^OgZkdl6wl_a3stoZ@ba#PWI6)xlsmPzyn*{*TXJYx-Eb7y2y&PnC=DH!*^Wz9eqh?O$he$c zXjoc?xk(X#q=0XVIs70!@BlD@{&BLmE4B{@#LEN*SCj@U%p~DK!YKftg4yJv>N$bm zli2}F1l(Gd;%#dTL^DGHV{%cQu5Ic`wC-KTG_^8B;}4u|?sl>&)#@B=6Uj<4m7dQn zFvKu;X~i%5hyqs@%b)c}$Uiwcb8NfSawO_ov2g>TbCh;|KPr9twv?c$%r_4v%-3ZT zf4H!ERvB8XRs;FaaGQ1bF83@4Z7C}M2|0*nC@x!>@Z_~~?Yn&q@bRFC(zu1oY$Wny zR9%M4aHdE0pvkqY8t|$>of;py&6ELw0000v0iP~u5T{50ASNj3kP5^w4RN^aJ%ChO zb{GHxGe8hWoWpMAHTikJS(h;|lhE+_`PA|yJ8$ZnfB*mrUO}5NOQ=E+S|RWd!PEpW zd;&B7`B$%2f;(BVJDNt}>mRInSn$W{Io=T`WtH7El1n{xL8IC)V%pP5$H7b=S^HIC z*pXv{8J=7CpV&E=BJdH}V^J9qfC7Pl!izrFE8T9KRIHz@9OIUzy^R~>IR>M^?vg5Q z9p|A)7G735{LJ)nvHSN6+y;~wFmk13pE&vR;X>H&aXnuZ48%A5Kme1BJPQh}m8bMC zkAQ(W99JV|`KHOXY$^5@6*{0(|D~dNoj1!FqA2S$7k*P$}@I zjX&+WT1GzxGW2T zTom)Mq^?;-h!0%dhmcN~>`WMUTbEa}_F#lw@kb!1p~q^-;Sc|#Olk}{;h@?VQ2g=d zecW5NB&E2Gd+7&&{Uy!#RI!gHvD5uWo7KvyK-GT66Hji8PsE4<{=70MA@ft)K3nXD zItIU$4eiHRkIt;u2UcVFW{nr9SP%-XwdF%L+7{~L9}#bon0;Y2t>LMG%D|29`#J^m z)y5mo&I0m)G$&Sqr$%_^5YW3cKNL1M3zi$#WIuGR4J+xX&X=n4Qs?RFrGYj5bX}&& zonQ16^%&+z zKsLSrrOS4yr`#7dmIJ($z=-FvSlm1g zXA8Au?5a#s*YEK1K|E>B4v(5}|KL%n?0gd~X5hlBZ&iuv*2P(20S^Z(`Efcct~`ZW zAwM*5bYfbwz8}40t=O*d!RYYVChqpxxZm+W5`*i4rXI{-F_15Hm1kosyC;Mp*awMW zU;{;zsE3gN72g7e1fBlr#TMY>fMmL5(wM69f^cJi&xbYCHr3s@6AMwU?X3B$FDv@O 
zMW!vX?af{|7_L(F0G;wwc;EI;WDvJUmW)iIjpF#Q;akOw!$YWDsn1`)%akzbUadHO z#z?CukvIoTm#W5DNHq^?&Hv3rgU@8=+bYOgEB~^Q`^BBq z(P_%E@hTCM!ejw3wKe9PvOE5lW2N0^RYCn`HvDFM03hI%o{#54kYB4-K%`0QhnZ=M zu!Xz>2eXh;l@9=)D*po(c%L9nEyp26)IG^4CW4@1W$^ZXd-J3EtIJsI2?dBE-XV24 zKNO^e_CoVoOy#LDrknB08E!U{s9EW#D(MFE%$R5op-ipy=D=$mi1AteF%ouZo(ys% z{vWUG1(rTLx8v4@?+`yGH%B3#%?Vb`F=W@iYfbD>xe2uNh5T$Ze}$qT>AvacPAZcx zjK85hlT#%_8A6@BmwhAeUZ9!TA+m`iN~Ip>hs-zCWAw*a3hbORpPl#N=2;Mdbm7Z2 zjJu@G249@sf+v41byuZ1#Hfj-!LgQx@M7v3KzKT5k1P-8DB{Ook$kBoE^4_s6RCiH zo<)3sb2Di0>UL!BuK(|a{x}$M@km-dIRIbMNo;?P-U&$Kovn_+M-wBR(dHdz86@ES zpB@fmnD@4EGqX}Pf=82H%2X02)f^pQyCZDO3kn{P9%t`D%|~l4EFOsnQey_3dbGij z&nE%(UNi*H6D`giA@Kz`7f@{2PO0O#+W;|lXrvrq6D&JVP~vZNIY~aeULn7@Fb+7l zkT9v0)M6N;r@(4A!qGQRS$Di#-n4}DnncmA2g4br+o1W{#XS|)T&;+^F}P@PwnaJO zCpo4=@nVNC*m~!j8R*|r62tj7S%7$fw%zhik_Z4HR#;-x;P_86%cmfdDQTOT3)boQ zev@0p@)jHFqqDCN1y|!ZoXeO^sX<--PRE0tzn&%io8ui}@C{Q-h9+u0&IIY;i$!P( zk~f5`xqzSDj<3r1Uv$St5AeV=gwIDZ_lF$e*)@?UzZbyq%iid}>(OXZ^-;fboImdk zk5|xa1T204i7Vmg5E+H?%U%9{CqbdlgGxqneDV%^6}QlSMY`t2dzF*1V(Kf1T#>ap zx9TV{vKBi^md+q^ftA>1?73Xe-V3HQ{M?BBv_r)c#s1Ezuiys)X(KAs9AC6I_Fc;1 zy2XvcBAru5EX%bbJhFX^4qRs~B4t>pUA@$uqOsn&kX$VUy>0e@*y$g+<8PO+6Eea66Er=LAv&$agYrPlwp?49hm#h;X%GRWE&?5!MRU}fI!h+SsNg+Z|HoUm&q-!;Ga zixICxtup+$Q#NU}qjAMvj+pb&Wi`bG=_tUlje*!|l~HjQvt2lIcMe|tchgJk3tMp+ zmrR&^4aq4+t#ZpEIKmadVOlmG-;@2lxB3e)`A0 zvpG#1?qM5BFDeFd@tLYEWj-!tity=Z8?`WNF$%Ss9(z+iGhm3B+6mx&e$R;_DZUjJ z_R)(oE9-M1rn%Y@2$l;CrOSCWeyO{OnGGEp-(0`2Y5@>@OxM+Slf|N8KIdUF`+!=sv(Tj`i2vWo#i@G@M!vA6y#{Rk!I0a?S$r%uFDY>f^x~{6Ff$4G zRjS&2jG|u~+3Ioy3%b^=rGH6-gP>Z6zw+NZiVp-yDcn}W}hUs1|N>>C-{0l%43jY6+Scb4ORXg$IN zC0YO-(i;5>-g1ZR1O|!x(C$tkdk;&BIBk6Feo|Gc^(TTOt?A%}KbH z7cKoynHOf7^#sfMNfBufrS|xBu~zTT10se}CZDj6rE4+Wu*kH+N#-R-D?(n5JcI#Rfb4&P?=SDZxD&cEtRHN81 zp*_9FE!{y%`U%m-H1aXkroPJdiW62 zU&T!I4<0E87@c4<`OLH|gNYxcO@5;Z#ZRy;?D;D-S4Mc6AyI}sq$`K~#}KhJpR2W~ zbi>P!+1oOo*^RiJCL0Y0&U_m!)U#M2E?hR8JXX>_8jH5)EEk>)JPrb)QDw2K5!ikv z93(>hxUEd4nYg=pC}bQwlN#4*(oo2r6nv9<_*<4&lG_TX`>(YUpmrWYr5v4TfmS3# zzr6PJEIl}j3=?m#s%u~^jA6U3NZFfF#pe`yw;?SzvROYjLy|$Gco2pN5JlV$RSJ_p z>y17n1fph#LJnlC4RI-j4ovsqU9OHSRr|`fhbK?ZlTMh5;`4{q%j1Fb#{^BOE^GWB z?KgkkBd8S zetTql*;Iq~xgd>R9wl~10K*R;CgKTDrb`9zQJBaCfN&ye z(f2)B5%PZZ8Zmqm8p}v|zkg^8Optv2=0NA2hk~jzU5t3oHz@GpkK95raW3Pve92=T zEK3T~$E@o3r2_&g?RSRFLYSP`pEtlR#eKY}c43l<$`zfVx>K(L?4qVEUZvVGOH zPM3-9ro~{pq3_!=`^zfUa@Wj_3hj)4&<9ol&Tj;#EVQHh%jYLi!Xo?um4AIjE4O*k z%EMRvdxiP`)l9+I7{(B6v~i}_L{izqyF-0XXa5ukNBysZf~*V&i@sA4??v8eO2{||kjTj$zT8J)Z1(eYZ8wt?6MRlLj0Q;Q1eBi4 zE+CK4)IO|WQ83IK|Dr*EC04!tXZns`5YH_v>O4DTl=Z^6sx7Fh6=IAkH5@%68?g^! 
z=wW4`D3)P~$$$8jqa;bj1@Bo=kx{I7S|EH#6_C;8 z>hD4`Z_F~Yu8sw0huQH0tf{ILzAb&LFUYCwIRF3vWC5RYYC@k?7I$%SL4=TJhD28o zQRziXz#wG~sU02*v-k!(74kQ3vSk+!+0 zYw2{Lh}kybrU0AXt7J$=SlRTxwe4=rnL{|RyPTq0Tb;=*=6<8uUB5$`7wPrP<1Q}c zB)VV4`{=u_m5S^OwdtR-pRw9lG~VnufWSH9@51LneHzqHg_fy@nv6*WofC=PO`@WDs|a zP)J!>UHvD@Lg(neDvJUf%lm2V&Y^qoPmJpM>wb(hH=CNh{Lq$*?hU@4wRRJJ*NBzz9Dx9?fjA`pcPb2bUP(@+} zrm!Bb$lj69xYh_r1BoYFq|IWpJeDXE45s$&ew`i>M1T{pTLr(1el;@zZ8V#K@8uo_B($4C5L>P`OkA#8A<6N=9$#|X4E)-6F+DSYQ7hiaikhx)*C45wd&M~I zuON{5($?oNDW?U6@*>_6k(>tzh{Glsp=O1QLDeH@fDS1aKr z!f680wtM2yk8vbEY^+{$^-sw#Nf+jF7gVXjxexKn9m(tNezn4?Ds#BpdZotJVubw8 z;Y8GLb)X@~z9q)P?^#-7FW$Em5~7f_T$Nw_cfdJM7)}g%)^^GO002}0pOk7spI7&u z8PS{&01Mo~%atWc4Mj8x925{5r4%xbP}dYc4lfx%MV%F{#y%x?L~r`Lqo!^) z^_jj!#<)W1856w5BiF|mh^Cya~{Mj6X3A8_3A+n6&PA0SnoS{VoB2s`4UG(-NJJp@aKEk)J;_YZ-gK)n&-G&CFfb85bK@8)Fv8 zcYp=<%44F}mCK(s3VPqt3;U_(EjN3lLIONv3fKKtnK&2A;U$i%@@SO`4!PL`d3pJA zWvFIQtMeo-9e(N2zQx66mo=jkN3}{`U0a(8|1NT$S%6eIo6SLSFWf?|qyP%glcU(L zXrr3I+w;Z1D|7-aSe6h8j{E4tKF13dVx89&!?^Pyc7!oC=U7ELUA(vEWx}l{S=`jgDK zDP;#tl5{JoG%n4Z!YNiGK_dX%-4U;qH5s4c2;4Z9zj0*0Q8s~Umy_FY0% zg(4PFW>vl8b+?to{`@&>8v{+iR^{2tVnoWKm*5)(J5fMrBjt=)M$(oH@&K`^pI<}A zz^hqWubBE7;mzO&-up#Vw?*amP)8fQQsiFUtjYe*5=d~izay6=N$dqT_XS0Y737zU zBL>}I?qNc@d0QwpdL$QYpQ4l>tEqWd)kWh*gi_iAXpYr z+_py=UL2%~nB|BjSAeJ1YyFPQNW*YS`|gPJoX@!* z5e5Jrc{>+~$dC2yY3k5#A3Hy$h)>{^nW+$pV8;3P_k0vd#S)j9!-SHU*d&coLxfXQ zwk~J7)o15cBXLP4QU8mt)pwMx$lH+Q7x7saJvR(A>t%_-8D0Ji#Ba}5hLwWUZ}h(S zk%W^aWx0jH_y5%%5GfsKNnRQIi-kIf5xoYwY|R}G(lr*y&izHxAK-F(8^sveEwm(q zBbwhjZ`}S z4WNzEL=bg(EJt)<+#WH#4B?t`hB2f}KQb7{a%#6s;-A zK_S&c=>pE&pn9&7=MMr51z#C**H77iU%YcbAlSf#B|rlhz@h#y^z0YDnDU@l&~@VV`K(cUs;}C~e>5JqQTQx&Sq#XNC=v@LrzUJ#8Q*0&cP)K-18Z zkHpI3ZAk)%@az-5w0pf$M4X1TWG`g5MhqwMSR2NrsF*F`GZdzQ#*W%UI;Icd0YUf^ z&WVpXg}svD8g3;4$MG@Zf=Rb%`fkdkQb;+WlnUX^9);3?IHP_9h151cdF069lWnRz zGz4&wOtEC_#?Y468^1a+)*z>>+pHhj7lvwhxL`VO@;A-MzN$iQnyOq`5Q29}elVQ= zN7|Biy8z~Q0j;GMGfd1#54h@uX*lU{ynf8%aCOx-3&_>sM1W(cV0KVxX;a76pE$TF z#=0odNB(#t6y<8|jNOfGrJ^FAEu&F~s97$5M+FO=PO7st%77eFNpi}AHi>j8jgv@X zu@C!&j$enYv#7B-tM@g;>xrj3_8%6FHQSYX6Q9aJFkD{ho4ocABgT^TR8($S1p zWY>_!6XSeJ03YG;sTrxIuy9+Uh}XFye`h~2mtr-)mBbPr59$$;?&F`hvi1rKydAB=6l5vM3*DJD9-|)&cBK$`&ydCDZ&hCtV!97E(D=2 z6hQvPEYVE7>h9YOh(%j53xEJ*Uey7k_4^?jCAN;vf;X&1(Snw&^5|Mp1{5UB19t1j zgZW&&mM(0#jY5q_NQt35MFXHFeks5sm(gHZ#raJuMfiJah^`~!u=@A4Q9c*BfR}8y zj^~Z<*HNCyIq&uQ!xkLa2m7qvfM}0{^<>mO@Xr6^Eoz2K@Bi=a*mHgHL8ECq>NihS zsjG%HahMP*%eDQmu;TOFL5~dI#8-i>qR|3=XopR%QeT`D=00PCRCFxHnh%o9q!&^Y zz3Uewya9V@=3Qt~Z(BIl{k-`md#ZB<9aHnsLcz-7lqu^sBDKiMKh3#-$k9E`yl)qdP!S-!B~FKuKFR>0JtrTCJHD5d63g z2~aKqGj6z89;Ez0)|lcD1{P_XX{*Z`r<{&7-)C-L+(@BK;?#WZ_m*`M?z9Mg8ym<_ z5ECX(R1*r0FZGCP`J$?juq>*4gP?Q;41bi3UUzhX`AMyW=eKZvhve7~3nNu{>{^!U zEp@;?5L|h~WbrlvN3$(p-6MYpi7dK>mz#c_@jOzh2!jvWdy<%vwFKrryItpJr!xeM zrUyMzc)mC#besM^j)Znf0+VIu$`wKdD)PLaHm%K<>~ACe@A6P}HuQ)Nw{o2lGfp9; zneA)9&>nsM?am>jYk>!~EGb%w@vqJ2I2^C1G-3Q(MmL6m?380h+poRWI!g{CLet24 z8XUbwAqKA~1(Xlrg{uqwPZ^kKd_}S0p=;#MD6LjM+kKR|T}jt>!2GfjBv6WwZF(fc z+!NgEi&Q=Kf0|%)T&(G%u@r?++C{ot?#@PYv7|XMBdt;%L?Dp`#|u8tM2LFe(%Tkc zmm<$!d{7UH5DT6swy9OM9VcL}5W+N78$Zp7`ht8}Rk`>VFo!o7{ci@0BfL~n&pD!R z!^`jPBqTFpko&5htsqq=hrrumD6q}zoX24$Ht03zVLC&f4W%>(b^!bMC^sNi|M*VnP`^v14%agggn_{AqFVIvMhZqBVKhLE#z^PtG z2f2H>PHv5)A75S*Xe|%pK-%r;IB7lSVv}FyX%rn8Ff1D+t8PRlW#RA$yhOlKgNk)B ze@=~85!f%+qgyho-4%$gk8AOp0)|QEsYVC5>=QC~kZ!+*u}(N`hf6>~ zO8<9`pB^EHYAu@-tv^YVM}_LWTLB6o#-BS?`=UMD?@Dt@vVmiliA z5y|ZJC>)G8Br>jo1tIrBs_5>~p=l+53sQ)ba3=0hRqXMHu3Y;12|`$5w;Kd-9RJLu zVdbI&@~x#I)d3co>;7+aI1U57b!eKkLyTRd37AxbZBK-H*N*E$C0oN#0l&fQp>o>7 
zEF@yHZaRd!c8jq#(UE5PDiiD)FY6x6{?wc8f@G21HG@qG$v}1&=4gG}t~Agt1k}_J zf>SMMCOkrBr463qIS$eFMRm=fQQERlOH<3h5zttepc%&KO)N=wTft$>yWrF#=9n6b zE^*M3HpN!3sEq=5Ye0X6!(B#G*7>s1ZY;$W!qtb-{x(=% zwBe@iSw&g#eetqh;IUSoAJ#XQO!Zmb7meX<-=Zw^ALo{ybY&2obl3V|ZJp85NC=R_ z3VXrU;02O`lBp;ck!BSHUSYK0waLcV%*%<3&#%xzP($IYPc9+`$j)f_EN7sUZZ}{S z{z6OM)#GZDfQIm@M4nE$lrPJ!f8|}15VV!aUB0=>cI9q!)ks6KG`{V=IWR`xoDH1j zhcQ1Fj#S)rj}Jpm@4KeeumK&e)=wiyUBLM86q;3UK#Q|R@HKk&gI#+wWRHoIX4kWg z=*#JVFBfX^*GDmU1@&U)WeKJ36C9+n=wlZ9Ww$>h_9(Ae^uH#jvdSY|aMx3YU>oLf zxb(w-yGy0&_h>4kfiXu8vCSv;ptlID<}@7QNgT*K3%=Q!@1ZecWg+p>Y}^1Y#Ko7a z)JA~f1w>tz9_)pdkb!Z6!Mtac`xw&o$P2p=P|wGis@V+7ma>)s)YP?&hYlPTJ5S)D zn{|LaeS!MN{}#b-Cr5kpnO86PP7bcxMsa>L4eW;u&b9sS>0( zmCtMu{&sI{ect_QeWB#-N8=1tj~7`6Gqj0uTNE}x^Av>vDxHfV;-qx{evFF0oTAtM z+Gh;GyB_&vE7Jyicsrv@+jlfEBq70@m~6JDw=Pt%G01+qf*lA~*TzTqmC)}ew; zPTRZ#FK?xf|E&uJ7CjZUpv@7@q>MVDvWs07x(QMGn5djtemC_#HkDt^Sscy()Rid*Cew#Uz`>v_zdTaN`>N*)d?~D`$$fR~QnRZyieKUOe^4>!;`ibYdV1%@#Q#c6lG<#%L=lPIcbeE4w~eJ*ER%?!*%yK}0X$ z53q9@IcutEL~6_cW;l_bnx(1m1cu1q6`<|SGgWKL_!;ZuK-(OU~f~7v{JhVSv#Zf-UME4*f*Dz;6CDU zor(*J=jB@h2C^Q7X=54(b^H&p;j$z@z%}}q_|}1=PJs<9=W+Fizc3p9kVM%^P*{q^ z6|^_T?~22<8Y+ZWjFq5GwONg9lXZMWRS`F;2eCp zH&NQsUVMt>dK@0Q0?5tZc9drq^HuP>kh^-Xg_EPgXgpa90T|r~=su2u=gRRL+%n;F z7@AlA&E2cHuqdXm%So=J>JMOSD@OH=0-L}vbb4mJv3MV%la=3SSi(<2oeak|ay z`vFDL?N3{8#IVi)5EWE{`v#Glf0T2LughRq!z$oHHxo%;9JW}?`pEQ=>39iyL-vU| z-PES0CJL$12B-A7Ob))$rNTD8bE#J;kk0y&^*zC234cTAVWUhex$JkpO|BgK!S8m; z^oODsrgUX(4}WnZ>5QWDA%K6g2j6$B=6s>4b4smOGHa#3D+-7)n%`vW>GyfewA&u- zL479)XB$000a!L7Ey% z;Rr*>BLVynKz|MZoBz`Vg|P&e#Z-vayjxTS?s2jCUYPYD0=>3VP{paD@}HlDNL9}8 z4lQ6^>EB?afUpE-TCxPWkTAgncayC9oD__03s*4(efHy`c}jYo9Q%~dyb2(qFQUmP zMrX%drC-R~0K1=`q0TJeFfr>gj88xS8Dek*WNK#F0p`NPawL*QN?zSu7=heN1V_hJ z17XNE*AnGC!U<7QU~G*Fi}Cj@W9D8skB4w7SIh6bSaMq{XlqfvH#soO3 zN`>Y@s4%)niuAQ=+okcDFjcVPNRVOztbPO~xVBoayf*{}*<%lxO!f^y&e_r2Wht`I z??|hw=kSN1i*p;u5BvBijbA@EeC9rsiypJ_=B(wH9{ACUM4Jh&42GT?@Dygf7g^1- zYF-gprx6%q444Bxx~x$3EJ@2^F2@ud&DHBvMOyFRVmHZX?79R}{YyP^3h}sYL|pgI z3I>j$nXEE(SdqOMeXIbNss|_xYbYryptk4~HNQ!MPXVDou)_8sfQCRGH!=4O4@u^m zMjj5o3P!iUS^5oUa3Ena+u9jNH8n)bv}Lu*?*he%;RAalizsC_#p6zcnz;&cWR;*< zBTdOcD(}#HQJf-^n3{S!^hjILW`--1dwJSUN7bRN~?)I+jXk-d*!#k5C! 
zZ<8QvPXYX}>vHDi7cJ)&$@RZQ!jq(HJ>{B>>gezHTIceQwKQzDru|C;{ zuxppt$s=zqyj!$7Nd94TSiXS%)$@M!gdM;`##y_>#9Lbx%({}Q)G7*1DWD=cmFdn# z+_3_(CJMA|BR(>8PL8cVY+EH-AQooe)Tm7%V4Z+T35sB#p;Dq2c%97#H+Sf^Q^;H4 z@C4|*$gmjoCbRw!0kEQu$PrYnyvGyWMI-h2> zgwsR=uS>aa-7{jk+i3mDl)xa9ch3g1xgl1(J$(Dx@O#{lGFIka+4i&MUnsf32K9uj zCFfnT7TgE2R?#)El~|`Avh(s>1BRnhyRF-fuA?5XTmVWA;IJfx+lT42HxRkP>GyT) ziXf68K%ORKcCn(!@OPg!HehO|`ZQg6mw8NK^kgy-qFYrY79yJ=AKekw!rkVI)@YLe z8r}{w(y8YdplUYO;H|D{4kI*YF!u=u=!qRgN@(j~B#qZEGIQZ*j?yrF>zP;NF7IZM zkG__yTl(~S?qrICBxfgxf=yV9K$c%&mPWwrT<69CH8QN)Le4*fFJV)?fZDV&4V`A` z+n8QGu+;b}>XVF=VX3$m7d!;7hkoe$c~>Wn)9G1d!R4n_cL_^oE;g#jn7=_pa)J<*c>Cr^#t$8h87O6Rm;La8>ts;9dHaP>3s;*y8&wJ#-s0o2<#TJNNzd;xi1aGPEQ zLu4sS8hBOzP`bpPB=`}=J4Mh?K~J$I0z)-HZaWl+?*B3830G0t< z{nJtaharreD73kbWaB^)TMnn=jy|lRF_J%grYnKO)cGJo*{`v?k~m%`yG&Gp5sV?Ap2iv^XS=jaBldCGaVu3IcvVWXURB z{(}6H`Q?xg6HW6>JQ%|2mx5dl=+7}7+mN}HN^%zc1KZa*2Yr*D_Xy zxZ#kB^aMxOSD}uNQxdExAeYjHM~tC|$x3v_Q-ADdLE`lHG1pTM0$j$L_px;scsM;_ z7w6M#hNro_0Jr`Hxj0Qh{V%0*A?E|3_nha^ zRk;9B(Y~i9f44#EUG5I5u=EgmfMidK?zF!xv_4REhG%~Nfn9)Sw~6w+YTSD?&$Z8F zUPd0%+3xP@a&7qt>B}TLZ#+&o3JaNn5iH1>Y3W$rm_)&RVdm7 zqRp{P854Dq)x-o$$H=z?{EjI@p=ubA<4fJLjaT}`Kg2#|8D8GD?;EKL9)Nm3X}ztF ze;G$%EixNwrZ*Qqb@MINrd<>NA3TGC{JLsnA`E7FGQmKWL`!jyx}{LUM36gTLScz7_=7?Bd@rc7m) z9(%Yjvf6PClyS2&{S@T=G>rMMz$%JFwfbad7HoFF!GvPCPiKU9;q$_EYX~u6Z#qXn zWUNZhXc;HgdyP9;*~gC4j;nBL$q!2YUvI~*Tv%Q>25e?o(3k4;k`v?oVk%C@d9^w9 zwn2Qg#v6z!;3aI@>xZbehm{ekWs~eirwe`i*kKAU&X}i4(p9s|pXA1jy(HQhoc3>z zVHVN2$^f+1**aZzbgSL@oKy3Pu&&K6le9n0Nt|agnG-3`NdM6>`wvP|hd&wyz7Jzi zT?%`Q`B2YLitY~-$CX^(A%RzT;8cuDkuxcw4<7SlX{MW@y)-{sDu;cGi{~)IKTS-3 z7*FkY_H9@dUZCX8u%SNBD}CI@jk6U8Yxdu~gvFKgPU9{tXruP0zSQqkVywS&8E@2H zc9 zAtWKV52Sjl=(fh!r;L~=*!(6>vSQaO5h1o}*M;B`V+Vvo?YDF`QmpVt?-6HtEorfz z&Yt3o+lq+|dk-#9L(;vhcAp_2K2EzLK1TYMD&7dst{t9Ti z4X{c%#*On5pQm4o&4XQzHxQ2N~@X$WNG z9>3T7M;|f1E&S9j!CwzTii$0H7W6|F7$}ApD15B~76}?nZu!YskDyXkW%nv#9Zb@( z>U#N6`)WHW}T>4Czvfp}FQd!nuxp4ph z2?jx$JxSpYCQ|}$yn+JY2`h(z>o2{Hc?{8IXcf48+`1(}sAhC(6LXG!y06{!`(|FD z{fX7Pq$Ss6FK4yP2oF;?V>2cW=3BEz%#VVSdMrj%c?$zgW#(ZEd6?T2JV@Cs?N(jJ zG2L&X>&Gk-qWKl0ZM6$~Z1g zudSit-{MMLwnE}m-KzC3z#eq-t zb}oiu+5aUjtQ`V+!r{=BOX0|dXjfs%N=W2(K}QJv_2JOSLSaQPYSc^rAU;4lt2{bi zU!un1g|6%wDu<@%Q%6s9-k|wPn1vRC0`$)>Cuk!Nd)*)%ViZsce;l1HjmXG{kO0nE z)b(^r%Sr|V zmiRnt(ogPJKo@GSE8;2v?2REC7{i^4uQVV!D=SC7Xd^|{j{X+}r0GJ7mDM0kkHKNs zUV~a>m|1R;m?Wc}jcAX6qi0m``=2B{psJfGYHT=I;Q+?Fxtiy#u?Phmbtg{*M%BKq ze5fxP%!E@f9TJG%C+I`pUb{-bft8AF!VF)CChbYBgcC3deT`2{orB*U&^fGCppJzz zJJ*a(sjqiaE3xXB{XFk^2M4Njl!1`n1rz>KU5|`_yA!R^B?m@2Wnups+YvsbYjKtT zJasd{_=Rzzl?s3rs4PPBmkpFtnu0Bmpxi%P;nrIY!BvrgI_C4ge(`gf*YwW?HeIoM zT?dvRV69WoLe4Zztmbm=$e$I*RUBP>0X`Sj_Boy|o#7bEa{9q$e@OPij(^2*S2Xxk zC9{ScP*Gucv%Tk`Ol_TJRp*p0>y9TZt_$QH)&hd6mob9s?Cfw~$gE$tfEuI;h8-Kw zuPqeLFJN?jdXmI6!aF{NLY<<`H-`H^! 
zd@jtlN7ZLTV5hdZO#sQip;>rXKXh`2)I*aS9UYep#6iZ=T&>S|y7(30xwtTT2Y&Bl zM-;b$_*ou3VXtE4jJoqmAU`*ilC;CM$_elQc4-BxnpC0KsJVS*mm2{6Zo-_SD$u8w z^qv`y)iO-iQl0DRj1vQf`XdWj*rBh1n=LZh!lH9>ll(>PAarIy{E&KH9u8BD%F6V- zMHzU(lGWDc0^B&rTxpuO{XT7Seg>;^GsY3Q`*16^C5|pj!rmrR&M^5EHm9v( zCj)oRZ+AlJJ2g-gK zO+56+2IaYG4Ve$@{t3(k_It4X^dd)2Lud{lChgLHu1fDm$_dL-_>H#^hQKkPQ>8WS zW{AlG%f4(eM2^}-{LisNe*dpr}1ygz=a}Rqra^>#pvt?-hHe^ zAd2lidV{Lq+X~CkIggX^_-{!QBHwV=h$Lgg7iOi?yglA)X2iPw0*XN_E(qx$M+Fxm z0dsO6YCohPu#38G>`7wJsZx|1b0N;}YoA3o20RbI+IgU^liV#ECVwVdcS9h1t|Y4U zH8e+!Y;Xt2!kybk-zvdoi_2(GDy3-kYCxaBaQhHk-R<}Gi#ae$N4^d(~uka zHK+S?vm(afd1QXbrjA+-K|b61a{rdUgtaHh>3-osTK>wvkbyX%LUhsdz0RoSNHsle{- zSLy+?N!PHz$m)t3*Y$SuM(5j`%Y_>LrDNk^gUfo=9$YjqT`AG#h7^+)Jd=|(SODwG z$3Q$un5PuZecI{wChSr46;;@?PQm=dYy@XegCht0004ML7rSf7XM3QcSlmY3@V|{gxowF!iT4- z?s-3Nmlsd6JelPnvubBVkB*1}6$upZk?~t1=*FRXIWO`hyL6 zvGGviI&-?Jl$|vu_0Fva#poA&Fmg~Voa-DqK7(dcz%=K5Ke5aj>%w>bTwK8LNG%r` z?h^;zqC?eRUOT*u#?uLA?;x$+r;)!ilDMLO_-(BO(Y=El-Q zJLJMOoea{fqq&XSy9n#Ufkv5MC7tthoOfvE$1+ ziUexCM$T>dqZ|SUhxiTsmuZF&@6|ppdEn;|uF-ySNzLh6HsPxFjP*5FC4KMizaWwz{08Rm(dub52NTyEjKp5P$2PwEm$I35X zrTtWPI?WgmKH66}C%bM)%>W7Xf6KaHd|Y<)BOsbmE#KUVnxG&LNWgMy9{R*{5a^v++$asA(VTSYJCX>;<)B0W zt(bP9_N1D@j3*;VS|V^T2E;Jc@w)i3w#xIhba=;c@wi zLbgZ=+LIT<(B9$F$hYTO6{EOz<8dzCUO`YT$s^xR+K&zl^VipwIC2e~Z^{xb&mh=S^>JZF%F#{{q_|Wy z`VrqaHo0R&4LG_m*B}l6az%%H+dJS-oDLM;Uf+c7-)2X~L4X5BveEUwyIM1Rngu<(a?&1WDptILhrj3d1Z>xCP&2{;)LX1a+@8iHt_r%6OY@)bv=lY>rM{SNccp zDODg#W5BQ80Euy+?SqA$=O*KEj{NzJ8abN2U&AX3ibi}A9OPC zkFdB>#Z5OYXI`d!ngH=!bolg=cf8#?Xl#B1TO!==nrTFm!6%5OOP@qvYcE6yidHE7 z0C&>y>Hjs&k%mm}7=Y9;D@uc~jfFPbu#UM>fj4qg50Kro@s>VuB3 z=^sg0(0WIT7q967p7)cwEs5=4cze4~EDhM7^8?aSodV+OF@qu%ZPgnNpqz`7Cqcu% zwA~gM9$!4IH^Uyg{s(a73kNK4&xl z(4=x!&At(TuoFM?R3HF6FE2*_$}EndVO--ajM>K)N2&foXt{SeRIxlO0G!9Fd&~Z- zP2papROKrVF8|QLvLfZ4s>2dKn*#?*-p0i?m5II+)b<)WiS$kWLJ^0xh@RQJM3}_z zn5niqsf}7ce@O7i5OaRw_F6Fu@0RXXgfOnGOkYsqNHtf+M!~L>t88sMcw~9+?*}F* zi1?caCgQgMZrB4*#r8{ZNa?=vE!=#2m77tBb5RggA{nV@mms|smgPS4-l)%6K$N;M zvuJ@8j`%T(=q;)zdDjKrE$Ix6qN*72K<-Q5HMgNKJAz|2;EV^elyzD!K>AttrD ze&VyBuif5_8*ka!kw-VB!hm0db9R5H2MEV$d+CChmm%J&lZ9j~`xh(*JV{IhtP?JD zEun*P!8mbkfB|8S6rXv88>*FxH)R#dt#6tkh}_1>PKmkVO3CL*^h4o~)JE#Txzvh8 zY8hZK^RmI zY;zSo-F=p8@G{5_@gt8+(7>0DeQR8p_=gvTeKM~5ba>S>3;8V>4~!?P{O?(-5^whP z2gyTa$sopTi}<9LCjT&v3^u-Cyt;ASyyymsW`=NvY7q2-SM3}5cXMmxt3JLNxNI3M z!?RE~*BHwR@8bJ`GMfpAAYrB|Uw`g%CHxHe`$R+PNVj^`fK1doLUu=sb#HP^F}I0> zh>&8q%%@!{sa&fiQe|t^`G8ISq!Hl&00{{}nxjdm zL2Q{!2|s@oTT5y`=5!9qw=G|+ukut8RNNhx!CqW{m*RP^?(ABv zmd|(x1g@V5BsM72*pyURvd;b>B`mOzo=TW-73UCAt$2;MPA7d%aK*n`zI0e3cV-MO z7R*6Oi76QBcR<{8g1HuQftvu>NYj_ekbdR$t{SrC=R~$0=E%`t<^HcP*F1FWgr%@F zNA~jaeHfKgadS#Bx9Z#VKdUJllK0bLv(Tk>1YEr zQt)%<5j&~`YEjQBNKwFKvwguA&}lEB%Dw8>B6|$no$KpI$-mKaj)orC-f0Vdxg^(g z1?K^1oKV+j2!V3(+7>*NeB19+qPC7ZC#rt$-1pyRsYdw$qJWG5W+u97BKM;Fy4*s9 zM!HV?fK1>H&yBawN-#LT*ES(d|2X~tn+1Z!Fe3$tK4aopFIFkj;>SaS!x?5k#=66{ zrGr-hQHM}cD!tNYHAEDk(}mZCum^-IIedFS4rAX&ZaAE|KO^gq-ULvAd#O~^o&y!9{$>#u(`pv7M&AqOi z%ysyTy=3HU(R73jU|c@E9q}-sK6BNX4X~=dO*e~D?wjz51&8#BBTq*oo^2|atjL4e z6e1HR7xLMT^l{6#;t~zS06LVbX_!MB7qkaM#s513-g<;$!KRlJL=8c+E2L>Fxd|ex z0hQCUdpw0S3VT_!AEdmZ>kUb~xQW3v7jVgAaNWhY3Sx&>KvISE&6}*tRTAS2_^PEh z!r2bcGW?>rctu|@(bCv#gYTUf@Y0-p?<||e34SG!pf_jGZs8x8=(wF?wC1sru}Ck> z!Mx9C)BO0u)Wi)=yrr&yPqq-Y|0-H1xSeXUMcyGg(GldiZZ)}L8@jS2ap=x9Y7zfd z3Zp^VQ4K6P@(Mc(8Ep1r^ThsxItsjP_{4`@<>K;jtUoXVb+X;+2-F5N2Mw|3#Ao6@ zR`f}^+m@u2#FeVtE0XN2MIJZYQRv5RiLIw8{_(MnrXo4w*y?X(3uz7@~ftZ7T>xn%O zn@-Y3yx0!w!*ME&#(ah7br2C8u+Qzh&*>Xa4ljN|qF>G9j7(bhM?{Cm?X8&Sm~%8Fx`3;Uls$T-|Cz%p>XwdEM5PJFxs0uQZdO)cv{P 
zWzuyz!48qlX-eXk&JPL?;WFLVN|_Ex5-vjkd{!eDn1KtN549Gv{LVmGi4P*)m}qc8 zBSywCMBn2iC8|oQ=7>i$lO@wVEL=NoEd|`;0H5JUEJ=v9=CYN+s-^U=LO?W~HuXbmgR>%5&vB+cre(|Bl@lYVgm>{_DH$(Xao2 zCMf4ljn4>WSV8GMak5Q1M*#x*1!jUF0xTPzsHN$Seg-6P8UDr-wp&_w z!<;~T4<2z1TemBSOcfns)!{vRTBW}K{@L&2yAB(1^obK+4035Di)Bhlaztv)Z)Ak~ z+wxkQ#(ci?@Et~hO*MhR>^Y@4DSwrxvlmA9OP?U18%{RqR`wW|t^@aAl*!>aso`Ze ze8|HMpC260$*{b%CsvNK3aRQJMe z3|-C>lSUvoMkfr*KT<7+v;+*ozIy~MH6hc%ImMWRT^wPc79fT@mqex;G{dQ! zHK(~pv8#doo*DrjVN1s^H-iFOt^u&8zNa>j|3e)x)}nX*m)hv8G(NsjKANK^RT9s1 zOW2wm8}Ci=;f?xZs2-}5L=q|wO+C)i@*6%fyOqf@FT~y~4~d+IXUHg~QXlUj9}3fL2+(96s@mP9Y>}p< zB!z?JQiSGcF5K0=e6xO9(6}AqJGwB_m^sztFP_v(1sA%NR5lw_@g2 zI`?stz~tw2Rp-0Y?H***;i4zWS4o}B1_I@bIg(rTlQ#Y;63qlCv77h(vB)2swP|b} z-EcPo{ieTG&k(z+V-mOh?m2_|E8vli{Ix7^YSDn&3ShV=s~eU<)>@wu-2xLC(nBml zELrx{n;uNqL-s_N9PI07VJjGBBgX2-u!U{S41*XNB%4k=fbZU9em@e?$1KT?WA*RY zaRtnT7fYHorPI7Mq>s^q0i}T80003QL7u`w7XMMY0D-l5+`ZuKsuST-^Ilvfq*Gb3 z_+APfrW{MDX)gZZP!<-lp`OZh_NA{r^*-g;decLqVSRw%& zVeKkg;6N`M8d~Nr73HV==8nD@8LgOZ$i|zUrT1>xADC0singp7J8@O=crK^d+U)FM*DUB8lWAB9G~*;phXxCBjEV)g$XA z9SL%>-Fg-(?a2~9I1P$>3R3tt*+bw2yIjBk00!qln&(NVL2Q{!2|u!Z1mLvpFVfm; zfCtowm`9U@?^Bq+u0A!z`!7`S#w8JN%>%e}j=0lm{cbD>Ewm9I5ev7-tfG9ACc~SlVMqz86Dc;iw$?tgT0p zaiuI&x>KuBY&zIk{Hvp6OtFYW6@H_c(}4U3z=M}pqD?Wcjyb&__2clmgn7mo*MsKE zgYhmXFsC7-ygMnC13nWGN(q@I7hU&(JEbLYZyeWiDAX~<4psy5XXy1bND2Sv%or?b zZ@_Mq6*LKb;$+_vut4(iF1P`G!`?=OwK6EMH0>(wFu~@&D*y@QaSTB))-X+gNRRi% z-Ki-m0ykA{)dr;7WVF=bNm4Zb+0SK^8a8IDAt$AOM;o8#6Mf(oHttrHo1mz1i?>M# zsrUwx1_^m!3Z)&irxeH0^dWQWYp$%%57(&ILw{aVQ9@IH9D^Ggf7bwxRddVbg`E$yi|LJ%2CPd{ z)z2`c5I=s4@|T^{idOH<~BXfh5?{2tj{C8KV9$?(vKE-8mN>lz5bP3@`w)=>WwL)Rzs2fuDjIS!NdEX z6>zU$mlsLZtf|9ml;sqZ3?^Q+nMAX#vY#iuMlF%xXR&F-8*+0N_8YH{FhJHGFfAiJ zoM9*ZZqQftldD7Y{R(JGQETt;&-(z!$u@+O<-~Phg%&-9$GAFnQaRNalcWx!=+8rQ z|BTt~<0F!#A}CJPmI&le*DAJ0j2O4p!KX^A=oV8<`OJ@`3zX4gf!ol|Ac2&vkN}pQ zzk52l-nsLU+i=ioV__#H6qAstr9g;zgYRNtaVo`N?fW@i$u zV`Upm-8)FxLWyuK@W~d{{fc; z>NXzp*)uh0IoUk`Wbqfi-u%d4Ae-zlR>;0+|2x?^MWJtY-ROhBv@FnaLcekKYT-s} z7?%`HV#oAS;MeJLvXF>YA_=xN{1+#eZ@xyzwHdI5B~wCm=?}C-ofE4-e@76-5{azD z*~TqQHeGR6BqOyuv-_p=Ifdb&CkWZ4rP6GdOxH&D`_Jvte&>X(uy`bnGu#?MY;w06f8Zdk_M+SCu!~HPHLt#(PD^*?UHNq%Mrj0PNBf z+OR!bgG6GLIOK{5xu~GJhZ0?Se88ATq3e=77LKM5^GMLH z6rcL3gc!#?&})8X=?T~zZSY~ka>i3y!6_KDUgOe)hRcfm9*3A7{+bV=EGJ3I83`m5 zdd~*d8pU)y75~b!2i(h`YreCz=MmI@Cs0nk4EF#40RKUs1w|1oA^$oF^v)%fqRxM< z{;0w??}dmJB(->G0>}dH3x9CZZm2E^g7nDf_KGF>fGI7gNBfY<$eMTO9I0mU@ALN#Q?;B+4kh1vO(PlXGwWIVkb5Do{Ez;O{Ie{PBDxQIOsxb5I4Sa ze$>n9p_1UyFz|G^_T z)a$Pn^@3P=E#bNawJyP1j!-P}IG|dr>bY`MYT8%=(G}J65dZ)HI02s|bVA=4ROvFt-XtAuSV(!8 z=%oRYr6H5|W_8ZmkCm6-pZyInO#r|@}v?azZ~~fRAB%B z2Sh=eD@mw9Y?(|6KW-n@s(dW%OoEwIy25$6{~64)7-+4AHm^Uq#mr+&X!h=mjvD@r z1GP}5Hoj|~ri7cyFOnNBh5Z_-V4aL(I>A~?d<8E&3U`7;9-$93qKb;ap}K(=#!TjQ zr6;A^^ng3@8ZGQW)>l}Xv1-2RQeaBd{GFYW#`yFMLg{!jtV4-KGAXWhh`mzaPa|fC zlvsHz*E@jnZ9{S?&JX-;^ZW{jWV87=7>AhtFk<0Z*!3gk{O#uGx?TFuB-~nKe@yl1 z=kK<>}cZlv0%m^uKj_h4OQ(%q2=vN#u*$J&GGy5k7D-y0xRu`pHmZ8O#CMG z)81W*zo9u1q8813;=v;zG*nL$5wgYCIbR`tL*&X4=^g8Rch`Kr025a!yQIpR0 z;OwJE0E?OrBGv8T6zy`dHWZ7c7Jq`)wcS<)wauP6OOH=rsQf5C8|kAZrL~_x8$L3Jdy9luI{6UOm9@}zpcUedlgSc5j}p5 zm&Q6PE*b+Ha27;CKZ~JzPbUsSyHT~>kg3>dJFAw)#mhw_)e5&I_?sVAm)4mP&2u#QrJ z*9}cX?B`8rB-`5&feUq2acl(mT#@sYaY=7~FDgqGta240o$dA(TkXTXB?68w#8Re$ z7dzdl1!qm0lZpeXH< z#SHM+Ju&toWwtE^za4ym!S(x9v1Ng>voQYjb7&fKHY9P%pl|K8ti0XaZr=WsPIAoX zdWA0jlZOoDFwdWP#JIJ)L|C%wH0mqneyJemUFuFGx1NPA$=wIsEy;Wdua=<4YQ<0o zE{w%WR(!;S6axbzRyJ^n?zlELAu|yb_olW-Tyv(1_RIF@56$_s$duEI(jcapK zFF`lcOs_Q$OrkTreTpo=Ax6orjm!v<=<+~yjj(oSf>@jH)ClbdJ&GMC6d$r)eotUJxgNQ<`Jb7i{IDNCfMi_(L$27Cu*4k8@g_ 
z10`QjcH$^nua5$xnxX>Qrr+g7&7>0E#{?ln5yeVWJhl89G|{)zc#qQGRtlTEp#T}H zi(zByNPB9ISGwoXa9uL0ib@_pN3Z-}!j1z7=8|I30;j=l%!>aunSArhZ~&l9Lt_}_ zF>hO*_G=>4E=2yY6&5_LTwT;*J{sD9Qf__qai1)=yzkrmwp*dxq3ow?x zmN&JDk^q;Tr}aXK>6uIdn2k`~W9yp>ou`B-E8gszDBb5e_OIqsm4jZ7m~&QR5mYFq z_VL%+G&}!I+F;9T)!o5Fq$HDMr(x8SlC6S`3er8m!H{;5m^#6Fc!Q~v&@)L}EB>|KE*v{lG_ zH+W21rwb!Rxsjj|`B-}ld`9Oo1U0?aYdT`D#8WlVIUIf_p^4Bq zQ<?WCxJ$PeHdP!sC<6>!K1QpW%Q0C)kPXmmo~Sc1zWw8ZXEy_yy-If9CaNR-YV zvHpqb!u1ZRf+u)s#5XZcqQ=`xqm3T&)*a5L&S=p7^A;k}RdQV{o!jDL<< z!*3!e5iYI@k@{N$d*Kovbp9Zc9BT!tN|63N*GFoZ#3r74vpkO9nqdF{04D*TYHC8? zA6Rh#rvbTOUg&R|IsC5-er4xSb7IMcqVQO1R>)@3-~a#z(m|VVNvJ_=nM?^kvS97N zd3AoBG-oNVqiV)Hi>netBy7I>Z9jB}0^e{q>uJk4T+DGcsRfP3GNWA|Uxi#OObvU! zkeWJKCC6Z!6ltSifw8m{5b4XnPf2`yj>{bew=0IMfmHdph>!?&p0O?h;VMf6Jy?>S zM7C;d-yHRrv!)TPEdP$4I|aO3%U`=n#Z|qV{_y|6QpxA%S3kvv^xEV-1%>%{4A@!1 zIgh}k%BO#tY_S%mPEO@4yrf8$bNojt^8SrGXL5q5+@x`vyk6?eK7wfO1q53rYF ztVlyKn626brooCyGP|BTKA6dlmT@bU`rat&ZY7h)D}#v(ob57153kYI@UT=iYGllf zBZ9_r=Q>@Ocljs%RrJs&pv4=&Ws?q$M0ZRbfmqF{37^;~Cx43VHUgS%`RAtP|EG0O z{@6!X(fXBm%Hv0>mSxDIeK1Ais@>>HS#&px9h=N*nYzG@-o$OaT{bqi8KXI5D+o2r zCHaMGQEbdClusegHEA(2h}&T*fjf_ow8hoafIZ(1@^%i4=81SM6W@0_0%zQ#OwO{|0w0*5 zV@pH;F*^o5@5_mi%65KC?Idsl;;rmv>F;QE&%yrVY(e_MdRj5!|m;DT{% zvvE@ta@^d?Wfw=gqLUZS0{KJFr~WTJc7E!eWN0AJ2ME6MK!dZ@vA#QkN5xDoF39L} z5uB=Lf{y))4!?EjvRZEunE<)yhz~`-soBmG4`@sS=ksi>3 zPI}MLrc&a%*<8Y49yiG_ysMPq-dhka6aiZ}-QP03yj)%DZEDQ2b#cj!I!P zSvD8$%cKp;aILlW2Sie>-dZL0hK{RA?VyOeQTx&X57I+40G@m?k-6NIg44eaB4W9b zQ9&l%$p7XgN8sr#A7?2o%i$4cQGnGcZeYf_36m${*Qdu0aJQtmo~-!bB61KQr(gBR zw66KyO~EEhUQKZP@`Fx(iPW8la_ph~_$%>|8j2wY)d@|ZU!fw^lF4*iO#7-}i5sBV z?9UYWtRc7OmdyeE&5odlwewdX*0vUCDFOyE2_57Xow=cpOSuAOO#oSk(WLn^eR>HbX)=bCGRS3@T$Q>4OY4D$#A#DicUzaTh+=t=#dpK@oqkJtO3}o?C zXI=GTIb>hkbIzwN``ckD6ph>AC7-k!R6!||0YF7Psf-x_A4D~dJneq$tnB=Vgnyaz z&9xNKFG);21U9;{H4dnM@0-C_rLszC0N+W#Eh3HK`NdHOHqcup2H9jALF}@6wNOvC zWq8KNS?phZT_B@g*6L+|wIAr%c#Mj800=OqrNu~OX z_+;srkv(+EJSb*NMv%sm?TBpP|BJ`{h!e1uD!Y}5#t-oi^3NHPQdh#Uz^7Z2<>Bn` zLUEU*Bi?=p*!|BoM?S6-M+a!vUwlf~qb)iv=w+6%;&TF*i+ea300m&%^UBHd27 zRZOT6^0%(Ds+QlqT!d-+r~?By0r*n^ne)OP|G{y4w+8zD?!D>zW+)^Gvr~~7i*~eU z&D?dC)oO;Rr35bQ260D6p$&84~KrWj?>DY2Vg)V4K~Y_A3(bYeN4k_1H5WSFnv;F%5xBtGO(o#AOu zq40GgL&p{+Pub1Q8$h>la`IP#hB(Tz1*@2fDY}A;DuC~2PKzRMu~15{45G`;wDxMJ zff(77aTWR1)&@ktCfjT}GN*^^hSZ8>fXADxyb}iQ>6<=J2EKMnGS`Z^+jyhMS9U($ z-#{|4xe!p{RP!fZEN+W@kJDF0v|<$^!xHFg zn`M=QARF*v&JgT^V=2s)y*d|FJGWeyr&d!bz|Glc88mNEYn%4-Z}+TMfhQe^rD1^9 zE8(c4qcK|{?pl(iF#=!W);51?);&EKVE_OCHvyllbVA=&2%B?e1(|;h`gWhn;6dT> z9{D>?U)pfEO>JB}egT$HM)f1^LAaW<^t39Cj-=C}0001A0iUjFLfrR% zp5GXpxHF&q`cmiE(`s2R?iOKVYF}>{)@^$;JxW%UU{oXt;Iu`hs{oVhSfuB-(+4Ms zQ^+1{<~0a6X5o&jCSXMP!b#ipUO1#23V9OuB`M!L^;KOR2)M3u^k|yOkGI@;T6? 
zDH!-+0+AA*IQs5hN$3YCEL#J{seSwC_g0O_(5fN$HERG<$Aj>v(i3K4*c1M)QBs^26n*gL4#MITGi# zgj*EoRs(r-#Ez}jS$x+fHS(I~B5>m_!J<)#74Zl!j&Ia}hWlt|08Jif4lYUhBgWzy z(g3}I`i#(l(<0I3L)Fj8aw5Zt#8J^d+yy0vF!Bo!(s9kN=BM71uv4meRoF0bMiU38 z4nD@+o0pcI%ksom_U=YRA#ELtH1{xieG}8+pI$%CoA-zO4HmxRN#H(qExGtYuRn6^ z$)VCQMYXl=JkFKQgu^;uY?@yIEPi!{UQ?C9$xm?yUubHdka65^8pAWtfomdLqp_*S zq$oOz-}Y5>M(L*z)t3apEc~|vgB*ok1uzI1Ca@A9ViAz?`nJ$*jyoN(0`HC)qC&vh zwm7`tK6PwgG*@;&I7Twl{{sEsKDknuAqTeiwvQ)^ZU5sMv#jK^K?Z7;sc8Y_YpmfL_1h2wJo`UG!@F6!Iig zn?9wQmLm#MFi6_8K}JSrVOmcB_M&OFVmxQM*_ZQW)C(UzH zwtS-bqV*%JSa=Ddq0Ps1LRp`XRRTyPd1C>-(}ZXE4nUH|G}D~05}Ia&+3P;5Kr2wv zxt>`9e_2`NtI9=}@xAX91hymyF1#qs%8;v`P_GyB8;aBrH+jy!+VN$N#(r1_Ep#!> zo=}+Dy-?hLUw01uqiA~I*%3^4g2n7s+WV0Khbf@?0ZiZZ@3GMo*y5Xtb$zy za`n}p8T{Tq;4ft)4Adnz=Me#jNA;@l`QWz~qQ4r(+Vo*GS>u}^;2!Y3C=ajONoFxK zgArj2@Zr1y*mltdOTm`cv_~4L@u!PA?P~2HtxbHxevc84SuVoxc+O5% zjt8R%C837myaB)#HrB8AsMSRDjHRNdzc|!U zXy6@OQf<%#>c_M4Ha-}*PH44c)k7!7_SHRqV-_cQI=^crm1U@OPfdo4n;enX7ZBeK zxSMHisa4!1%osg4ScmwKo#Ovt>=Ar_Fe4!i(HVSGn9BdLEsbK{H%)OXhnpfR4|9YE1IU`~uI+(;$Rwf~g69tg(sUML zwSSEt&xq&kBRLz28VHZdJ|vHeSP1Tvsg@?S(l9d^zk`cwk?6QA0yqxoQn1Lqd^h@p zJZR>{#9xIet(&YD^01&rQvvI>OR*yH$}v;+NWH zbHd7($xEs##z`E1mK4f|V zIr91G4O|7WY6*ohi05uY9Lgdbg=!77S*vMC9~{5M)ZV#X1IF1)jjU-P^J!rcYyVB| zco_(Npz7PPvr_>T|8BEA-_VWEKms2AdUHE)rZ(Q^tRj%BtTGExy>PVgBw^>{qBoTv z(0Wj=2ZWg0RkW%f_$WcOGb##0%z%(IkX1#2P(zNaLoJy5l@{7toS ziFLax{upJ=4}@65ko7)rP0N_G8M4dr!E7d~6;pOy^?RWZMBUu|ft3>F8c{5Z*kxAX z8-SS|HlN2ycdgRJwDbDDuY5B!yYaE0yNHICo zxZfkD>a^>?YooPrtoQHn#x5fmb6a+A-~jOKjeE5zUjn=TVEP*g)S@xLxQ^2BY{up* z<6Wnu!enZLmhI2N#A3)$t~&Om8F<)i!q@pw!Mz*;Tk10QTPOiY4RS_Rt&RN(=4;hr z(2{CAt71L3OJuRD6UTO5hc?%-rZ?FvZ{5v5S+(97c@xcVP8PV*34zD$K7cyy5IB|N zgUpsXF=#-5)pXq1siRq0b90nxH!2YAjC6p7h6Ni!tQ5YF~ zzY0(c#(ifke?3o7Gn0czh)?pxHoT;5{GAXnR#sh^9iV>3yB&{BsnNmOK%Ena+t)O$ATsecV%=-mj>VOstm<@Osb$h2QoAN8PuHD@YQB)DkpUf};l+tUh zPl+A|WBHQ9#4$ras<$a{1*T(Aa=Ag+_|t;folfePD)C5s(6|Hz3SpW9wBmOG&idiU zo(8iW?|NbDIAn{YuGK@a3~5~PznDI~$C#N9ByQ=h!+jhnB;DAuP$)Xi1+Tv3Vecbd zazx%lVa^E)AU1#5>haQ{J?g{BXWR_j`+p2gC^MLTr)Xn$5Mj8F5Obi6{WA&%7%ZCy5kJMf;KPh2p+wiUO ziqvhqV+aQv6TSK)IhTF9jq1)tu>b%7^+BE!MG-t9|CqW|65JUGz6*iKoLyX65=FJW zc*Tm0Fxg8b!ZGvb8$e}jOIxKH;Hq+D0zZ|lPOu6DCiDm>&9mOpsZr}_?uRV(FAa(k zC{yPx(FeO7v)IEFG&<`!C*^GPg0#$91VuL&c(X3V!jitZJ$2JIygS{lK$wI`d|I9e znG0Uy1AC0-dRZtEn#NUiHd$Y(fj16$UO+^6s}}qp)sShX z8S!wF#j;VpnUDYg05t)gGIT=UOC2^!l+(k@cuWPzgt3!cxNQ#bunFU7NVS_IvN#AW z?zMMa-G1DAXiOl!nnlzA003VBo-}Gg-&_mChG�=na4{O(I`)gdt<#*?9dRkCKCy zGoGWyWl4oUmpGL1Rb)<>`+4K1c-iEbDM487zL}a>7Z#5m004mhe~!hrdS)gIaY$&> zwg)1>%S*iDqHy4FP1vw2|oaZp&`qsB0illd153^oho(j-SCou8a9SRKE|LPg?n%;OeNyu~uM6UMS!oH5|RZN9q?P;+EIe1CUvg@0L+R# z`a?HWlem{S{GR!7t*U7VC#$p+qeJEsrfc)g#ChUNW2^9d&ukxULIx4eZY}rJPzc~| zx=lL{D<0`gG*CW!^Fza~+&o2$Drf@>D{F#8Yez46DNlby>t%8DuybIOg~S}!8sfOB#j-nmK?ce?NHB)#4Y#JXhyuUUO)0J4Kf6bc zk9Avnji&~RTo4!cz!O-MQ6{C1ZJ;kZLjojC(~Fg%^SAoZ@z{(>jis6VOQIseK18IM zPIwM`$PWH1*muqy)8SX3%bF;0m~SC-)Fnv?A3xO z!&!+ZhiCL3xqtuw0FObQS49ylA^(^!kQWMVs8GZHgX8gnn+nKYO4rRwBn#(w`UwCV z+JUv9=|psdLh>~aA5?qkE!6SC<{bbfTHdlk66Mz}4vmmXkkU3{)g2G8F6XnEkQ>+< zY-#R|GkMf3B-o*ZJT&dMm+prsJ^+YyTqzlsD;P~;?rGKi^c==3BjVpwj zPXGV_I{}_{bVA=T_ysIKKY7SEkTCnSD`3b@VL>q~1zZ$Od79pKP|bQ&Pwo~=VTrHB z4TXA8(P{h_L!`-x0000!0iJkjLf=(G0ieZb2PDrj0dGv>8U!X3HbjkM?ZnZZ4zu{x zDpKB(3e7Txxt&_zcWXA$d;@6(JB$iT4}|~#1qwl$d`YN5Y?(|6zr21IC#J)(t4tfGkk99bHlf9p6I+=0h;leQZ#C7r_%zjuu`Db+g`s zRA8a-)Xa20T*$eidbcvd{`hEG=Ik!kT^x}!fD!}8GpsKDtVi}=nwJP|+>5x&N-z++!apx=MV(L1xYxE+FA`P-}fItt?jw=#dTI zv8V3)wTa%4EWbNaJ%;Pxj2qYPS;zVRgH~Eo3xpy(i#Ny?LYzxe{I*NyjU}MBi+;!| 
zk|_j;ias1BE%i%3np~zNa{%Xj8UYeYyTYMBQg3gE6yH_AHx%x3;u&dd07mF>6!R}D zbnDD5KL55Ueyb|UfrIx(|28VsiNl<1sDeD#J!nt2&geWaJ2N|v6V1NO5-D6JoS+A> zir6c}F=JC&;Yc=fmX<7pc=$Z<(3Je+3;P$ib?=rQ$Tai1yPABunoH4S({NZ#sSqx7 zUoFrEh^SW()p@vo6A?)zQI;309ns?9J|UJoMfuSoY{mj%g)x(h)B<127dj!2L+6`X zMisYj{NeDFq>vg^`E$oZ&3e*@A?d*s;+jz_A4U8`cGKVAE0}QqPLnR7t+weE?HM3% z3)kxP4*z#e#!I5h^L`6H<#=E5`1;|y>RV8B1BAKx8V2!%S>d7BrB+xpwfEc%UIo%r z=;>XbvPY3l4$O10W28{`mBwk!F&pqQ*onZnalnl2rKQ1Qh9UgHmSm zub3p56*T_uNicDRlb&*DLH>4324&Z5TF(RQxtaoLvC;lQm)q8@T;;FdIZMQA}@O+cc^mL?ONeKkq_xiPZrupRtKZxEKZHk%^FoBBQ5G zRw#V87?iT%Qs}tTE(7=@_)0b@5_8Atr0+oQo(`$^?>5pxEy2)?XH;!hL^EHoXt%bW zdkZoGhBr_6gQCI+BTXAy%9wEZ059)30mt+(1aw z#`}OxaZK9t5LeYDr8Y%Hxfq?5ET%DY^@TZK@bKUxIf5ikC$tW^F~L_9_cL?dv3EvU zbu&Dz?n`YA4zP-RQ5IT|4E?xV1d}#GDtz(v#H~DBgL5Wmy&YfP%w$SEkCHwHyCD%r zvxsy(;VHg?=@F8Vb~btubSPMCEJKP|7vXXf>L4TF+$OKF)MnKaSx3L+%FE>O4H=4z zKFr0<8LQ4Ch&XJnJ|0wzxG6quSnWNT)>ow3d|~_0cq8IB>Z|}B4|Ytuj7vO#f-$<&HuFNA&Ein>dhTYhoitvnv1JF_%)Z5AT>`gX?K)oJ zo1Fh~p|-35cLP>cXm)zB3x-)n6ID4&OHi>qm%#$!-v5!=)6_4T@)d8B8hQf2f~?9w z?2+gA$PislgrgPcqfcrl_-h)QZ?VPr&qvh$K3V{FcbyX)x3W&;{x%3Sig4>ho9I1N z(e`;3@_I6?w{9YhF1eO}pO4N(wE@vN>yQ8|fPK5XfZu)pP&= z0F?orx@i!%N}dt&LcoSm+EK*J?3kU7bO~%TqymA^(=sJ0gUtO9{x(?ML$h8#WOx(m z5BMYTn|>*4<1sGl#9$}aM3{lc`fJA3juQW~PM@FkKoGCulvvx5*wbrBVt^bri}D&A zuP>q0F2jG=v?Js@w*@h*(r=1mVWXL%&hf#Jd2Dw<3lk{-00g5! zn!ib?L2Q{!2|qv4<{e#Hs5cb}Iuqa)`(n!(!cGvUZhe5AVmk0Xk9PEE=zp%J!9NZ~ z2LykJ4SkZX7!h{(#M&1CUA%v>mYBQ&Nugh`O|i-gpo>E0WNCnyCX6=H2TO_>4I_U_S2(1{{m2It@&B+ zj~(GGQxiPL9%{WK<^tp!l`n>V$24fc6Ct($5&# za_q3MnksuuDkj}QYSf$wCm=ScPRRP#m6b&MKJ9uo!m2SB0DddjQ439$DJ%^<) zT0J7bPCso#6+z9225G|Ih*`}=VtoE*T^M*4mP`?V-B*7Bpnp@+NjpF{P*?E`g29=} z+~B5D6Q#PLBPKy@jN@!c$5fblh=eaWQSnlt)c=ikNU5V#P}(>U5&gEAe60b`A~kbqPOM+F1m)H@DM@cwl$7S)5b#P?J`nnczjWyCU_p@TsUc1O;U2qi&q)#5U1Lbe^WT#gr*OQZ?FmbA>`|7@13 zg~Dj}0S9u@^;>j1v+5pWC3C9&RnAGk@6y#esgHfia*H`3PmhwL%79+is%H0m`;hM- zJdufznRMFQX}V@4^j@X4`b92DiAr~Ah+1w=5-v${GZ#S+jYe8*p^`qpGGXJP)Q?DT zP6NRlR2!4A5dnPR`I%zxsafT=<@0&Fu*FeFPe6`A)gW{pseR5kel}isLjrx({)5zF zi5)+;25`SfZYwt4M|!n^a8AHQzs6x23v>p|y!Z+dMe;iyYYVB5=vQbK9%gyEkt8_J zA4M3>u0|p%&>44C5LRwFDONV5Y8x*H$3xKl&72a}T1`$>hZ$pV>~|}TBb}{M2rEd; z4(n&ooj(~ey!(Su(Y$Sb>sYa9D-XQ9QmQLSN!X7JC2=sdHW_`N&gVc9gl3hL965`F ztS^9ztll;KWZ;i6qUA#GjW!77$0SEE+-%}!TZ>Jmp)slyhsR2iC}DyW%EJ2y3Bzr<#+iw9%5#Csl`1K4UGf=~oxuPA04+hD-a;1t zQN;jgly$q|H~@%K-5HFgk#k*<9{xSI{R6HN*?{qi^=TYP>)xaQ00BM$p8aVMw^eMg zwp@ZUnUD%O|CmoI#GC`jDpMh5)apnvB&Y#KbQjPe5Eq+q-At(sD%3}fd1pfOzh4m= z>%g3eV4sq^xUi_G{eNxX!TyWs?m7o*IE$*jbnwYtPe{-~ns@b1|vK?}2+g z+Lm93e}^}@Z_4Uxcu?lsKNK{Z5)|Ot@FHo^2UN4b%GMze)+wq}bbqQD7Cr}a?`0(s zLjuccxRSt@ErRV*H>z9%NIl{O6PJ|N&l!u(#Idg*9Y^;0J5~UL9Hh9c5y2+hIL_Aj z^u6k5Xp;urxmvaMV~MHbFwZ5V10KO=9D+5h)?8QbMVn%S-nntZ=IyV+UFgGhyVZv z5J8&aNvJ_=nM?^k0EOn#fzfnX(4DPKD+lpVj;AhP%lxyMc7NRJ!}H$zqD_hL<3(%S zCSiw0CO|TFftwjg;zQb}2Tb=l^*K)Ed3Ch7zQ7X8z7Ipmk>bTK|yO z9+;FqzK<8A6Wmq82Tjmw6&5XXdGQce~Uau(#J?6S+-#y3qs~FtYFK!xd z_O9OuCeaxdozTZ;*5&vMm!33<#bXqTzwiwnO}t7|mw+`V9Jo~ML>|OL_dnP)X~^CNer7Bfrf6F z(O(f8&L>uy6}17)jd2|k$<)K_ciL1;>uoX}Z*KPcN0zr)7^ioMsM%`@UtS=qZY4`z z5RC<_xnsf)(!rCSl)9O{UkBTW=opA+}L+DH#&P3yL?mKqk4TaVL3%Xgc zD>4vAP1yhfaz zDtLxc=$5ARGsUl!&cQWA^RhoDZI!#L7y5NhX}Sz1mb-4bN^D#69)fT4Ua-Xdg;8q) zQQwURJ5%;Q9z}Pr7l8xc0k{HOxi1i6DN%AFUyN4hIotAF)s;jD1kFAOgek7ze~q$c zgZ8I0wbQ>?(uO=J*pRe;oGbRw&8@V`qRe_RtK-YR8BXI^*8oj8rG~#zqI}X???}J5 z+Z~n+t;TUqum5swaOHaRoj0Wr@nEcSH7%uLdcOEV(y}9dzQ%Y!aoMDWLlngInPv6= zo2v7rrSH)(XO&DTpW&;viQud6^}1vD?vY$zUSKhxOgFKdm|1rMY6z3@$A)|sqGyax z`k_Gi3+Q(|&53XQ2X4fNov_wfM!H$Uc;B-#P2$hR8Ak+ww@N-RHobL7&U5!*Oh-Iz 
zH9zLmLY)i|E{(dRkiuv__`q(-OEkDI|Bj{o!s95S>zpj?a|n0k&0tsTef{y7ls=fe zveVVI_)y({L5JS_pM%E>HDH?Eci*v|f8rJnpNtjnUT}vSSje8#u>TbvGK{wXOiv4# zP~(+zuzn9DEITbw7ZGtpys*}n+rk#=i5+j<&{#--Mib6ETd`jyq*Q&Lv2CJH&Xd3| z)2NtPseSK01>ooJ;?rSyMXc-;{xdE<$oNR@)0Ct0MhhwdqRRKzwLa08jCh6&CYRt} ze#8+5!S&tN|AC>nI9B2JGuH04=lan6#$ST`7SD7YU2~KC3E%qNq|Ga{yY+ON;0!pv zmmp-fs;f6~DviznmW2q5^L-5rnhY9u%a&wBbnHtYjQ7YDt=z7ex z<;sW4nDsH**Jf{>-c$$mTAb;M9;>=oQKCS zNG=XZ%)`o(SgzX4vfTIUN8y}cPjgEz7eV3UXaC#b^>gx)is@ZwVSR4koEe(4okWLV zU94k-NJ~>2-L{;D9IM97_NM^&_(Tn6S%wh|`eYLg=BUp@zOyVWmEOw1Bi%U<)S)x( z-t>0*{FdwNrXs1EhLevuXg0Fqzniv@Nr+%<)#%n#B5Ol9+_69jPf$t{$|L}&srhkY zgg>P1%AH%NY&{^;%8L)SVI!fwocPpp3hF7BWxq8u%V(5UY0vi6+s zn1E|srdvy^{P2z}NdMG)Z$4^SYaoSZekOGAQ(UuNA;a&hX3#22;iQ|awV?6-YeqTY0PJJGyy#N3JKS7@X zLKgp9YW9@H{V>UE9FY3iLwqJ6P1Ro9Mtz!Phx=_#uEB)zZRfEHbAC}8sd@iJ&g7&8 zDBu5wl4sV^000490iPgg5Vt0bgiw<@;_dmoFd{tz0FX}K%bCdKSEYe7x#B9OCF_$& z&K5Y<4B1WVHuCNlpU}Q=X>ykQv(?k_r5uz2q{)2@9OBeTV-aPYE-UbNiq~N3&VkwO zIQWuWwT$vk00oz50@&1RD}j(lc+hv-Js#$D2@SxfX% z2l4$S*hbFdpGzaZEbiAkjVu?Q+x`57X}(HeItgOaR2}X zm_eH*NvJ_=nM?^kvm@LM;H2-wEGV~DWtOP>=^uuSBOG5U?|#As(W$Ql_3O{}=!FvT z9l+{Xdk~h{hAO?uZ**UI=$EyJ?j(X(jzQ&!oSx1@KZzNN@%PrEW4)kQTxY#+EI1d? zCpQ~xNU%mQ^*^s{8D03R7p_MY3%*nwn9KL+wuslDA_f~uEFOXR8T%99Quh`Wo5DKa zqVaVo+$JLDKs6c&pdyEI@(5_G93h$zpSo(-Rb-IzIJ25ZB!LC-)H7W6{B24)Hav-d z)H3b%RZS&04n0V^yEmS#vQb-3X8qKG2o!-kz3`)$r_8QJm8LMzASDJ&F}Ap;zNW?t zDD0j5`i?%2Kk<@TH{UXdnlIGkL{0qd z$ZrTR-Z@Jf_it>uWVS-OqyjOQzy77ev-o=33z!$Xq5duEw)l?7?g-lW)n0SAc1~BS zXZS3QZ(U&trcncpT$nKgQt_2QaN04g)?uc2y_7za+UuGC=jC#t+VO!VYGSZ+}i2gg4du_*1U_{P1Kmu1Hk|OV58R?dX1_TyIYc zN+2r*84*|2jt2sLtm&M#?Yt+ATC*;yEdDosQ?5^By_ZbCOMzt07QK-!D}9IsWy>r?}B!bMKk@tFs3ik{ga!ImcX2x%Q!%o zacy*?n*t%#7PxeV^`P3cmy-veX~VTDWI|sIs=69*!p&6{XW^iI+3* zhO}Q@?Erg-m`wU@k(em*YgYgM5@AhxC z&gP}SocY&rEX|S`mVtTQD3g=QkgW zbU&4Hz|gcO^lyV4|8`H541TI{8l!`FPg<|$ca>anQGyOFYGEX)A^Jckv#(;^jXpcu zbb_Zrz?v*vwGuk2};@|u4G^aX~xGGWHxue zgAVqilXpSUra|*hBB!~Yl_pPU8<~QK^}wI#Lm`AtjPqWQDF05~iAk)R|RgNY}Vq3lH5WlFQi~0>m|UO@1MMI<(v= zKozhriC?lFAXM29BRmEWKyAyM&$^z*2&~YPG&{Vy0`i65Y9=Cj929%uJL9|jizcvq z3I5w|2mN<+jai2Kt7AQ>|1=ev5iSSRLdKg}o4$|?>cuc#)RF~62Kj-i*IRm(<+V)a zC{!xQ&NaCN!;V3U?#@>!b~!oZb*K(pg(;7(Ql6mJ;s6)~u5B47P74=_3@!BahHv)r z4$T*JU+^PO))JoLzo>u}@cVub8o`y2nWJy}zl18QOkiuNa}^LVUQiZd4ycM0vj=wc zM3mJs@bFNsdv;8Uaxh0QqGxIJgel;bw|Lw7IRF3vLqVTJLKgotZ)9}jnsx}?07BPi zo>ZVuo+S(kunx8ihnezMD$SS2B8Kr-^GOVKBS79OCReH(k`Ki>1kyu_Jy@r?0004p z0iR=O5VuOCc^$fq8$uI^*Lh#+RZDaUm2LUlPzeeIjllnZ;DkkUmPSX1=0tUP4168j z^8>4cc&v`wnopc`0Af5U#u#gO>nG538G)0hA2j8GYGZNNiNZnt7Fav2^uxI#SZ|Si zJi73CrEYJ~I;a5LL1QmKx)~^YSpR=5r8LhFeyph9#j3m_<=tSE%C!_JStyE6HfC~` z*(bGL^FW6^&*><_6*O=u0ihDYE%Q1OkP#JI+%9tF>*#4VuRqdgQUSski}+hNbzygo z2YT;7e}Z+Z=+HE3W5Tj7TB&j&cwG2FEB?M`+UFbYUuqilUp6R)YI49Qud`>^`dQEn z>Y9Wr+{GT+k6GRmF2(Rck3zJ{EwP>mdbblp_ZF0&j;Ikr%iOVhSodtX{t1@tU0Hwf zNqNQW$b{VjGg*AF4{x|QU!6sf@sAzq^R<2!Ar>X57=f3@9!olWEz$51#Cg8<_>8U% zw7Bwb*cR&g4g6+HX1XZn7xRl{78`67ruC1LhfV4M{mPqy@WhBo{WhopJj^gWa=|J<6Koy2sGi{{mdVqW# zpN2it%z8?L3^)@Dd?Mmx+3G6%!)%>0c>T@5MTbgC2#v!cBN`p+OD5Nk-=aSql?->X zwj+rh-wznEb~QETLT0k!AyC#3SK$v(*if?0lns8UAVE^@y(=v`KpgoWq!KF5H&JnA zCSEI}5;t%zoP+R>T?zSc5?p*2Fz{*+Cmiz=ZSt#ji<&03?rDO7@uz?`o}B`{Q6K@& zPyP){=Jc>?ez`*oYQhH#{!wQymwc?cKoefh(g(!E(4cY;&=}Md**TP3!A!liE>r|{ zeG!TYph_@kN?k=HG{>&3uIxwb7=R?wZzY%^1z=Vx96J*Mt~S*?&b$;T!E$W~c9Gl( zxIjh5C#+{ww>D{f4T1Y$uHQzH0Y5sOkfWp7njdccJOQ=$? 
zCAu~ASiR)~Owk4d6<@8v`g6FU5gA9f*bQJ)jy{Fbkq7dC{-?PFKr-VKWxpSP#C_)*Hl0K`R+y;O?K8N^Cx8DU0F+8f?h z<>)@CE^WBUxH=Q*rf5l3`>!14IP;|wP+0rdf(a8RXEOLyDuj;!DN4ChK%K%2Kx*$Q zkMYZ=!C#k!0bTNGBiN5`PD0bngii4hBenLr|9&$*e3?-+@W)+L9IszeXc1?Nc(oVW zL@MH#t!?gW@=+Z1+vEC?haQimLosJo;96;Xsn*<@-j?hIZ za~Bk%1_t_!V(&O|E(2s~xdtf1!Q~MWSAo6m!b{gY`*arO388eA7=3)M0AFx7Ol*7@ zsW+7j!iEn1Jx~mJA3papv*-`F>vf66by`P-06=S7MgqRD1mq4^w0kbd%WfwlchvHS zjTnNwZc&a*>)!8W>v=6JQN6t)tPN|;?1HL@N8?2shYms|Pntw3O90 zCjbBdH9?;ZMHMV1|4|k6Ov#LI)ToZgqfqxl!u~cL3ti>|9?>A+Gd%N_ZNHG$bAhSW z=#aUGb$0*&03HFKEObKOF?bN@UnJ?78z$%Mq0xxVnF@l)(hl+90000g0iP~vLfppt8j0xAEWiGTnA0p3BIGfAjHY?(|7{{T|> zkZI!D#4X|!`ujJza@x5K%v!;yZJFKjDr$p9p2g{j10nyi?0Yv@kyDZ}Nt;o{7Siht z13PPPl&(tX%tx8n`v%OYSR^OVv>Uy&Tz*y%%`1~wSdtLJ`P@~0)^&BpD2-*U{R+;E z|JPQ^A@4p2B5bO6Wn)iK_t7jj*)qV~Z=*MeACyHOK)`z=l z6vmU)7S4yGp6~EZR5FzNo&38k+uIADnNvKlpQW?Ws3OhhQJJvMCBzj+q?7U<)c(SX zXYZqbWI-j5aqUd&LGVB#`^&zl(?}1j>d6Y=8xqGULQhoc5Qy3-)f1zTjsADtr_mhqvdf0$0000g zL7!1Y6)YkDDHaHlp*1m0000L0iSYeLfX3CT#@PtuUdcuE| z{bwkn@l-s}uDsVA5OOe!($EoVof)8q7aVC}YJb-*IX1~Z$8%m3l2|L0y5U@sh+Uqa zV#qHIkPV7E1X(iKyk%pKjR3b!F0k70#E(f$qo*Lxb=bh>Oz4aHkp#}8YlY_4^eC8e zRbPg++Upw0vD&O2j4?)XSE?GYR5OLT)x5iLRDo)JTi1c5W{J85y_6*1qNmO_456r?yDby69N9JcXmmNT~x7~m&hI-5#k!ohN*$dr^;nh>liFU)=&>IBl^)*%DM{C z^f%JL$Uo49bWw=(b-H{yrk6hbhoS0Q4blsccHYR?Qt{HiW<^ zA#T;7p$vz9Z&+(KFsQ~&le3W=r)5)Dml8VH&M1SBqO~EiGcul zoEgn+C~_1=plh16M7PL86PpRFzd2KVLvFNeCf8eylqD!#RfL)Imd0YATbN!wJe6c- z7GOk|APPKe0K=_Qm`49TkPuuW;I7ZSZZFD1(eU~OMJ(W+rVWb;dWfyGucY4^K67-a z;8l&G0`XSI35O!J5hY>jSzgxF(A9txd0)xXbyGCx4j=`IobC0Ix%R=md5{B$(Yoy5 z+d)xR341)XF|MANV9-*h{BNzjYu8wk$reBkF*4BT90ZDEn$cSRh0)BSzoaee1$?QpImB!*x6z&pzv1cnmRXt$Klshcg zCSfV3m>&#krvLx|HbI}2MHMV1|3AWu7}y)Dpi6D(eV`Qo_DW!rXB?cHB@QCz1llSv zdFU6;vK4^#Cw44+8PZDt001ljpR{yB-$-6xP7=<_HjbVp{dQ&YZw+Y$H9ku{MSJ_d)`qOWUyYvTK zR`!78LJu9g?m|@9EQg)TNf7oB+9`PLRD0Iwa~?AI>1g_8pC=Vc&VvnAFco?aZd&se z`j0jE;Di-8HJs+WUOG*Ixlv3nHH^sH-{YV!5|ORAy@HT<9w$Pr!b#?8?u{BEV6YpD zXaBwha-A<0!+I8@u-%IlB@eBAi@#bcc5{&g{VA*8FeR!;nSIC>YFCIKJRLnF@gzE& z{vX%PcV29&iy1U;T~E@+em)T=L;!`J&5;W|zxrUckM;r1cbrS4%Q*l503bo1*+msB zCI2wPW}=@K3juOs*vrM|F~ne!smwjK84v&f02TqC_;f=o1`_iu`wMppp>k(Mfsu?b2G$vJF~ZTLQ!3( z`+e>vO~fR$C=5qGM>60&yVJ#Hy~c z^NL#x9K8by-L=>=d)005mqnj=Z5 zL2Q{!5B~rMe1(9NsUm^oU0SB_1~QymYm~$`O72;cf<1KHLS+mw-|cHTY)_{LfTpfs zG_kl0#+yGX0N<8_0SBpElLlLc4)5=42NnYjOW%29#gXv@CZ^&Mc+Z$;Pgdhw`yuRpP@MlF80000L zL7qWH6)YwH3KLQr%5!tVCxO*AZ~y=R2?3s9bVA<%5pe(j01N@1VroL)1Cxj3M?nAp z0A@j&XGy3*Y?(|C{{Rhl3q1TFc5+8hBWWEO+h*~^S3FR^gNbEOO1P_GRPL_W2Egn_ zbpeb=^jPw#h{9lNFQz-AkWv@R(Y{yI{FK-3UMkJchF~NKp^==G-~+7Idv~wEP`)5T z0000FL7s(06)YwH16V4cMnHf7000RAo}_d_-vAMD0000C0iLF6Lf-?Eod7i;0002E zL7J;cs6lL*Ob`D6A<&0F7JLluNX=ooHsab?w9VN$@*fRpP>a)(q8Q4>`vvs371czT zR0~xDsBsThkAF*-27aL^1%Wrk=sT2B+}K&?;~(+dU~-Sl;b&fCz1GiUPMEjk%3+v0 zvLSj5+_tL1m0LpN2+^J#87X!8$jZ_~h<+W9JyT;L-N1srjtS!K-4sJL*fLIsDi2}= zmse=p8woeuEFfF*v@I1RSO}=`{}06CSQ>}n0000JL7vG)6)YwHOz}ua?bC2J-76pf z000aDp6GN!-vE|QRlc(y_fdBvi$3dF!NvJ_=nM@D=029#J z0Koa>x=a*!zsS~ec2Us4M$H^gY7k*cDYT`ssjyS{RABQ zPYK98f7Iou*KNzOF;o)MtA;$NUe;ViXsF1s0000OL7xpp6)YwHKi_8_PN^&zw`7}n zfgL>|0000F0iP^%Lf;aWc;gMp>>V%w000aDpDt=b-zTBwlLJWr002)xn=?tML2Q{! 
z5B~rRqne8}><5)bY}f z)AB3PCKugT?qU4EQ2%7fMo|C&01iQ)QAHIjCI12|BMpAc000090iSSmLf-%pZ~y=R z3IU&TYC_)tkjUu(003b@n|Dd5L2Q{!5B~rMe1(9N2%Vr|cRB978DG`Q9OxQFr;P!J zFjPfpIq%tSPaAlaz`om==#G(zQhHex^#;_D)q!I!;#Px#9BAJWy`kG!OV&#eOMQQ1 z7uX-@Z*los0000FL7$aH6)YwH0xKmifq3Bn000RApR{yB-vAMC0000A0iU*NLf-(@ zR{a0~0A4|xyGf`)Y?(|C{{RPkg@BZ(-JoH1;Bi0EpU|hXy0|HBBmx`heUG=9!PH8s ze_k86X*HdPj_#?Le@q0!)TM_3^)@R4ezTfFX$cDE_u-ncnYg5&OQce2dMbRM0000B zL7&-06)YwH01ub|000RApZIh_-vAMC000090iXJ6Lf-%paR2}SMM0bYNvJ_=nM@D= z01EzVJlF)FJpt8n*fVZtfEG7Q{510S|00{w~_;fc3-vAMD0000hL7Hbts6lL*Ob`D62YiKqlm~<*a7z^JCWY+#?op-H#p*6|Pw!0z z=t>{}000X?o`ppfEG7Q{510S|00{w}q;x{x01{?R~ccVGWS{vY01RPdBvs~}5Mo0|+Zi1KtKFcw z4$6Hi-N_LyIR5`RzlAbQ;#sc=zWbTJApigX3_+ijMHMV1{{TH(#{d8T2?3w9bVA<% z5pVzi00{w~wrWD(01kEE zc14ZszBWBF18(A7M{FDg#o18+000X?pV>tfEG7Q{510S|00{w~_;fh+000090iL9ELf-%paR2}S2?3s_YC_)t5pVzi19Cx{t4XLqY?(|6KW-gZ ziljgPF&wxxJbek4ugNgVt?8H5E_WlY7I0Aqv|F>cY(2i>rGj3i-@>`2u0+=gcWRE0 zS(^TlB9?5U@AuI1B15@RR|)UzNF#tc9xb{Mn+g66gc=Ei!g~#iovd<1l$~!QW_^TF zOe{YqPQOJ{tk@f3ZCj+w-8qk@^>}zn;;N*bcY!BH-S@rVOi}uN=|IUJMyZN5+ z{LG$CDijWEX1VO74;J@k3UUnGW#~<+Uj9|d(JG^zwHZT&;spLX>9BaCpTg89FHb%9 zVw;bn!^sYB+7LGOF%_m$&_?|a((Nrw*PR~-(>blh_5rcEDCg;cPiy%_^z}7QZB_azuPwy9xwKKMqMzTp*ubFE|smHA*4G=h~7qflc*tmszS;q)o5E$!u z5DX}B#3vD~gc~4;#yzu2734=(a+fb~3N(2m)YWPg7Qf z>8pN9=UZP9@8NryZa^3r^ihby-kf{%>((`2pUcjXv>9*7K=Wkq$vE?lgKQC4)fmf*UunN*-F@{me0AJin%C+Rw1Io z=2OnDWbA~@jA^%+OP)6oH7l7bW-ud#LO6PifY#EErcmhYi>L5gobh1-!UtliZ6fnl zu&TLL<&E2rOM^#+DtP0MlBU0r<1X5^s(WGp55~x=p>XHH0000)bnbG60e)} zqzZ@e(@`;)gg)731@m`gl-0FTfs(b9L5g%>VKB7nADp$F( z$|md-I{|0{+?<){(%-*Q%nZ*9BZhFyshTCw%3c>AFn(hV>j5hGP`pL`O;?e+h8*z# z001CCpAAJ7EG7R;FT)WtJPcY^EG7R=d2jpC?|ni`wQW>0 zYk&X%015%0aCAc7E$xLb0000B0iSYeLfOV5<#DpMHMV1|0(HG9q7JN)Rs8_000UBpR{yB-!1KhF8}}l3jv?D zYC_*GVX)qS0000NL7Tfts6lL*ObA2&0;9o4t2n?DYPGij000s}pV>tfEG7Rb=~5l& zzEaedH~;_u3IU(^bVA=P?S(G@000XCpZaP--!5UW-hluB024u*{Yj`nY?(|HOfCNa zU3h10003100000000000000100000000000000000001000000000000000 z002M$0000000000000000000000000000000000000IC26(@9ZVQT;Y09000000000000000000001000000000000000 z00001000000000000000002M$0003%0002+0000005D}_baMaz04QZ_b94Xz00000 z00IC207C!&|Nj6100000H6Q>01ONa5000006hUodX<+~W03dB-Xk-8Y0000000000 z00000JOBU#FaTA=0000004-=_Y;phq00000004GrWMu#V00000000000035LWMywi zVQyq>WpV%j02AzOX>Mi!000zrZD?cw000310000000000001OpX>Mi!0010ha%E-! 
z0000000031000bia%>;~00031028cpbYg4(005YCbaP|?0000000031004+#c4ILB z000000003100000000000000000001KmhOnNB{r;NB{r;000000RR910000000000 z0000000000000000000000000000>O{{R30GGTUOLjhy}4gcW)8E0ew4XoKgLHhv^ z000935C8xJ1Hkb@nP33`259T!%Ob!4000{01P-|baZn70000000D&n0006200aO40003100;m8 z0003100aO400031000000003100;m80003100aO400031000000003100;m800031 z00aO400031000000003100;m80003100aO400031000000009300aO40003100sa6 z0003100IC20003100aO40003100sa60003100IC20003100aO40003100;m800031 z00aO400031000000003100;m80003100aO400031000000003100;m80003100aO4 z00031000000003100sa60003100IC20003100sa60003100IC20006200aO400031 z00sa60003100IC20003100aO40003100sa60003100IC20003100aO40003100;m8 z0003100aO400031000000003100;m80003100aO4000310000000031015yA00031 z00aO400031000000003100IC20003100sa60003100IC20003100aO40003100;m8 z0003100aO400031000000006200aO40003100sa60003100IC20003100sa600031 z00IC20009300aO40003100;m80003100aO400031000000003100aO40003100;m8 z0003100aO400031000000006200aO40003100;m80003100aO4000310000000031 z00aO40003100sa60003100IC20003100sa60003100IC2000F500aO40003100;m8 z0003100aO400031000000003100;m80003100aO400031000000003100;m800031 z00aO4000310000000031015yA0003100aO400031000000003100IC200031015yA z0003100aO400031000000003100IC200031015yA0003100aO4000310000000031 z00IC20006200aO400031015yA0003100aO400031000000003100IC200031015yA z0003100aO400031000000003100IC20003100;m80003100aO4000310000000031 z00;m80003100aO400031000000003100;m80003100aO400031000000003100;m8 z0003100aO400031000000003100sa60003100IC20003100aO40003100;m800031 z00aO400031000000003100;m80003100aO4000310000000031015yA0003100aO4 z00031000000003100IC20003100aO40003100;m80003100aO4000310000000031 z00sa60003100IC20003100;m80003100aO400031000000003100aO40003100;m8 z0003100aO400031000000003100;m80003100aO400031000000003100aO400031 z00;m80003100aO400031000000003100;m80003100aO400031000000003100aO4 z00031015yA0003100aO400031000000003100IC20003100aO400031015yA00031 z00aO400031000000003100IC200031015yA0003100aO400031000000003100IC2 z000C400aO40003100;m80003100aO400031000000003100sa60003100IC200031 z00;m80003100aO400031000000003100aO40003100;m80003100aO40003100000 z0003100sa60003100IC20003100aO40003100;m80003100aO4000310000000031 z00aO40003100;m80003100aO400031000000006200aO40003100sa60003100IC2 z0003100aO40003100sa60003100IC20003100;m80003100aO4000310000000031 z00;m80003100aO400031000000003100aO40003100;m80003100aO40003100000 z0003100;m80003100aO400031000000003100sa60003100IC20003100aO400031 z00;m80003100aO400031000000003100;m80003100aO400031000000003100;m8 z0003100aO400031000000009300aO40003100;m80003100aO4000310000000031 z00sa60003100IC20003100aO40003100;m80003100aO4000310000000031015yA z0003100aO400031000000003100IC200031015yA0003100aO4000310000000031 z00IC200031015yA0003100aO400031000000003100IC200031015yA0003100aO4 z00031000000003100IC200031015yA0003100aO400031000000003100IC200031 z015yA0003100aO400031000000003100IC200031015yA0003100aO40003100000 z0003100IC200031015yA0003100aO400031000000003100IC200031015yA00031 z00aO400031000000003100IC200031015yA0003100aO400031000000003100IC2 z00031015yA0003100aO400031000000003100IC200031015yA0003100aO400031 z000000003100IC200031015yA0003100aO400031000000003100IC200031015yA z0003100aO400031000000003100IC200031015yA0003100aO4000310000000031 z00IC200031015yA0003100aO400031000000003100IC200031015yA0003100aO4 z00031000000003100IC200031015yA0003100aO400031000000003100IC200031 z015yA0003100aO400031000000003100IC200031015yA0003100aO40003100000 
z0003100IC200031015yA0003100aO400031000000003100IC200031015yA00031 z00aO400031000000003100IC200031015yA0003100aO400031000000003100IC2 z00031015yA0003100aO400031000000003100IC200031015yA0003100aO400031 z000000003100IC200031015yA0003100aO400031000000003100IC200031015yA z0003100aO400031000000003100IC200031015yA0003100aO4000310000000031 z00IC200031015yA0003100aO400031000000003100IC200031015yA0003100aO4 z00031000000003100IC200031015yA0003100aO400031000000003100IC200031 z015yA0003100aO400031000000003100IC200031015yA0003100aO40003100000 z0003100IC200031015yA0003100aO400031000000003100IC200031015yA00031 z00aO400031000000003100IC200031015yA0003100aO400031000000003100IC2 z00031015yA0003100aO400031000000003100IC200031015yA0003100aO400031 z000000003100IC20003100sa60003100IC20010wbaP_>00000000310003100Ec) z0003100wk(baQ$D000000000000Ec)02BZK005o<00mP3001Ze008Cy00mV5001%o z006ZB00r>?001=r006oG00jg9002w?009pG00hJU00l|_0065100<@k00EN#005o< z00;m800E5v00EQ$00w~o001=r00Alh00#*G002+`0099200e{p001ih009~R00^W2 z00C|Q00h_o003VA00!Ow007Yd00+1L00C(L00ImE00@r&00CV900A2S00&b5001!n z00BY(00!Xz003M700A2S00aR500Cn z003A301TG^00avF008s=00Ped01P$&00{~J00C|Q002?|00%+<00B<`002?|00|8M z00A8U0034100!#-009F4002Ay002P%00&3_008g+004Xd001ih00-6p009yJ0027x z003hE00Kt<00KY&00wIS008*_0021v003hE00avF005H!002J#002S&00j;J002?| z005Z)00gH1001!n00BY(00tBQ002V(00ChD00o=?002h-00D~t00%Pw00DRa00H^{ z00u1p0027x00B|}00*}K002k;00BAx00p`L00GAU00ATb003bC00T<^00r~_00AHX z002+`00vtC0046U00*D|00B_|0037200sa600)Qw00A@r002e+00uMw007|t002P% z00Bb)00#*G001)p007ni00*T2002+`007kh00V;n00mS4008R%001ul004;q00BDy z00vb6008#@004df001%o00h7Q008p<002G!003tI00LG300ET%00MXb005c*00e9R z002J#007AV00mh900E2u00i#<00E5v002h-00A2S00naZ002P%00Dvk00(ja00F=N z00I#J00uGu002G!00C?O00amC00m3{00A!m001-q00XlC006T901A@;00B<`009^P z00un(00B?{00zVW00Arj003bC00lk(00Auk002h-00CD300x}^001rk009*M00oHv z002S&007Sb00lq*00CD300B(^00d6}001!n00D3S00%(;0034100CM600tlc001)p z00C|Q00m0`00&(F007?r00yoA001)p009>O00M&m00ATb0487n01Nd1002w?00BGz z00=Yy001=r001@s000sI00Dmh0027x001Tc000pH00bNW0021v001HY001rk00H6v z001rk0015U000>P00V>o0024w001xm000jF009&L001Na000^Q000pH00BJ!001BW z000&M000vJ005x?000>P000dD000mG003$L000vJ000dD000mG006uI000*N000mG z000sI007GX000~S000vJ000mG002_}000sI000dD000gE003nG000vJ000dD000gE z003eD000jF000dD000dD002n<000jF000dD000dD002G!000jF000dD000dD0024w z000jF000dD000dD0024w000jF000dD000dD0024w000jF000dD000dD0024w000jF z000dD000dD0024w000jF000dD000dD0024w000jF000dD000dD0024w000jF000dD z000dD002Ay000jF000dD000dD001ul000jF000dD000dD001rk000jF000dD000dD z001rk000jF000dD000dD001rk000jF000dD000dD002|~000mG000dD000dD001}u z000jF000dD000dD001cf000jF000dD000dD000~S000jF000dD000dD001cf000mG z000dD000dD00VXa002z@0027x000&M005}~001Na000*N000yK001=r000{R000gE z000jF0015U000&M000gE000jF000{R000&M000gE000jF000*N000pH000zobYpJ- z0000000031001xm002^TWOQKw002sEVQpndI$TI~ZDDRfV{~b6ZbU*{O>cB*Z*D_u zX>fB~Xl-O-H8Ec^FlKObUw1-sWM45bZ)Rp+WNCJITw-!{b7)^bKL7v#Vs&J6VE_OC zT5V-?VE_OC00000A!uZ5asU7T000000BvMxa$#_AYybcN000000000004-^3b94Xz m041q(Z*Kqq03BpubYTDh00961002y3c4jp;E;KMMF)%S0g~m?+ literal 0 KcmV+b0RR6000031 diff --git a/axolotl/tests/data/datasets/video_dataset_1/media/Jessica_and_Gregs_Cartwheel_Competition_cartwheel_f_cm_np1_ba_med_1.avi.mp4 b/axolotl/tests/data/datasets/video_dataset_1/media/Jessica_and_Gregs_Cartwheel_Competition_cartwheel_f_cm_np1_ba_med_1.avi.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..7bef99846a6312aa5af894695545dd2a60f71db7 GIT binary patch literal 187017 zcmV+W{{#R4001Cnba`-Tb8l?`00IDMb8l^Fb8j+Xc4IMZa5OOh000PPa%E)z0@FNg 
zWMOmw00OQC1^@r5+(qfV=GjQMmMqvH*(2`ncrrFLAT1zcZ*pZIF*GuqG%rn1MMo_(AVF3`AY*T2Wn&;MAVY6(d2D57bRaS?Ff%POFflbC zEg)!gbZ|N^FL!r$E_P{TWp8X@ZZ2&a>AUJq9WMgzaF(6}cZ9Om`WMyGwdT(xJJu)#YF)<)! zVRLj}aC2*Ea6K^~V`y@3ZDC(=a9?j`W^-k9JuNaIbZByAVPtbXHXv+oZ);&_WnpAr zbZByAVPtbXF(7knX=7z%Uvy}4WnpA90WMyM%ZDDj}Jux6@ZggdGY+++% zWIZq-Vr+GCVR>I;Z*6d4bUiR2V{dMAbaG*7Ze?U&X>N3KVLdP)VrFt-ZDn&kGazDL zaCvfJZE0jZG9Y4KVPs)&bUiU3VqaouVRJn&AY^HBWn*+bF(7wkX=iA3Vm&b+Z*XO9 zUuSP{Juo15Woc(N2qGBq$DYh`(9ZggL5X>L6-H6U|iWo~6-b#y&6 zFd%7ebaG)|a%E<6WpijfFd%YcUujFd%VoZDDvlHaQ@1aC3BJa6L32X>eb1VRUJ4 zJuxmcFd$)ZJux~lE-)|vfB*m@nq`QD0e@FmQx^Y(Y>kmjjhhkh^x_K_3pvPR!l})U zZwnJjl|zNP2e)z|be8mvv#qZ4p%79s)BmRfoyp8rT`aSBA^1SNvQcm3H<**Ht?{EE zlc9HF1FI%b6k}@^gBa-K#7#`w@9{Ajr_n|pjS!oyX3C406Jj}#L6wEP`QwDh8`gi9 z_wXc=E+9Hky0Ez8PV%c#Nil1#oUcLvmJF?0A=)}h+Ye*GA zet*ghOCa3g3T0c)sfixp2$gzrgTy#BAR*H65=_-gwF~Y@mMQ-%gtXx$^J8puu7E$7 zS}U0lR*Wh>CY2zcmo}~%nTzFVX0m;D4bWR zXRZ3BcyAQP?fRzytY8LjJ`|mP^t)*{HE^t_K9#AvQ;>RYSJBqTci3RYdrvWgQ@efB zS+Yeg#smLSu0s>JwRi>HUsLM*g*LOLBrxVqVjPk zc;nU^J)glbWFV&#HxLP;2mrs*B76OfS_VkFWOUMrvCeYsXTa%$W~s#DKr zhL|13l3_4wv&|(f-H*w*V>ICDoD^4PCB}LemP084RPFxEaLCI6HQSa9J68%@Jn@Cj ziLMssE(2CMr73lb6YWbM2^JoSYNz5|pmyC~f1X*hnP9`yl?rkJrC8Y+bDS^cS%Pq4 zXkR~M<`{Q0vohXN6w3o9HSmSsP^GVanNPkN;9Yd)pMiZJO7WpLbeN+uL8DH2dV-ly z7X6*x^IeD4^r<2-dvAJ$vd)c z`VpWWp{2mfr~}e}D#`(8wQ)~JRnANUQSN(NDC+GrHDqSU)>Yw3s+kyjNICMyT2cg& zks;Br$!XG%kROIn9;qh#y5nEe<3Q_~{1rIqahd0UXuFN4z&^L$UxR@y+`b8ccHSee zzGtl8KM4X<&w=@BGhi{NtUFh)<_jboly^Opkr{xpPgnZ(?92nH)rrz3Y|F<;;$z@G z#oLaEMJr(teEg6atE9QF`v)XYn+OLwqjNNIz#!d)NGglk&m+D?YCi~Yx)2Y){Y|T^ z43PQ~OEC_PXb_3vav&S-qVCSoDjL+?;rPAs&egpAkf_>8|L+P;lOrGWAT7mVS$@0* zk;26#WTsLc9GP=kU;VFci*sJ)aH81TSJLZc9PL;sqo^bDwXHv7Z+OdRu?6Pk4S^h7 zngNP<$4>H}GMleoZTYz~c)8HIO0g z5dS~NM&>a#M2Z_4t6per3c~orozShy4<%YcG&z_$1ypQb-kh&+^jWOhdFX~f6N7=K zwTT+Ev@#Sv|2g5tMPVBgzH%p9ueO+{M@O!K&b zD0&xZNo*uL01>&V0_?2(66!iYqD6JQmR#QBxdfAA>T_d$`YQ4~*}VFPP}n2Fx5(Hh zq8d4TEKGAz`&puC#bSkCUK8l&1ngb6x%|_UtGVO59J;-Kw`!q~F^RN_{tra%(~7MGNq1N)q+;~Zvp^coY5u(d>{zm``S4nU?=N9pX_NuZ&<|%#mU=9gsaA?~zcwP|}^avV+U`56r0LqAC3Xrc9az z3d~#h60qiPl}Svs*B-Kw4q-&vDVG=Sx)83jow)&H|r@eUg7~Ln?IF7|B25lcDymbu7Km>!j35sYa&6l1&X*m%C&ij|tdte#$|Yx(Sue~x=Gy%=OYF45uF?(&XL`F( zS)W`Bc#Xw@HYleBccCYrc5Wt)lY|cCB2LZ8T2m$P8+7f`VI#Lz${abNs*-LGuyjw9OCwTY#^@qcN-jvG#0O369QuOP~WrS zag3Nrn!IvkY#4{Ndw@;!cjC}6Gr(j$6msT}MCz4FMV}6E8iia553-Rhx#L7kuJpa- zyQ50~U)5-MXS}avrXXsp+RR}#x_lslC0xQ$PK(u&0Sh{94lO+Go1cyW%U!-b}9i2BO9BcZAh!vyt5%YMFW zU-1R0xIW&jwNUM9ZQwGv{wv13rMOvAjRfGZVN|6caShP}!b|rm3?pPlJ(fDF$-=g6 z*UnNmx8Nz8)h}?SMoeg7L~S21P( zP{x0(Ff!4_3^~Dzq%8c)yW~TIF1*{RB337c*ry<)?-=fn*A20_;yL$ z3$*BfrfFDKl%tI+e$W#uqrbyU^gj^)JmsyejW@EF?c%%qy( zP&y}4LWb(XE|Z!=$4ktv9F6)GD%-d!`axJ6kPQ-F+0f0oT zst72&Nf=lLE4sAsTGD04BvZ@M=AJ^_`}yXWSa_oyv`Gzo|eVy?_FHf zVRw0RMie05ScCq?>76(-+iMUX{36!B_(BYv#_7H%f(a!{Sw+J~&xsg0O4%@s+C)Qn zYytX@hKFz-DBBcGr&h=PSrC+Lz$#8OXFWPs7S8HIUh^AO7Gas1E$ApW5VBw2_s|~j z@{Zx&{=&nOKiq8@2AQ)jDc})ukK}>|RvX|fV1E_1m|XRkL3noKc7yj4&!Fn%eUxF} ztV1E7pHr9_`tV*Jyt(EmBv}D&8{dzws>n%G!T(TjUAB7K9(VGF>$NE=-g{8A^nBb^?+a*=9wfBT zm-t%$EUutNh&pB`8?lMo#?~zEIVSXyjKO}&g`(jMJEQQYS0a-nh?bE;FtdVhw3`VP z!4W2|_GRMNYvS^pa1oSSYLUxn3s=+lNSNW~IzDJ4D-z7(D#c%{no*)~qWlta=fW@dq3`VG5eMpZ`F;c7}lhNJ28{ z=Z%qeEjHt5XdV{xA8@WuQSfTi9f@j4+NoVQCmL3Hg9VNjm-vaV6&?7@JXV$YV33?A zc}s#yQotioUn%YvtBwE#5FNh_2wCpjbB8&ynN{ky;I20=qA4_^!=P`+PG~mFz*is9 zujbnch={B!_Y-@HV7aRPCqcdiwaokiP2w%2nI79P&U=J*3Qus%gyA+HNSnA*c90d~ z7u8zwCkkV-*=#P}4Z^h!dR_C?BW@7p<@sV!tJMp8i~16aJ}FT0{NRx}AtyHd`E~EQ z^4#sPwG**6lBj|T+LID6n^Ie>F{p}h0nW}+agME&JCnBcq1l*Xf$xsYk>N`DyiK|i 
zV{DNb&{l{IUf$tHp?~xFE#ph1=ToaXasFoY8i7sGkIXO2Ad{JHThZGSjTdN0IzKd6 z?2($hSnvu~gs^b9VYKDMSr1Pf(hj4`o>?ONV~!*1WdA8*X=cpGr%k-Ysul@>oCLSu z)A`S{0n#?|2PQhJ*`Jp+z5bK`G@ett)up)e|H zOfrHmWVL~Zngi>Q`bQpZZD$WwN=YUvkO(WPS<-=u7Yu)Lfl>vv<7o{i;kUP(_WR?S zJx!fCz7(KuyLAm=CgP+aqReHs4>Gj;@x(Pj-s`8px>5;MWgdb7`f>qVn1>N~`%hp{ z_j_w~-D$>r9)999pKw2_yQ4(-`e{oyX{V2i4C;PkToa_dY}Re>E}L>X3orkFhY1>_ z`L`voT*)Ev;Wc&t0mxA4oD#@M6Gi%)B;nO$&k}rN)IALb@Ud3K3LPWJ@Hkpm9gNgX z9-`2K*@I017g3?6=1!4NS6 z3H-=JgnnTQA0be6y0+EbQIGSQLBH_w#E!|)2Pbqa({l2LHskQ#h)LfzU@HSF0>HT9l| zQZp-P`mjntavh!+M?z%A_kl*8@6yg;Gl!+7BFy5AD|u<_ikNcUTUzJT1MsOxQUG5e zRLTL%Sm&3^P7=iDdSG$`gdtgWM(+i?y#LV+97OzcerJ_ni0C!7e!Il7((+}llYXE= zISkD8Lj5iCn^UCoq*bz8xf4t7eB1+ z5UlE%2_0-jrG3mtpbUWVcoF?w=!Ti*-YR?S;{O2AS4&CuK;5|4#o3ToQ&tD6hS?Mt zD+Ws~sdMzC9yQkX!um3d$M~N7NXRsqW3%M&NEzuv!PO~QB=NZD^z&zL8^$Zo9LgX&-pb`4JtYL$x}LqQiXQhQOJmM`*yA4O$0x9q^Sz z6?k9)`xh~8qf4j}KNg;K1pkIb9WZj`5)clF6o6_ReXs>!tAP%W3lkR=PkuV>64!th&y0 zmZei2TVNQLBiKBEUDbX;9UmU*9T&&p_BQNs4UntvNLI9d6FoHH#9LR6{s!Z>?JHOJ zXoG!yr8P2SI(ee3rIN2j)yWT;TlYTRx0-ns)~z%Pq#$)Ii`T4vF$jZ7h*S{lg?=LG zf$me*Y|yBl+F1?z2y@Rie%;x#O%9|_Q$lk@HfUS?9ff7M(9sOM8e4N@ANQ~xxq~+| z?+*d2Ioe>a1)OR3PUo!zVUM%s}?x4tS=~g)(a@S8Y zEM%J2Y-HyE)ZQV6`me+-%D6%eo2|IAyE%MlR2(otyU5@PaaL7tUrS7#fR7=nKzfhC zBm{b(xd)}-kHy2GKyLJAF}EB+EeCT|#5aIq-7iL9R3wQ!XH2wRHQ*Ez711@}<~jQ46~X?->{O0<)kX(cPfdCO8t?6%>G84vKM0AP!14@`(ij}ugWtLl}a+Q()- zAe{`q7sx&Hj3ukEeK)g^a9NL(Vr}H#^e(0EXhAI3<6?36QJmvAzDp~^#5>zuZ5+gs zkd~#%5Y8?jV2v1fcBAm>R&%?Q{ROOU8*c=6Fs*GLO@w2}gDzDF;T;_OO%KC5MbMwh9 z^SC#zlg{dpet+N(IKk>v3gPN;3*zCWMmHpr zi<>JWsZN{K^u?`vUrrp;FWUu9jYl9D{cxg|`0ny*w0xXfg zsyR9;;ed(SA;YnY7{meIsF6}bqjw!-T-hE#@)ke741xi1%WfxF0o(>JUOH-@5xD>12hF8bK%OAXJ@2`ftR;+A}l9js=Wo z1U{v%e@Ac>8Xe4iW`0fkIaPl3IGhgh0$fCgi^}*fk~_O&d28`waYZxP=K3Tm9AyP> zdxv^@ra+oA;r^nPc>LT%mC%y|Z7@n@nc$I_dkk7_T{)!_ZDCZ za*+hEzzmC!b^%ySPU$03BF#1XY8NEskVN{jyU$y*rEAXY4Koy#_To=yP3uLE#2IT_V3(i>1jr=$#SGK0=+5-1 zVcD-~_rlq(yYioNV|pk}{QBImi|lTJ7XL)5|M@idlR{5dYC9v%#cg0LL&euN9D2e0 z8N+fK1j8IUwt=72?m6Ej5|S55-h`IujaRi&hW>b5_8$5w)jU?meXQeJY9`gaJM4^I z-yOi%7@_%QX0W^i^DfSV$K+}JN=8*dkE%R4VY$p#2J2a@wPGUxpp4m(3te?M*Jx#9 z98N$_A=K5RxJoR*Ed=2$Fz8^6_XNBfRnkDw&v=wR?E1S!Y?V>D~e~ z71gh?e9aiqceg^w8*7xEnlfbTS`mGqL_ncVHdi=;xmYu5f{gvp$}rW8#6WhpAwXQX z?Uvia>8u-8>ui!71s=ygqZ|$ABh;MeD|%y5LwIcpYPxm-*a_YW;DOIN6wlf4=}Eg2 zc}#g82rm_39A%+GY*D@+T1EgK@<}o?EPU0KgPIGi)C2TwteM%p`7O-T zOD8soHgkq)x*jC^yDn_|El%DsS?h&ftuvUT-eG6RS^M*e&+q?4v*cZ|Pm`^{Q8d_S zDMgyLEy|w3E>6c;HG^olqNEXZq`a>7JCjzeHJ_mNB{)1J|5xBXePsf+)A!PQ9AEeF zQ=(|uDNUD8HIbjap(@7d*^o53iTQe&hcziZB&bTkT@h6VKBvhmVMz|rUBe&JsNZ{N zJV7QV!-*8wv=2uWq#fmRzZVg}o``x`L4G0M$ubGbU!<1&<}~S*rFddJDCrjKUEbuD z8CBj%$6^Ss;ws0A*lb!@;}vc=aUpYIRnhUQd6;ha*wsN$-If!gR)mb6D`Y4)|tpVaAT?RfBMjKS!bbPtpL zss^A9+`w9wmoNA|auvAZO0|aGMvu#&CavUf&K)awxQ}nxlUX7(!o8qP%VMoX_IeBg? 
zo8t;NhuGcu5<6I-irJkcXUWn70XV@mo4o2cEm6h5@7i(WH&&rk5KQFe#i$wYfA>w| zUUlhP=amW5h-vD{q~gk;bYurb!?~6px%(`VXx>76*3^sS2u$c9%;9_p?xxiU^Kz$P)UltT*E*yM}OOvPR$iiRZi&FC}vWF z`3{kCyg{5MWrPsTWz)16z=)AUcXX0@w5*$0VGBwgj(W^9*#3crZYk*$X3+sSedHOx zSo!efiybZYP9QLq7@jPXlqs?d<3&Q zC}iGKdnmIW5%k~RoAf%GoDhmkKG})6s19W|+jB#wh543yhFvkE-We` zb^z!lpBebPQZJ%LN>|Qhp(^G=y}SYwdY8-)erfF!1$qAVedA_eCeV4Octtqv&)jNb zggm0wbtx(EU3%rcf>o9)DY#$!`FQ7Wq^KV;Wr}cT`z&S>Tjom=MWUe^smoeZ%FG|` zfs-J6-Mv?|PGpAujpfXI991VVhDJa>O=JtbrwM}T!O(V&qpNOw<;lKGS9beQ5_gu} zy*0vrRR^GC1~lRdIK~6O8!KnE2ZT1a4@|(jS`V&_hIcH5Rq{e|d&g72l!SLP9Pw z)d8NKXtBJ-Y46>_<+R9>wh`W_u#kSS<)UAVfff63L(LQGwR)m=X~tm=RRxDETJMY( zZy{UO1}WX zS?!0M>{lpZEzGYBii;@d;|%q`^r!sG_=Rlf&ijLUrm$cGxm%Lneik{KaUvf)`qc~l zq|1?%9df^|me!uox5|^_2Yv=+yRL2h2W3zCb}RZe0VwmxAekQg0pI1>!3c@pQ(?QV zB?yY&(&Z1`d@`%rCA*9NqjI2yH6z>r?f6AJuFq6O?HVU^)0$^zg_d3XxC^aw0*HDO9n(|ojjdshp@HZQw~tCJg{|* zF`N=V_#>|YIYb4?H`2XIpeMP8fdDGm=d%RrK?9uoeJm}BoJI{+gfN*DcQxX{N~7cx zb!liB5NkGBgL9^sLYm+xdc||_JhcLbT(T!v;d<$LVTPlJbTn|BiA{MhtPjX_)v^ed z?IM3BIV&5(&TDjFDlM2NZ}g5*BHz%>35h6_sT%L)(XA1~BzW^2G>pOUg}z4TB(uAa zc`EcuawebMFMdJO4CSy8i+u=D3GV-KNWhdBcXFCFJ}VN#DX0NY&n<5MdiuQ{$=w`T zja`=sBKd-c3enRr;Ey|d)z9AtJgk6ax8U{%F(a|o&Nw@^f==U!Rz)ypc7)BEO@z7^ zXbgnxqyuX}GK&Y(s(Swgwau3(Z?y(j(RyOyA$4Rxsp@KUN++d6UD!O;&0b?+rQYr@ zhYX<9uMNM1&%0D2gy#t9s;vD1)0eshjsM>>87Q4XY(5-!nP;29PZNhjk8<5naTJH; zsE%q2Z4y!(fAQoJuh5H;s_>$>xZS924ly4~lTxH{c%&sU7?_drG+0#ckVo%o(vM4M zK(6(5hfifQ2&81QrYA-KwsRecXZCcoUzfj~jh%XEe%CXp<1pUo}J-EG2fGzo!s%6DUn>};U=LeQ70@LG?VCLx!#;vy~ zxstXq9@Q(kA6ial3J>sGLoM0MlF-5}Tj_uLFn#b5$@;9|P1+w6y%Q#Kn~ux7U>N6$ zWORRw!|eCbqOr|+t(L+jlFv<(db`e$BP?blT`5P$dIFvbJMR{CX2)-`nc2e|c&=J! z%jfooVRcn>-4%zsb3kP3E^c!9Kkm!-POyCmz^=d#e}tHY73+Zj02bmwnj&mO|C0#0 z`2GbzGe6PtD|qadeZZ)2LbI;ZzS3TQ2=L@`!s33hrL|aX6t>n_WP*i=hGVLTib*#T zZWYc`T^g@{lP)z!Kq6U%@W_n1q1PelN-D^u|DXUG5ZP3iInPSc!)3ri;eum2T$%K* zRc+Z=7izb+Zakw!v3pcfoi^@Px#{_uvriPI{dk+FAeNBOhy=WI4j`4(DWnSsPGKv+ zXa>`!e&3g%dcsT9Y(Ncd@_~=HB?_Ah7+hpI11JN-w~gd^4Lnqxy#LAI$b}$~={`b@ zFDRL;f_dJZR*WoPQNh(Xo_hbiE@^ze(D~ZM&CC3fCGa&DXV|bsRStFolr4Q+;biT6 zcI%7_xY0H;Y&mJQh3rGA4%idT@p_qT5}DU3VzKUqO|46SY@+|XS4QO%=v(7i6evcr{(%*U zPj2nnY4NQ|tQCR)haHe`0u(~X&~F09E;)or=?3-k@`^6KYvDpz4QM~l^OmW@o$7)& zAzu3qYzJfQH)c_$pJgNL%)6_$Fyj4s$%)G>!jH&p_!J;{eH*C6?J@6!XGL97r-q&F zlZ)4VB879=7z=(99aydmw2_G0s0q5oV5y?(cdYadEXB-(fBdf!H zS2**^f)}QaUZmL+yN-fVqyC82`I6)_BhW{HsjvP(Mu?^UhcmlyiG>vfUEm-4s6B* zN0x1|hoR~PVmCql$dp-KCW)R%5v{q;D{Dv9>p&){Myq3uU*2+i(F+fTG{xY};@MUX zq2K|n19p6!DVTQ3G;B2Uy{+)FCzZNGMH0%9(jxR@^)=c+P316fx0ez$`(YCoxobfB zLP^|ND%Z1+I4LGG=fB|!{(Bg=3LP00iFob~)ZWctDc>$7(DN!|4j}n56G?YuuU9 zHis?(Y`Q(e23=Q-W#}c|2O7^}<@%?raTer1Z6Z__16^Fd>Ux#|2>>a!BRoDBr9Z66 z+&rEq%?1+m=M>+Ie}_mLl$UqN7D0~1+Cl=JlnmB07_5zC%ao#O?Ms%%RsXrakhhbU z1)lI%Wbv7Bq|^V|FhM@FqHQcM{)ceUnFve}l38s7O80)0jo7iq#yy-&!ll6tg_^SO zP13|f^s4DcuG{KDCPBs3-gDY_mJmYVYzc7o|L3`Ou(snKK{#lJ6F&VPo%~#$r#{5aR=%QsMOmM zSY#5zxmT6mFJVHdU>f^cnhb_Rs*jzK@v<0fq_oHX_Ja#PMx|roS42ehQP-^@sJ&>j z`F~)!XmWfCGxhIiX)1DL-^%qq($`bOZ*8({#D0`QZ_Fx{JX_~^ZRL{`gfvB81VmZF zL)ibdtVdeQG4>k zR0jB#M&{4ztDb)HjV_!Kynuop0h&zJwEE@*A0B9zzF@qj6@hj2KsiD#-0yVteSHVu zMhAC&$i$W$Qi3~UAI1Qi5Ck}Z_!z>~8rdocz00_lG@e3Z>%)*e$={TKG0+EV+!@*= z`%O(yU#Mrm_c40(OtQh+Fg3GS%U?OPB+Iz9!bn7s{ z*j1_^t;nhk>;}CuhEj98o_CUeiCaY_$9s2?jeIOWj~;r!LM18AXU`*l0+SdSvN#eb zV$>76^IIzi2b(;HW^jI4*cqoMFjhRBhe*v5Lv*eeF(MxGuB@4o9RoT6%r3*0;0yrM z2qGC(U~n&rZA(E${M)Lhgj)K%dH^{jI$x-ssGKkv4c#11N@w;_02yCW5-H~JLC({*L zcBh?CHB0rV4O#hn8@)jz-C?VCkkzFZ+LiKOMx%!bc3wU>ziO zexXS?AW$EsrE`vjvAaaaXZ^UA?^2Qqg7JXEN8ZokMn1nN#mwQ|45t_C9_?iHPfFv) 
zhN1PbhX6`pxxh*QshU{qIcQ2=s*+J;aB$qy4c;e`)0nm|lKXyXk>zU8$hWmAAMY z#`ayi_Sjnjw{XnpW3+wPly8!-R>!Dxbl4zGG*a@=#tO{JZOT~vQU&PFnr8Q8*Z}D7 zTB;A=P?Plwzk<*f`nVr<4Eng>zgm#TIfc`>I~;pkvi{^3vRW=}!W!G6Ww6hV2DQSU z{!4#*ep)^yn6ngs9kt`(sL+aozssl^Zzx*&FPPzuW?&T3-Z&VUeO8(BBBJ=^^ow$5 zAyY5UGL$aUxMd%=A5<=JL{Mqa%HeFVpvQ*Y(;n8vq{wvc@;C@Gh+&&N_b_EDNHQHl z0i|w|&29{>7*~E>T^`WEO74z`rG^nWdwA?JV9*82>D|{DjNe?6N9<&1fjoSeYfxBQ zr*SF`I``nX$Hq@$g;Zbls1qJ3Fj3#<`3$~gpA=ZJY!ZZxNY z(`40Z#_$YlSo_J5;90$50BZBY!$w!>5QByk@XeJiv2!P00h!%)rZB&#>>+zd{mR|e z$%!lr=Gf43$Jk#Rp4&dSz2NkcGyWmVJD2EdfL7PDswZ~;a@%j~GcyWBIWl%FShdEX z7P$DRFoSe>B@9v#-PcQz$vpTisQ&=hAjIYqsxFCn_}63hH~tjaOq&Jx4@gTs4L!16 ze165T(>x1NPA`4|isqSm^>_{g*)~+D*v`}7)8OKN^@rV^+uZ$YBt5R9bZM!m*YZvl zVZ1O892;z~c)1ZTurn~dKtEZ6_q*X;R)?)~7B0Vsxk#E&r_CUbmKP4>s=8*Q*OtMi zy)^g_-kiEqQvkLiaZv^RE?&FQ7}S}9~5;bwetv%T;N}5kI;*^l5x{7 zRz)kAAVz-&+bUQw4k~h6X4UL2^kPMT(06m}R2k=^upN^Ud_PsQ1X}-4R+*}Uw$c?~P{R>(Iliot-S?Q5ATIGr#4FQa9vK!5_(yIW zd@fxaLhHP>^I{0)Bqod!)4|vAB54Q`5=PvK0Jxxvo3E~he$LAb?pkMqBjsF3AFLki zL(8y!P~ZTp3>bH|1F5miS4lGFDsVj%5CR%lm4sCOxLi|aaxcu_9YTtHG=98ew%Q0d)PZ8^OVaj}3|9{}!iB48( zZL&|8;DB^cB<3zTv0kT^oBZ`We574hhQMrDoG)MeGhs7E)xbA-kWBa6ewugh9>g-H zNrH2Uj)UhO#q1B$8#*Dw^@c`tPMOQ`rS@{Q`skI#jbJ+ zBXOy(v0S+%Jq2)k3A`G8K1G-7KtAp-k7!4>yxqf9R)&nBkf;`diKdssf$326XD~?baQJ}+`*=w|Xh0cf|Y@34fP)M4Wrl_c!Kw*=E%MA+lu{#SFc>y+{6&ex&A+vgbUSAz+vcS|Qz_=l2j@YQ?WAx?s=RQI%i-OVh2mKl6O2f@(^AD(B;flVZPp%qLCLbi9BlmDXl@fK{Dl0v*Z z|DHQAU%bY9x3f+GEn>|{(Jw&@Oz4Q%ZzEWcY^!cZd3=)l-t;O!bHM75)7^u5{+O$( z6zAqdcgy+2Umk+g##t;P0ofX0iWG=h)j|Hr=7Dm)iXjMcy;wa>br^g{eVXLZ^i6gf zt_ytZKBL%HH{vMYY^GI_x*~DW-d@IJj#Bn~#Lt|(lveD=5^)SL&a~go1?6V%^2 zL&%C_joZy=!%A&I=-Ls(@9rKMYSy)wwgZExi4BcfIsM@yv07`4z+BnKL{hW_0Fcua zLjQ$Do^6YyZCG+FwDn4#z`qsG_QPTzU_vohkKJitZx0rZqB&pqaA?&Hdt$O6pDK-5 zDm^IikFybp#Q`<>^b3C8tSol86PSqEnkKFlBshQpdmP6Wz}PrWF(IMVGv1b3*}%%Z zY0^k(aLVD2H4Xs*A@By@_~cTOT0*KTC}{*a$Jgud;cy+H-R9f|>&kdn2d+Nt7Q)7rPan~=*>3<5u*=q$8cvj7lSkzwLxPc zjuikTWqkQOWWLzdPr!=WfgPMdmraPiO$rP2S`FL+jmA_#U}{|R#n$utNQZQtt&`xa z)={W9um_OEpk4&f$4#WCOS>nBjRk6g z@U@l89AVXKZ2um3*vA0hl4aR|Hy!^Fn~UL>NoOpM{UTE7IAn z(U-`3i;nB;-8X6r#+q>A|kvaXC_th z`6?t~2Jlt1t~H`hm2!{hirPJ)KTItD%5mG7JuK3Ri!n)j(>Tfo-n`$7VJc^1DlQX+ z1I^sK5jt}Ok(yynI&oJ$${;cuNe*Mhs!42wR0?VBCC>YqT1IQc+NHAn_8vJXVJ_V| zwRJ_0@HCJC=2o1|{dzF#0dx)O5+t(y(=~GVkIHRTq0Oy)5(^N$-GizrdU_^9xx3d> zHA9E0WpsdJ2LCOxdsz%O5T5{H!!{&=Cl*}q^Zd^CMYFW{CWHI*2_9;?MaFn!L8D_+ z_;ZHEG^>;&{{7#xF<8*i(8zJ{O2$RO*@Wjn&yh{X%J{jnuK%X##zucIvWk&dPlqFa zts@X}%08U2EA2Hl=61Pd#hI1q8 zFkpF&*EnuWjr9g}PJ6PwMe*1-3-x+Z0Y>c*U#f4ahHE}~pwU@)*}J zk6v=(V%feQuo)qDCP55<)~uPu&U%zwAAJ=n8|D}AJURaa<^bJJN&U1Wc~QT9{fXFm zTswR6S(wH_K~Vg3{roOT!M^uAIo-X~!@-8S8ZKf6`v9nsvWcf9;QylU^AyFRSBq zp!>qll(%VmVZjAx{2`B4-e^qsaM0iT(IOUtQ4E!g?q@hnQ2xxHf9zUzKU#zC`OZDt=Lxu$L+^*|O)u0J>jEOz3#Jg&}w zDTq0zj2&D?YJ*ypq)sTqpOAZ_8-0C%C{i3Le=K$1O*}MMk4GU{ti^@4>;=m=;zL?? 
z$-7@pJSfep`<|LJ?BxyOrbV2D{)NZ0l`|VzW{f}Mihw(vn=h+S}rHw8DPf!ReCnX zZC;*Y1>886m9GOuYmC&tkR~0`0A?RUS;W}sErk*L0Sz68t_uQ6%hQ%ll2Z(A&q*Q&?1xB2v(=ci!s#jlZXj!~7z_fpL}Oz`PCBG0Z9Qvy-q z=?pDr%3~iZLBNL&(+lGaJo1v=(&T@1%=(~1=c_)3Nw21aM>6mI#{)54IoaK>VXLL> z(~V$V@V}EC^wPtoD*C-@iMUR#!L=zFB-PLImIfGYa}e`S0Ktr}KVm!zXeG>AAqM9u zy0CQ}(ZJ&$C-kRzzg^#fZ7Ye*<#LS>w>qaKrdEv84tJ_X3_|246E$U7Y3`R^bCR>& zULZ7YQ3+h|hHz22aZY(>zt3~QZ3$@pD$x3{JTO9SsOi%iAdDEq?MxT?hZ@nGJg~u1 z?Zb6XVKb~Fnn83?$?^{1)8Q>uPg;y^^CuV@?55wyARvA$$i}Jyw-CSsZB0ICHm2F{ z`7;`KFumNgcS; zW(!Y6?eHoJDP4jYo(VGND)FrC>}#F%iLh;cgCcc8$cgv%lU%0hP04{-Tc@**v=T@e zbr!I87;>jJF`R#ReHsNi!ez<|K`(>%veZ5}U_v%)YS~=)QbzK6->nSzNE?~%0uk?| zO^Qj!m1=`t3WIXJ{h0OI04=!D`shrW_@#oDGq22}FXn1Js>KD5HDg-McnRb;mJ7Y_ zSHmfa&ipJZ)&zE$twl8Hs_+ve_n%vB-YzFu$6blx6~)iHvsXizep}}l$rB={UEF>( zg-=J>-;XnzkmO!klQ!WZ9ovn=WUpG*ED#zG^m{y7YMxA1(wtc*J49*FWX3To7M>Ev zF9DLMZ@PEZ4Y}f^7P!}$LxbFVfov%1{&>%)H44r-1dAVDYBOWF` z@R>jq zMq^=Mel&JIX}ETo0l-}^;?1lxW$d(_@O@7f!cY_(!E`HHOL(QTyu8J5dQb5en@U-w zr)aUsTa#H@!%w!nk=SQ>>=q)H<)S~Q{63fp6U0|m8?^BO%~OZJLqGyfqe&H~QS{8J zLfb!Ycu!Ft9efGt3e}FI!D$w3+)rD7L<5Q%oMh@;gNt3l%w53h-TV)_!xbLA?Xrtv~R*pnL{8O zi6{lr{v}MTw2fRJ?+x>Z=pza>U2O4+6?11iMM0#5qGF!lZg~*aApM=c_APE3=~b%M zyx#r~G{lN-%~GFx_&=KGtwJ#B%IFn>l+UiotM~>qP_&W%g;qK0qa0S()h4ZTZKL4$ zpx<-2gv9X2NC90{rG=K!B$gv_C%8Ri1*BD!^k_3>*MMeeth``15iaG#P@_oG6a8;=;yz`s$V`ykCW)4U$GH^0B3D@J zbzYK}j2a6T!$MW2$>^a_4b8_Zf)nfo)XE?Yx3t20&}<?XS>JWjX2J1 z%rjb;3`6h_jl}K$rY;R_`nk|(h@h!?yTeqRe=6I?n1co+h2Q{t=|!((%!&Q0Ucyf${{JpkiH z)4-%`?yy_(UBKvCtM=){ADnY7AGuEzzPy}&pt?n%b>G~~kNh5VHMMosLDjPpX#k?F*w zQkMuWFHD&uhS%qS1`Bb`uA6LE7ZIT^mN8%daYS#2oiC{zVE{j>m!YcPOK_E^x~OmI zT|=;iB6fiOeUdB4%o_|BV3a84bxiz1fymGEHHsXQIw>!&4W_ z(ljWVT&vfF`jeRfY*3XnXF1GPeU-SAMtCp{yDL?06bn&*I`r(A@`tnasUKJf(I8Jn z2{2elJ3ewjw;3LkC`pR*ykgxp*fzGh!slxx7_X-cN-IRAv|lf7jamnI(n^%|=WuRr z_tco&L96-=jZHB@f~1B*>7*9_9RT^UG#*z92>{Z$HW%c;h;^~y=9cz3WO&1i01224 zG2y@I^rBkA&ea+(*!r7-w*=T9!rsPhuIa>WjxWCTp45~8`DuFs#i;0c;n$JakPAoN zG@3Fw6cj*h9{`v;z8@qUsZDIojTG?M($L#n+V5YVSRyen zS0;60CT>@A*f**!{|=-LaQ<9nrD1fh^pRu)A-nkUlikzubdfVtuO}GjsFg$$g!<$; z;QZ38msb*@)<-FsZml{X;n8<1q*RSJm z1u`sw=;b{ecWqAcXi3&I$yJ`WJrl4o8Hx}>+vq^J72Z&Kadi>{%s+dY-ZLWKn^tYg zmSG`#C&C0(lo3twkmNxI;DBhIK1WpSi))bxkV&w$lZ_zME~f2=*yfS5?yhU(&MZ8hOon z()R;H*vkuTSd}1XL~tzn+a&*Po{^e_T4?#1sZXc~V9bgEoJyi5$n zMpk&XP{@bte#xqw-Q^JU;4&xY42mZx9%A)d(hy{KIOlup@H3#9PV`+y_yue8^t~`Q zWj*Q2jj<{a6ud)f;&h;@F7YNJu)9T&|6dROE0qClDXJ&^X8j>Ugj(v>zM#%14$D+* zluLmZ)3gU*PlEfH+@b!;UI-RZ2?!sj&C^{UC?}9a{VOdEtE0ZZv@RSVPAK5Zo5Ad-2G%AOQbKiWJTqv9Sb-_8(XHiz_0svz8bo$O!5zG}b8me) zUp;+W7Ta=k%!yXIioxp9Bb5N)dp0o1QTijZ2vEf?l|#mD00^%nwYbIk8HJ0*{@ z`t<4RGl52bSnIbK?mY=tl#p8lxXp z54GJJxpQfd{BD7qm9Gcp-FsGWCtYUs7j|;wUfraXNo*OdHi~rf@72DfhaT}>0B1_w zM9|x1_-HDF_$Kk`tkfYQIS=dw_a7QpI2mT{;>ss=Qac=Ztyi&_EqDAam$Q3NnFHqi zMAWaEu!3Gz%k*O@#UrLTk9$U|e=>Z@21=7q%}@tO$aQ1E)96#=V!4L^NsJ^TalKuw ztTug83U6IYQ`s=~+z;z#)f^PwTi$OZ+U>tL{7@Gd|33Pvx%AoJYfd4SL$uSw!bn3$ z=rcNRAaJ3d{h&eT-zI5fC!9L$7{6MeO3TiWSA45mA*Q&wsxWuG4Fq#q#^I%k8)#m} z=iDj(<|yyft{46m!;rX-;<+M*(+YzS7f8P^*hPG>jGI$4UUZc92MqU=k8GUJi8B;TVO~3iNpo3Z98R zfVt0EyTdLjSe?Fm2!t~|74K;G#D345a?Rm}yjjH#l8YJP?sMF=D+&Z*pH`2D!IIJZ zoLITFN|pd;(XU8&+VFQltOjC?@HZ%IFX1bj?=rO_7pdSE$E5)OQd-Kxebs4T_QPD-ut{AL@Iq;0| zcB!HYORQfHEpv4S@CPjRP7Xd$*{^GV)p^sE6s$bH z?x$j)w0iefY@x>sG>FbmZw$$J6XnA^^}Tfs8h-K#9&MW2xm<7bEr~3RNGIZ9MwHZO z1W$qzF2_4!gNqI_#!&)Lm0FB0SrN%JTkC$1u&ZaYb1(=Ha?x)HDoqms6lok zaXw_B+GZF}#s0K$as0?9a0v=Fsu6RP$NHWsityJvKq;asTeRZ#_z_U|5i)DRms;U+ z3$_8eN~e%I%G4BAYJ~$}7ed1-2)i*2_ysZQl3}Nxo~0o>$0F@P(h0l?cKVBCec>)z zX*YOx^O3^90VoOk*6H7|Btr?r0!|Qk6gMv1VKZLHl!NTu{7S|hSgC@`N6n6+&1b`n 
z{1a%mqpk8;)IH|d+39$Rr@Lblnk{Zi>%6eA<8p_8_Y6#GICr5FYS)s zl|dBKW#Fq-lQB%wvrV>q9?3-8#MuaU&mX$=XYFYJwz?hIGF=owdjEqs+O2?8id@Lv zte86nTYP-e(w}s&8Z{D1h^IsECm*`hHSQ`~Nok46ynMevg_^94n6kLpMNQDxpwQgC zFwQBnEtudD21ftO73TvGV&P?g$Wx;lK(|8GqLIq_3hYSsU2f9pWU|YP0XW^#Qah&o zJtVC{!WmjZe(I|x%B9!MVOwpB8H<%YsRqp!QKo;l@F)f~pb z2O3X6UW?gUmADFjAAe%>QO6tMxb8nH0M$;-cR)*1FqHVr0TdD=?-aMhMJqvf4|k%O za1OgHLb2!RYNfmWY&iU|vQBUqbK)O+gU+q;POCdQvq?@%87bEXbkc2eo-1)ogt;NC zZ^G=QXnd{1P2j@`LQ)a7;7861FlPx-+pQMZbp~bLnz~G?vcXrOv!}0zse0}#))!r3 zE5*Lo`!^Hi|9E--wG;0B`+a4c4fi&1&$|W_0hf;=%7>pw&kU5-vn?UJo|=E1L zV(nPPYBiF;e(xI-DnNvHwf%LBq><8MlbW&RLeJ%P=Gg~?k0RBNfe7lqBh4TY_{BW^ zZmSE4fJX*8euO{@$eX<97C-TA{+9k*a0-!K|Epf}Wv_bwUtaZqt{IK-s;QyTyL;JX z?Eb##f_aElDVW0hj8>$B*{B!?Nqhr~{rOMLBgJ68v{Cu#*eSDxvW=2z&v`JKTdM?< zNA_Wux*nSJg9q7BQG>{{@!Er?gLatlkbepp)no*}^&6TbhkE49gl2bn=F?pTF%-6S z8=**i0I8&?wL)#2)zBDKN&>KzJS=fPM?UwE;_GKi-_yz~y_jm4Kq1DctoF6}UuqT`OfwnasqLtb|xl7iATkl! ztN7wm9bml=srSO^40K+6_xm-5f1QD zOUkl$aqK=cy=BJm4p(u2xUoT2n__LtSoL&51-~I+;*37;pb4w`&f`OIS0#^gVVciba__O_>U>3=^i0;u7gAxIE!YKAfI?o7bcCxQV4$gl(zqlHT z_re|!@!x>+xBFf0+4L3&+W|-jxz75?^F}2)U%@1^ozTwU4 zj*|=@iZ)i+t4-NcxIW2aC~7o`UOW{ETg0-LbMU9r#5~+|hWi%0ri6%&DC)WK)Q}MF ztfSz#N?2W|s9P)>38CehB~T6&mSX&DUVfAkVC5TVK5<@NoajtivnzJsB6x6EPdJMz zp5v~Gs|4dIbW@V5u2C@-iK`va@N+pc>gB8ft@Z(gjYdp|>cKuE}G5ZU1ICeSShsyu(H;4@_wZ!lv^)8(2bm$AWB~Q(5 zWn~kBLs#{Hf^{R@);R#|pcQrGO0%$N&HW_yL|{YFEli#?<4S1vB8}78@h% zTi|S&x}0KnYLp{{uKPg8*t{Fv)_PzsJCV|M&w`jdT@TL}Xui4Tf=cxp>Xazo{pV6! z;toUl*S!=gjl#Eh*Tnv@*^;mx)uAP^qhC($pqAo3;gjmJ*It1@MwMV00FypLU*Mt@ zP(@Q{wL@(IOKU&*qGsnYTEJS3%(@4 z( zZu^WNv_SSbwawF{BBSP=&~dhJAM+RIirMVWUz!+Z_|Dse=g*ra^3nU`PV&Y|8Hl$& zJ`yMD+8#aDJ z0-un?A&$G$otBKyj?1{r=xqo?_QDmLoxn$X&V|^a6>?`v9y#zkQ_jm|p>FpHJnnML zI$5{I^dtDTWZoR=+$kCX8PsM1KE3_)<)OS}`{)$ba`w|Q8L^50(MOgUPfM<5TvLW> zi3k=t06Z1WQ1v8|)48j3f+Pr{VH1*CE6lt*YF}R$7S4{Y@{1|sc@fbkwvqBC%FiS z+bJ*}qQA{bl)b<@ZPNn#u=5Xj^aoK+nYwj-7u}U=K|b3O#T{dliGDJ?mYs1A|8pkY za5jzXr&o+>Q7LDoIB0(4UP(PMm4g=uKK23!;xesaH$vKKT!QtBxQ}o!p@w8Y`0t_* zc>VWT~c>!Oag6{cj0m`8^|*R&T;MIiap?5FsT z+UKOyzV(H-ml$bzFbYYv_*?h3Ebbf|O|IVP!MCZf%159q?x*vb46{FE(K2>F76l+! zRH^fP$TIiUL~4LR%CtCiN7;urKFgk(9`NN~!&dn{{)z{Nr3pH)tgG%JiueZK9b%Rr zQ*&A1S>p(GUj4{DBHFB#nWV?kaY=`(;<`ho$S5Zcu*#t68m2V)dV{`wYjtzMdPJnL zh%?jBD;KOn4W#dpO-5^nM<$6!!U@XH)9IA7&3anU&k8t>pu^vcEc>_CmghOdqmDvOQ5gYL51gA=>I2}LW@du2vaxOkp!q0? z7r%NigA5~%Ka|FWZn^0Ekk<*6mU4 zCj^+p$RPveBa|=dN&i9K&dib6X4HNqcSKZ7IjjQSGUJ@-qBYyTYg@ktm4N3J*srTf zsiGB-COSYbYjos>t{7ukuS^V`3nSvoFr}ut8OQPbiYFM>$JEVr(!Ev9x%~!CK|n?s zVb7rV^;M~QQ+7z(3^5WWkZ-J$42kL_2wqw+Kz}f0VM-UUz1tU}(#9_VMqh?`vJ zkycosZR-g5hV^7=sr zxcn>&X@O8{wY{!hL4Y~90~shuyGZSFGthwlm#|3FSS?}%596A*y%#pN)e^Tg(&FJZS+U$tuB^t<@uX(Fs{Hw(mcC|e`WCFQSEqI31S#EM+m0Ld+4FHkt7CEmM zH>5E~E*TKD%(QP-{y;&EiKNjlu9^>72o>s1i9%)A;-45`%N?5Sbg*J~Tc(#r5DSc! 
z9WUo!s}aDGfvNY}mCdzPBmGar`f3r?bh@I<{i~1L5@p9v4&8EMpF?n$+987^uYrT0ESi z!7P~3&ro&VE)*v(Q`aHI8Pi;#t^pqOeiqVSue4V=Rg7oFRkKYXeE|QLaLC?RojeKg z-7e8@|5~*e?Axji*C<7%Xm4%R(pPgUf~Il9hVQ7$=W-nMJ=L1%R?qwHn>yq_np#Hh5El0b-7rT zJyofWi623bb9o2#)MN7BbA#YDqd4$Q5s}JiejKKnJT!)UKc)|k`1phec~&!J4OQpt zJu3Q7`}Mb-e2LJOSCb1$*WWG{>e>_=hhl*AdO{k(Q5t}$Zj$_L7LO0I@5pecL+gHM zq}jL*h&;-15ad>?-w)qFaebQ$N(Y{JPGB4$k1r-r9CLtx$dSvD>{@#y;jaShFy|wj z_&6uz^|a*2En`w&V9F&oy8*r5;wnU1|9H89%Qwa=^z|3(Rpr2VqG24gX0P)ib~LPn zHw*6CD^@b87^FOtEInh{8z}(2PQ(g+Le?6)z7pw` zJRP)@);BBm|JgfNpK#64?D*yZE+`Nb2^x-L+x&fK`)G_n2%oq*&*5XJ4$IsRtNfDj zrWSrTgyTXu!0V^6?Ias{$D=dAKtOEVXlBagvH;WMU%&4kc}Mmj4COjre$(OaXgb6$ zjr}(#W>+XS(Fq|BL;YC*S&+)9ks$4!k5)=)8B9vPGB-8KaAt=cXZsAD5A`H(IF2GY zrmQ>eXK|HHcPy+f+X!Gv^t8Q^T*C31fi@*+Z`=+b=O0gh(mKB=OqW?z&5*j+YN@9g zIaKa)0#LVY5{d~rbE*V64=>anf>PHYsnw)MLxbDf3^39*t3=V(LPfU=JIl7rSBNm2 zqsyM0%5_<3PXKp)7B$eexK7X~_^CmnLj#GenHC$|xnbp|YgfR)FZzxOPj*_blLIgc z``Z9-W^&+2c0h1esFvDP{KP`!23{wI4fvh@ay$JMhY6#yu|O|C7EG=iJBKXrsBLNv z5Km+Vib)~F3{7{^tyR@r6kvxz?4x3D4~s>>D_6OcKij8c|I-BIrrOvqbMn^rJ4bCB zow{LBv*5$T2nEkb!sCBBw14)*5OzUmqtpZ8VIQ9<*~zdOgiK(KPA>+#?OU-tc!a!i zCds)r>GsU)Io;c9jijdjST1-EV#DaGz%C4_42!9Iu89kvvSlHubm*WJv_sQ(89Gh` zY_P}06PprzKh~o(Eec$runc?RUJ{qBcryUD^J*Zc{1vV%fcD`DyTqV{nuPXky=dN) zdMW;7F|EV7RXOUy^#Yi_AZYsz_DSVvQ(^ztAli6d*Sme?gfb3hPZdez;T^ceSYMM@ zyY3`0m!{J=uMtOWcg#kHXS~)!GXGr)UbiuIr;>+!tpQT}peh&&387>mtnFC=2S14x zqcM|5oJOqPI`5@gp+kF)yQ;Y1)`JV0RSOti5=;t|J#QvCMxyuY2o5MS9}t}R)-91_ zYc*a<^ZF+6*>oB3#SXah%h-)q=$!)a5WTGwep`QIe=GW2=6=oeow|{7adtN0NT!2h;cH3uz^FfE!wb4;wo+Qk6I)ZDVAzUp5n`cF6=Z1BY@v z*;NrFa1dErQjxyoubGlw+4+D26N$j7uV<--7F`tKCCaX*zC#_^J=7oDLDd2>qHT^F zJ6H=oZ^JK@`0oJRVe5VnM$ZiNu5Q&)CDjMs+-c+Ca3D`iyrh2iuXhKc92e?8A@9dt zo>ttV>O|{wMiGX#eht??esHBz$C!xtdtmYUM&De^rIH>$dnn^MPy&iIXtL8Tm6M#bB6eEn6RL?EDdtmAbm7;!X>i_@(qXC|UYFF{USfp5;5hG_4 ztv;VKOhIKyPF-h$j7dAG4KN6@KtD3aI+dQI zv&Vek&?nKv@`-)a7rZ&|eW487Cx}J*tXk5jjGK@L`0xxqBYA9Kon>3k?Mlm^3jF*E z9R{}C?6F19`P<(+FL6@u0;Ty-Mr>jkw`*g2l&7C3-@MEXSF|#~%{?W%j}mk?f_S@Q zyRA}AlifDL8tijpj|Z#R)C)>xsiM1jEuA{%a8Y&nN^u3|b`OlyCyqo;DoxH2z{5S~ z_B!orufV#;++<2s8z~m5iW-}Tr;>bS&Z!zGl6`@Glk#=v`)8*bq6hdxetOt+$kE7XJy^< zBzg~zB(-;hmCwoy!Usk&!H~L%3!?Qn5~qM8spSRlLatJR^^B#5U)=f% z5&c)IY(RUrJ~>a{;Kc>XNjFwD8-A(rP!<0w6$|5JMpxT=-jBAoR~7p=Ln(Fuqr8tM z+Mp57EPewC0Wtfx$BbW`Q2i$zL5kCJ^s1s;T;_N>rf9x?XN&N!pR#sjDRQ{Zj(JPL zmyA610lEbU<0*CZiH2jMeg#KzebR1;E&3;-=!o?!g^!0!h72a&VHfzb`ls3Q^2M3h zaK~`;M6Y0U>lN07d0d7`{2K!1WLR z0>S<{rp6Bfw?B>loXHhRsNE8%CJgH8p;P@T=WVR{;&_r&Gfk+vEJat7La6iyTs=?c zK;oU3)6UiD$Ti3z^6f_l;5i_0G6VlSO&+fq&rB6^X+TbX|RpY%6aC!`Mmyf+3pX zjy5^e(1AX3-k%svAMq?Qoi8y|n*dj>VAy0n>}swXa%>C2a|QR+$)U_k7O-WdC3b2= zaa6EH>C=pep2pFsMr^`ssO!6$mw1`!h4XVKc|phbt_N!%olQ{0dwTBx)Jx{V^rYiS z6Z>NKGcBJmgZee8b2AtT(z(booKl&?D1!?08qr)0W52l!Hzl@pUm?a0j+fl_29cip znFfQLA4C6?7FYfUbkZAtxEl91NG#aH9d_!Io{sdU`kPvx?z&QLJCr5`Q~%FD6Q4yN;uHG8sKiGV2}$<3_U+TkL($nyQcOu( zj|6ah-2aZs7uq6;u0pusx1=n+ZCxz>1n>;%gG1i-lF@Ss8jzEbu^n+Z58t>uXks^= z&yl5K#WD8td+FwPeazLYigokz<3uaK=TvbcPC$UX7duGD%sE`g&iMSjQw!8c>_y2+ zQ=b0%AUd4KDx! zM?B%UEN|6or^Sn|4p6M_sHex*ChF2&TFm0@^5r_|_?ybwp2 z)L|Q$0iuCkRsW^SDIe!O7iCGrd)Z5sQ@yx~R?PrFg|BEg7qeNK{R}VNEh*K7OcPUr zD!)uF;*KdEu^Dg9}*2>0)E9f8$VfvW? 
zTk2K3WF`dI2IxJ8u*@@ImAO(XulR%-x~$Fu&i2F%sicwj?D?Tp1_gYWr3Q*z3#A>& zvAe>DGpbi0A|CdhJjv<3Gms)vDpdeX;Mxl?57miS$2pFmC zC!GWb>33FNwSyea!Dbq>H`o<{G3M>tZ-x#EY5gn$POdb; zN~YJ1Jd#H39Gc?X9yv8 zkl8Sfo8)Rt@rd}<*Ceqb>G&mnNo(!GAqfj9jo|_#s+iv-`(xi_5pJ4W59`}k9P(|S zhHlAWnDXN9sM*-QOAzAr`HflI2CJe6SV z^p{e7&sN#3;_aPdv9<%6A^v<{(`5!o-X!wmYYr}qBJn2isM0naNrlj5*rpSz%A4#8 zry~}=XG48D$)(dM+lmc2C^Q>tr{lWNS3R|KvIy+%M;CgzFFknQKdt*zo@tv za2>=>a&8VjT2})JFCmJzW{4w3MmouSrNe!-ZUJT0${NV9HJVb=6Z}o_gmi8-D<{8W z#uQ2VD6aUbEq-su5o}J{~~=aqey-kqTTdTF%l!V{3bzSipQgO*HMJnEd^p z=G9TfRJD|VY$*Vw25jI)$ADA%2N4V>0u!^1BywbP&1y@rufaisn1s{Y#N>(zH6C9m zIp)gHWyvBO)9uD-LId1$mZ{Yk4mYk8W*)ZAre9Kv`cmsjdgT4Q)-lM-=BSx)nJRqS zv{VtT^Dlf2WPPusTmzYk`citwNzGLkki%$M#I0fLD|7R5f2gbmtyX>iWie8!sSpy` zzk>;*Jj6H`K5U-|*~Y}eQ|k79!~p*?>CI#p8+F`d6!;h~PF+^&459-T1b7b*E8OEQ#LUgR!(l}F${Vd`)p zI)@8fbchEsJ2ql$X5rWuB@Z|U>~vx-`>as#hlFHlhg=jN>E`?!%NrqGi&~?dhH=!3 z%x=$dPu^r>(EIgs0(3Ezw}zKZaOOf?Zj<{_L8}DV9KZ?}N(Yv{^T(rl1&yMEf`UxN4_&j2cQ!Q7&jP==f1VR$co zE4zqw_z5i9e!5zO&el*%f#?7b)lZNml&t$00nymrwAd(pR6aPZ7Z~)2j2~F3*P%g_bsvZx$FcZ&vsuhCN2zGVlvrPm_i1`Rxq$rd z)h7YeOX>U!U>bO$6bSqXL&;yxT3|Ak1;%F9>X&m%)CwHs;j0dYOtCfi%rZ*Ar-aO; z5?c7Wb`)FFOyi&iN6FzXkk(bLrQl3N77L#0OHMa{jk>Sx%fEFj_LW+Kuqvhz^~9Kr z5G0EoQ0##@ymxcEfu=`YlYci;9Go(ageYD|n~v+T3lgO^5nwxbsLCx0h#lJRG{`*R zz^?G~=YAs_-~C&+e2R5XfLf=|GcYI5jzO4(wOHpodCM`HNtC&)Uyk)lI$JeBvpez+ z%#%jmBF?qU3#-$U;*TBcJ3+$_$Il;?ZD~c-$1Y$Z)oUZu*%)v#EI~J|Ch81HwJsf% zcEcL|$_Qk(8kOnYcvGKGLUCHFNJpKn0Vf%ECh@2yPs>S|V$CR`?F}5)+b0aIQn1o@ zbe?4@!_H)h?WQwQ7^-d&=Cu%$kv{3c*nV`zYhaLLj@zB+(-sApIk^)rCD4xGq2c{d zRyx3ziZx=^KU0!7=lmZq58IdX#@jEG&`JZ?gk=Oz5O4?)3gEcg?b9rN%>XpijL+;y|aVH`CX;EquqVSPGtVtk?Imdx(q}M_t<(vWCo5 z&n53nHYFs~+P;Ob!AhW#Rd9|JYcdBzfp3cPh7e_jOx{$OmPlD%t){kkErg_-n~giE zAH|rgxN2Vk;vSQnI=E){flo!XmJ$Q zSBD`G61;VbON~kgi{ldCPK=P7EWNM4c&k@CL+0u$Y9aT%1{fVa6KJbE?|&%ln{|Cg zD(X9#)dP(L5Rf5SafZEmv8{FowR-r%}n*l)%BJNm8VA`80+{uJ|it zYnO2^12HT_an#UUZYG;*VswPKAQIqA3ln#TJM8ZyDkgL68{`?@m8-GTbOyL%ZCZ(; zQf(ms00OxIo~LR}|EJpi(!QRdF+JlVCxUF}@_E7i@>1pjZ(>fGAcA6T#ZxBv1eTHO zqYfc8uuUxXnsTD?5wP%ggHAp8{nz}S2`>(_;)X`SdhM@tTnrCVax0&tX05VC6f&!3 znsXY!2jSoK3jK6;Op9H-htzzKv?0M5ry%mMzpN|Il@w@3Q6-71uoJze@w?G3WKS&o za?vfZ+(|KeAX8|;oKl#de;80%%_+k&@%bhc?f~7egB{{rArX@%m9GJ&Axs~LN_kTu z<_~je@&dwoS|@g_xcpr#W)>&9dA4(4zx0h$8R$2}!R6e2@Pz;DE+t_dUA^M`@8bg- zy0eZ$v_2zzt-Ffmv|9ytz*-e&+hI5x{Fa8)t)1_QCoMARF?@)Ai1^*6IrcIJZnppw z0{yRos1NYZ4Nal2E9zy?EB?HSW(Wr`l@dnMS?zs5W00?_wA^J@_H9_rUWTd?ReDgH zPBN}eBPCR|8Wibl5^D+RIb&3%XHb0P!SP9R@KLPP{!j=mOkz=B!K4_~-({Vb|C1q+1)sWS5y93bcdWe<)I z@C!{NS-q#H96nbERV^7Nw-L7tV=|WUU^mNJ&N`H)>xTWgn2#)yK`fekNC;ES-gwq$ z)?oGHs%Rj`bav<$-c+d<`Qp@^C{5cSWH_Hp*>z2#k>H448aJyT?1|QV^2gC000mSL7J*b;SVNL1wH@S6D0PEZK3qh z242#lPeQku_7U#BM3LS{f$ztuu=G)j!(W)aR}qEz*LpE;0H4MC*+=0fW{^3)oeWCg zn>+DLOw2EAhZ&o74f!RXx;u%;@t(#N+d}>nngTbTdTR8|XiBA~N>|tnhIRiKj>Nf+ zY7g{G9d^TiM_0CA={$$95&l8Xxly!MxX+92wyyz3G8B&al}SH$IZ7sqGF zzdmkI+tkL{;f~KR$LDQ0HKm0D%ZBo2D-o z5FcBc^N*!RZuua<$#HUE3g}?pqVD$A2Yy!@*8r^;--A>v1rXqSaE-w-VG2Ahr^#0J zYn!dyI-NGO_8E1YpfAfOzIu`+hsFk+8m>sa=W&3CCNYb^F>SWHuKfg8P4~?_&sQsUC?q!Mr@I!}1(=?go=Gre7vfJG$72Dbo!mL8SU=-HKb8q#E$oY2Y z(%S)~I$W*!8Ednx^R6fa>rY+#b1+*00`!M$i6e~IQ0Y+@o`&CKK8@IbvHGs9hQ)E zB`wXiS5bXO~e$K;X&K| z74NaenO%m@vt7qt%w4ns#4>bkLK?}$NY*(exC`r_Nu;(qxfF2w=RS-*h#LZQ`+{WpM|EJ z<|du1WlVao%M4a!{J0N&-UQGz`UZ%%-MG@NTh3zEf?|!h0G@eYosDeB>Z`(;2F$)u z4!r;r7^8@y3^fpXL7Va|4wh=_)nL!k`|?}y9fuMtuE131X-~^q#q6FYAa6ce3>d5-DCQ``(xyHP0kW{GV8h`N(yA~{&;CdmWO!~&G$P} zf2r~#f~Cj3Pp~VvOmE{~S}?d6b5YT{l0hqKX`8c~XZSV6?q5o+4Tc>sJim;lW}a0r z9&_&Z+~En-|Kr3}Qu6MfC-KzW)wQB+kiX}gNTRW$#-F=IJ4gQ*QmOyrLgEwzZ(e`Q??XKNQtG`Lj3>d 
zQ0q{nvqr;-mwLhJ#AI9sRWSuvqrK_D4lVIwsht?TbjWP$An1ltdkU$+Q}b;;G{^wF z`QFxnrc8>0bjO|+gHtRop;y0zlR~s+q)mqv!Vb-!g(IaOH{MvK%BcTUv5Hi!78l5# zi85IXr~sSAciygV(%zkbM78+pW?>C_)89x7$h7V78v#G~jls(R#8}kqPlzX>27tMc zQ>i(b6EpqY{W?5
      w9kVMRh$!6L>0VDIKm+zxk<`!nxR!)hWx?$?WXE}0ini6dg zwWys@i1I2_)8@Nv$dGiltz&$D%$YzYI^U9b_X%gR6e!%+e`2A0P3pI&J3Jw^Si*1- zi>|;<8YPa;-B5dscLUZw2;uAqQFs7}gj&{2n)HnuqQ(_R*}l>KbZzRk8#oC_i#g|hFsgv%JH{DjNy7Azr9 zoEsgrjT)VbsL$lOABaSF=XyheJ%%#;Nijw!NQUj3{{+zedI5Ga!&Ta~O!C33rTammYpbpdA z`HH&FQ@3m0_LPSGEQH{;kR>mK;W}L5h4RaU zM$`D@Gou=5!x(;N>5|Y*H#T{mtV;31mlO=MNSJQ{SgZq zqE{+qa~h3=%wnnRmrO;=ETbU?l;kN=i7#BGRhKgXOM&pv@q_2%oOA zG3_*{`c#VXo2U;m!-c4%8jTXq0?q#cLW!)fv&Ii=%=wv^1GG}i32Zc6-uI%Vf5>;; zMyDjAc^tp*V$o&}pUFZDHeBSbcC{a4E3^ldd}BDeb;tI3B6uR%3}vow{it%yd$$xH zvfZnIQP|6#A&_Lc1z7UzZaop>#{5TmR zvZxhJAr0$VxH9gL@47!-{S;^>yDL!C*7?h5H={1(*V=-|a7#SkSQ3_D*eT{?ybEppR6AL9F?6i*7xn zC35~`7t}5eb#22ED{sWKlg|uw(pK}~E;0%37)J=7I62L|o|LPZCg;!0V_*@^v2wN! zC^)In9b6AHoKgmZIKHtdRVkR^K6~C+%=M18B%WCJe?^%_i}@Z=8eMR!dO+or{*qVW z-J7%P&{k2a=H22WL^iera5B0RMyP8CtLdi*v{7mFbHkXBogvrDVB5=YHL*W^n};!D z?ykE56a^W_uN$!_CmdM{Gc6g6WBZbxvs$m4pBb(aClnLC_X6oFPteAK)@`B7?TO}TgfN{P!9*ue9~!SEwS%h?0tg3erl@cRGy<}A%{jOa%J{z7m@>R?7rj7)70?Hdk@ZwX-b$oF+b=sRZ*5F64i6x z`t5bDE&)-!Kz{8Za%9NTi|@gsT`v_JE_cqsFd;cK)tmb+F8F?8pmpDyKpPsLqa6u> zx-4^9mcoki0350*^V9Lz8;?kW+^eD}GiCP&aN5o5!*dS-A1WSuY(zO?BOy^hG+9e-*!m zQ=^p0iJ(gw;WVIb7a8wQ;iM3Q28S-=+~*aQG&}*8J{aQoG)FMGQ`WWn!OOpu5@*EL zSuV7Ty4NuumLRzyY5;o8gM3fAj$iVy0VMI2gPv%5(k?V4qYoJt6U7J63ruqrPhL1B zkPv9eu!g)QMGfrNx#N220@^->_qy?roQDQ^;jk(?+2*Pi?_c+$v;&clQoI}*hWDz+ z%ELi_;d0{`ekBns#&i0V+S4%=u~Jw+us*_0A~p*k>Ny52p-_*0m%G}BkU-7Ta;>%&4hsFZ$jI4`N2OsC%O5R&cLRF zEe}^OjJIqc?mu3`IWZ1##9q`&VDDF^oG5imrf?IR8yuw|E_T2FTNU7`|3CQjv;yy0 zSP&wqtwiK4>Y;pRQIvA9K79F0^|lZ@Sof9QDLD@=`^QlN;fDC2j;{Hrs$k19h21+b zuTt%Dy}ezmxwpd%XOeMQ1Z(M+?2F1(Gk4g7v3P6_{g+JhG&dV045cQq3+4`=Iku~s z{iHv4L=n6L0zVE41ZGsE<9FZB{%6b>QHj=Z*jc?2KsZ^^Q2DV8&NobevqVHrh2ziD|CMn!4$d2#~|6iLA1(pr=D03tr{KRC9g)b7* z{&u4ttQBEkZ~PtORt6;j1o@Gi@F%97Alc!oP+Z<Xyz0(`>##VfZt_FtN zXN}#hhvk!VE|~a~OlO=+6NpXt{IWbzIb>1S3SlLLaYFs*xk)X*Ux;&|2WigS^l1pv z;O0#o(nP;aSuwNU+kBe~Y0U6*x^$`BMGL|BL$gL zSfPaecq7lys%q3{aCIp?K==L(GAP{|B2-c@Zu2{VIhSo-v1t9{)vSUt8s@DVN@Pl^ zQ}&n=JR24q8W;dVUS%t3c-cgSRd!EOqWZSS>F&o7NV>a_0TZ9Oh+T`EeqIa|~0007a0iMZfP5<)n<>NQ3{=-E) zuuBQKcjTU$Z+RBQVymAQ{xRQvp7gA;zxjV0vI0W`m}uGXa8X^*{wPsXCvycwUYf`* zWyeCWhrF5c6igFCV$5saoSnd4*P=HvACya0=N#PEY*EDa$}6K+M=q3iI(te{R1;g* z3YSk7vGdVuQO&<^!qu!T7}t6r&N+K5C(ecq^q92126L7*P6MS5Ec(IYgzb!m1o=}}bKlQD< z41)A~sX@|60g>k{YU7){%BIO;%Z{Mj4c#r2h;{K;K!BxNn*5JVATqfNu|{gScF5Z} z2v&}m&i0R3qdB|*>)binIsGyF=IY9&qqUYfcKz{~PCbRU)8mS(P$#>)^w@0bT#KAIjkPjrrpa!*TF(Al`-!z1n){jM)GazQFGjWTd9Nqc@z?CybFQ zQjA14@1TE?5yvJLKzO%UWago%GD9@$S4*9`^X1yctfg~={N|pCgD%}xhXyD_{XoiR zz=R^@W%zVj2a`pUKsFT`G_wmZYa`*(k3UP2#P)3@(l2aah4OVeK4ifF01T=@n#@Vz z4<=IuJfE@74Yu^J(I%UvQ8){ZG0y%_F5*^+h2Vsh=Wb1r$vGOdX)RFeDvb2z<0}at zH*E$ZGOBq&{+%yexFEAYWByD`3PsssE(d`;l*#%Tt2xNLEX5;qw@N1@zGpxQOm`uG zV=7aW9l|1%QZ~z+y-GGHA~P<7#pZ}{O;cV97xp=y&7}goCNq1q>%!_=zA=GWyRu^$ zHu6RZi=ThK)jc?64?GY9EC8R6(Duwa1^OyUT0Cu7W98JO+J%z8Lrj_ie!C zEJvLTMSS7&&$>dC(f5$NUWbE3^ysVebW;PiwIRK^GYLCi^tx8IsLZ??2Tsc%>m0tq z00C>SE;1x%r7wD z2x3usG#`&w@1ObCZ`x+-#^XmFS12TNc}Oa7oY4tqbR`#;$I}5%WI#t?4qL8|fVUGU zYe7nkxy^BsudY^Gtw*%p$qI$EF)W1eDmwD*uRjnd@(cy_E79oP>fGI5g!~ip{gXiT z2-Oo;-`bTuqt>CLH6+*kgHe#*)4m;ob+8(Gr8G>08)sA-k6Co&HiS{L1MI2|wVxoM z8H;?y^8!%;msV9DK=L1&W1Rit7FT)7Xe1Tpq)iwDf_FaS8vl8`T}13zJfp~trD=qB z?sv)x{TtW&EsoLPU`|Dij}cTA08Uo4l}CVv4!D8}AZz^Wyci$gw89Ri}t;CjK=x~p_F^lB+oj$1TBjI%?zuUK+(U!YyKPovq z73ekvJ&O2m6ztDxmZ52|skM(_-I3Ke5;b9P{#Ml(^$a@bTD{}`B$Ea`9 zAjlFpfYcZf(d+xh(&KkCNy1X6*VS%4bhYX-4&in^~#dgUM*48@&intw}wZ)~l#kk?X6AcsMxA7=|l-*!b!If+`t<%5Le1nt1ksx4tRZlNkUDBQH~QGm3eFeblme)KpmOKDf#Mrx9y_aw4&EX^*0EKq&H-bYQLONn8)Whd5G zYRkIT{C;wRKXZ2 
z;GP0Ag?MtEk+Tl*Sz!(&fT6wv{SnjFCKg;S0gLGFSDN&fqYQ_je=ePS8`i0E<8~A} zrZOwY3ja91C;{Q%+0WgX0Bq%~blsxgQq-F0to#y`?W@t|j23_k3O3lB_!8?c-bH=@ z`5}*@o<|?~JThN0v%3^Kl1JrV;4L6EU$#XQPLVwTq)lXihaemG%N|K1g#hnz1q^-| zy^tMFN7Q}w30>gVht<4EYNe4lLAbfy=?6fwrFjTyKD}x=d$8Ev@g@JHfBV^+sks|P zjBrIZ&^EA$jS}Dv+Qd2^ARFtSw0`#OkB8bRC4}R+mP2z6Fm83*WFoR|t(vklG+Fsh?LN{4@=rYkth5nre*!TZ(ijH~L2g?QjYF@jAzx8(NrTp_18B z#e~OCACUl*;BGJy?*_k7c0fMxu^ICke-*Y_Q-bEpa?XYwan zfJQRYv1dPsy~>9!j(ANu>#^xkr=MQAP3@|oGQy!)!pK4x07WT~8;ED(nOO1ASDrF) zVGocbd>*_T7s|!pss)}`iEm5&w$X8Lb74rwGRAJ zD73Jt3j{BR;8?~8z%_b0gLX&#JLGd-;itAV^S%Y;vhrf8p4QspB`{l7*ZCDf5gTwwEY6RW! z>1%*?fR!?j*p@O2D`)b0x`TX6k^kS~TRcz~rr1`i2e-h!H7Y(|9s)`gYK}h zcy5u0O~gkFQf}9?w-`2I{TIvyy=Z5g)Od@w3oy45NJ{$!OwuradlDu97r_LEF@EYO zMs?Cum=yE^Q`-e``87I25Ed2t_a84^h8$3p%1qHT!fudtr>}uUjD0K?g(oWuKtrhS zI>-Zb!sGe|Yj5xp3tUT44$J$8t`hrEG=tU}DbawhK3Au)#|xh<(_oaq*2@{B zQrS35*DnWxfZ6v%hn_$HtAsve5YZBAvK-qC$dEJiYzrKHSh=3B(9WkT; zzPCN=@<>CHz&}(ZPJgl=1yH2d!y$lLC7#GSDb}`lo`-^`WsT0_9T8sW1!@LOm&QZ1 z{b1`;wB{o?CcvHE?9~8D&#AiCH7d8c2RqWvIU0a%RTP_pl)Eusczt6x4QmTor9PhB zQjPl30K?u(Q7ofxxkqlXrD1d7-_&&Le*R61^52VRz=i{+mseaI_t3pfA2{}Z;uU;T~1O3D$rXqoh z$bZSRh|8lC2j$N&#*{{IT|nOBotx$yv_bj|8v$zcs@b8<)`yJ?F>x|Og}~soM%h8(`2As2 zIj2yxp7_I!F`Fz1YZ{@+I$d%Dr#<|5JjX0S((Jy+0||em=du99sXK&UFqNo%g&>up zhEa9s`4SU;qFDB>|r6YDxe8uw{POfFyF10aVeR$@8?vigXtYh z^T>ycU&iwV(CcSKluHSn1)cfL_$2bkB}C?0JPE~xm^Pa<%vy_e-wqqp9St{#wePV4 z{u^8?(`d)^d)|RdQ96=t9;|x=Mrwy*;E&+@{z?O|zB%Fr4o;QtlJ=bFDP^$v{-RQ9 z&CZPA?BE;!S6x+Mo(t@OnP5*`Mrn3tNPvf;kMBU&9JLP@GN=CF-xjSbwc;|N=uLL2p0jq31pX()v3r7Sf7&Ijg7$8?P2WZ15v2{Rg07!km>duZ) zUV<~duVf2sezl&DK4=z6RCmgPtuy-{JcxVyjB4@|bVJ=cciBlxs1Kw7bOry zB-Xw3&@4KCEhUCbKlA!GJFL1uB{FhfZwpO2^#7Gmy>p7}ogOI${8fbw&KyryEV}KT zla9Ko5!XHWej#GOAHAZj$mnUJ+bXAdh2E`4Q*t|+AYijOn(P>Q+%RZqv3(y_d#y;) z{XnX}od~FS_M$Vl>Z1Qg&8hUPgxdd+jbKlcDEUa+aSl|KD;INW?ARXsULRIx;0Iul z3kg$q6*zabF8Nb&FiBos-mOM9$pyeU^LD20wWUoOkUEUTHMG1NK&Xonac;%guQqpt zD8fNl}{mE)lM|p?o&-b2tD14U9pW z?n&VfCQ}7GpPk$u!Kwq9_$~#Gp`ymZ@m~jx>M7}0$e?>{vQFJ_=rtorpe{`rr_qto z_CRc~oH)ET;|~4iMY&76@W|Gc?q)YVaMG)=fURg-b<|s>X>e<{r^^$%ISD39bZtjF zpdA!sbau5-5VFTo_x450gjdDutGeULB=LwrQNdu*q>%*HIg6@yk>FT)6zlq`s3Y zo!ERkmQ7wt;Mv|gMx^Aptua_Bj#s8+JE;VxBEIQ46HjCr_Ke&0=Ej%~B2UqYB`FV$0U3JKOnqeBDwqr;b2#09rLIq1o?c*5 zjo(moD(swFq{2c9E=07 zHZn8GJu{*W-ul>h%jsAC2>XI#FhBgVcxO>OuG$gP3FeiJY_dX8VpyKg8V+-0TUriW zJ}lyoJIobZxI-JQLQ-`JZ+gl7jx!aKnkk_yIc$14*QRsxDKe*)u@nwiS4vOs8+Bi) z{8q@=h7h|M04LlI8E1_eGn8dZHiN z1bJU}|8dKr4&`TOi`1bR27L(Ah4qx11}8=H0uIYIjRMhSR8P?vQPc^TH`!7$_ok%f zkHbQ_HeuzDRRQ2DMK1s>sVHP*cgCPyPi!UY)tjs)_+tQaOmo5QCDbtG_X6}XsTSb!b!5_;~LF~4eN}#Q)6HW4x14eerbr<$f?Y+?S zl7DjZ-xG0_T}>7u(US(&5Z1kAqzBbPYEk|Ys^FAwm(UqQA9!D})0=72KYgEe>A<4j zGJoiLKHQfSa7>e!4%3zXJ6yzthGZPyIy9%V5w1$8zT(3$I(45B09?RaG&TmH`5rpV zF4zW$b}V~lDvYmA;zNS8+Hp0;JqdnYGsX6aul9Z4IPoFdU%KBqKJ){}UEn%dNuC>5 zBzHc#>`NBE(ldF_X~OYw8i9a^WNL87Y2vqX7_l8okLnc^r|uO$gtYrIc&8SPtAz(At# zbu1oQqo@A1f`Fc0XzwA$Dz~LiPoJG+SKG@Nsg+ZEgOLl{2wE{(2R+P}0t7UCY;g`N zr-d#AVuRioX#h zoC}`;O+PhUzMjy%`SAi`HKoP4W4KC2_*qTtfZjMV?V4jcrAbu?S_&^rZqjLD2jTvJgH4?EPC zwHOHZqSI5}Em8nKWI023Y+tY0(2M_weUt=XnD&wAt2!DJe0P!RB*ANOX70+bD- zu|CBjtwFGzS|Ji}{^7_RY8mA7`x@y?FPFeAqNAy+w64sP3dk{MB>XaX2ca}|1z|_v z_>uuhsodtvWC(0**t0e#ZNFR_%bzaKDg=BnWW}cDZyUO}lFD6fIH3N8l_vCiboZ@1 zW0_DDc{u^@=bjJrro+UfqF9W0SYI~xpk$bTxL0kkmcX0+VGI7)y z<0q(~3Dy}a6x^ocdcm{H_vN|k4 z^PMZ9+KM>5z!C3BJ8d=|LMeK4pSo51!7@anrCU?f3T8+wqIZR6%WZU7MDfRQTJ!Zd z-zy9-Clfy2R=(*GKabGm12OrP7drM*J(dgXCm8(-H)?f}*T_3_jxwE%g{T5hj9)xM z%FwC!-p`NdL=Xz*&_gyw$t_+U$ecPDxysoEcPkLHYs+t!9BoY!nY%!e+15m_$JW|+ 
[Base85-encoded git binary patch data omitted: raw payload for the binary assets (audio/image files) listed with "Bin 0 -> N bytes" entries in the diffstat above. The encoded blobs carry no human-readable content and are elided here.]
zCG4L*xgvtV(LQvE^@l+^|BH{Lx^ffNS!@o{4sXVHDH2@!=gPwAlBD$%dgz891OmM3 z9UTcbFt^d0AX@v~g)Z-h<7FWoV?TwF2ifjA=BzZ2p-3&Nw0`R$7qnk4b?1?u+jDe{ z=?-0A#xamMs|45_H9{GgYOh)m$TyeKYF_meJ?B=Kl};NQ>5$JyUbA!NH*Lx{9HwMd z{r;xgJ#0);rXq$poU1ekR5MOFz4ZfgjUDQie1Pq^1&lLF^m5ak^D0>VY&&^t@+^1{ zj}ps!sw8OlbO>_1H9)r_>pvnANGO!F>43?aI=Q*)HvL>&g}kNn5@Vb-Q5TM_Zb06_ zO7<<=U`@6{*iY3L->wUE8Y9bn7eBct1F|7w2A)lA0-@Ro7fhqjcOhHa0N>t~Zw%o` zg|++wD3T@TL471(Ps*LtDpY@v@^I(L$IC4<^cC`4pdW=V>OSH-IO(`Bt3;m|Na*jz zQB$)@e#R;01S%_qwR#g2AE0+n)zErnP70(RFinj_?CQY;w-or#!xg1OTMn`eN%d7S zM*m`IREDYxunL93MpyV;aYS@fe_l zvU%%Q_lIo04@|#yUuA}Q9lZ`XMkzI7Xr;R(aPmK2QrUm!_6&8gHLKx}?Y_eV2tQ2V zWSMkl$EiaQi7L9UcoIO^I^ue+jQf6HZ0dMVfsu%<@5np5(VDP@lzv}pE??skJ?dl} zKen2{!)=hO61tU(NY!4n zZehHJn#CaE&O~~50-RIbWQdE`kb)G9my0#wQQb>Md|-*Q7<@^vdB9#=d8LG?STus zV#x>Tb=_`hTrx;^$!G)v`6Pt;+OEGYUSIyo$|3A69#C*=MiMuEL@hwk$qd}CfX2Ft zoNaDmN2`Ch@kchy|CKlu_pve_ z&#`EDa)?Md7V=p_D1)OGDBk7k6;)d*h4fCha3m2_!LfbJZziT_J-5v8*2HF341Dkv z4c71u9-~{9D2mrRI|VWoc`gzHduBLauu;?ED`~F`)hTz=Rc^UY4@jF{HddshMYNB$ zbNs>f#zukhtXb6YvS2iS^-M|K{e4_!et6fm5_|#=INU)UrmT9ib^iNdGoes||9XwC ziO$OujJ{CtWs^FrREC=T$O6V~JiG_R2QLZdz>@d|DYo;gMdrjUOfxTGx^Lj zbbERl1$RNlj$8li-#Pd>=OQrRwmb`Zo{yQ9LxTT2p$v+2j2~-fi5LfA^w}ZJUP(Sw zFW(QKL1;((mgq7J~OXN0&kCi44!>mz#Mxr|H1(D;@Z2D?a{3>iMucasB7+4s zr?MC3Xyup;XPA6IrFi&+F`;r-H`uA-O)HZ2rC(alCED{VZ+)lNkBBjG92R|f_fF&k z^H?@@5-9hzmkVqX$()a z;Sl0kvSfSJUcsMaARU3EKZN*1?nzN*H}@}l!d1H_k-PrGLVyFzYd;X)x)Ug;l4nRt z1mL`&?)XmjPF%a2{^{d;5X!mg@k9ylsi#V`Nd(v0nx*K*BNouS(1aY^db+Wm3LHdq z4s<#kz>;MLJILP2+v@}?Ab8AbpHzyY%m_t$bKbi1)Ki%~JNaK56p#5nl0oMo3!%KA zoKgW}9s1IxCIQ8Bm!_~$MqDbnTHTuK>N_X-Ane+f>xReZ9SwEkYa7?=}F23VyRA0}_ zFj}!Tss6_IjP|011Xvx!CxgL6PzE1nevaDs6X#uJ_|2V8r4~FM(-GzOEYOYI?qR#b zu=p`d3JAb!cfeI{&VWUG!DNGlRDjrKPG(W8T8pg_I`ov+8HRR;ybtn2nbd$w4`J3P zkBPBc`(SLu`7PUHh_}Bgn@nnVnb!6ZUB1ShdoUD=#2^qdzyEcMOId(?vBGrS=84L- zf`=19;hMO@&D((4Yut(5Qq7~>(ml1?{S6_#0*JP4ASbh5z)SaLLf3|*kGGPC~B9uFx7xsAFJOY^Kysn7hIzV_X{{d9E}{nY$L zlWFjdvhn({P)Rt!RznghoWLMBK&;$aW$_*33z6OC@+{;P_X!~4VvMvavf;5{1w;-I*K9@;^br^ zABY@@12i+JJW9@}8!p}%vk_|qCTwq}I<*C|2Lq=3tsUh1LASpIe$z2qtt^tEEdx(> z-ttX|5GRMPkG#!AxhU-QLto`qb=r9cm9~Oy8I;Q!s&zUyJHJTk8$1SLE4pj{AX43- z7=;tm;WgmBlRs>Eh0O%IMbd&^G$5wBosP3a-ZM1SW7u`g3uMp#j_lk@#&sjk8!vBM z6M6U(=FEsgTjxP0OIXiN<^lQclA_-bh4=rFmz_(X2rNl1AJpNgMmQmq8}S>J+%#Em zX>Z{-BY*A(e_X-^=`$YH7!>jzm1DvDbW{QBiGFfi0yO11L!wI`DzEnjqn#$r+V2o5 zO@(5zH!VVgZk84FRBxa=A0ELeh;#o2?kg*lQS*Lw4Yp_bhcb_b=NMKYqwjABC4vj| zxe{h_|BuYkOx{!>1lul8J4627+*m0*8kprC`o}_-_UA!7SKFme#UJx=oXcVb%2{(j zv0Gb)hdJ$}VKGCawG3t9h(J_bh@6l^avAFUgfwuBVtva%P~K8PZOfyN+EO�y|MM z<-WdkWhA39O4BakiE^!R6U5w2?F4=Zz06Wk1$?N&PTartJ2F{?WLyNc1y^V({N<~WW~9VGD?!Q7a57-9 znTH4rqGz-+Q}fwsW{q!4n{TWm5iK0Jkso(%0um5dpn6y9cA&u(?Cok6V^xb+Bb;Ez}33}Ym0^{&< z=TUGE*xY?{30rxyvgHy;DvV9B~U8sw~0 z{#gfrsN)KXc?{*wd(T=p;)Hbmp)gnT`D#qsw7$hjmjx32X!F{T_kE$ar$X6ra*DtGs5}K{<`T@dRzK&QL}GNwsa)uH+A7iin3+O_t1o zF6YPNn1I?u*!VGz1PbW?p0uVnmNM#1pFWbB;9AhS7T%G7-j7c-K&AgFVq-cz7c2{> zp|DX_`i-yl(-S|`S<@c482`Pb6gb3uVK>t*sW%>D;GC+C`K+P7Gq`|!p3j`NKW`)P zSR5dtak`!MXQZd8?y=8u3~)|CR`E!Y-nP7-uW+%{NqQPI^c<^IUSC0S5rs?VM5Wdn zlhe-7RvyW>PkorV>tMO25^6sekTqlG%Ghzyx$vAG0ij@&=oOT>d8_2Y?X4u@0hbAAOBEZZ z5eMk~;CaA-RAL1YQ|BNP(>fhHp|+5lAtc;}p4z0`My_sLciG;u2iGaVU9+s0*xB#E zu{h1t>o{6?tOV#*ag(~pSUbu}we0|%8$=hQqgZsDt8u|Aes=ZEGiLRljT56LN)kDL z0Tm4E<h{8wc#togl4cCmNT=`%Fh?^T^o9oNO6=+N)i>E6e>J z?2f+1>9*gQO!ju^?T4*29=mU%cMY!{UxTfx_f)&|)IR<`Y#zWzQxDy=shGSkXt^)2 z_C>_$iJzsd0)7`ypWtj^TJJum5Ik!im((1?;`M(!d)?&pmfDG7p$Nv}b_ 
zyVbKvlQe8t&IvGZTC9P%uz`|EK<7<1rja!eNv3j~U&cM=!TC`DVK=c|vx>52BFS}}~%7zIw==wBDLft^FoKu$|hNp1N2kpiJ*btVGl!8LF)LRKIAnZ8wj~4Dm*anqt=yxMZxuP}?U$t@-y?@Zgfut1PuMUe1jZt#)sQp^*kxJ{uBSButwl(Y^?JkGtp_3PW7y2o6{K5o%|up-#aaD{{ewxT15d$Z^+?T zX4VWk0kAW_2DGtb3XHp9uf13Q)j9x@52E>~bN{!d4co7(8_KUtX(_S`$tHikij2HF zt+;{1jX|!IUd3-`6JhFf8h#%u1WG(*kghGFF~hgNVOwPYd!6+wg9>-Ur0E6yj%Sv$ z4eJbZr9<&;GwkBlcy}E^c_@(5Md);}(F}dAj~VEJN9#E$ZMFTRTXl|zcxQp;rG9fr zHs12c1@I+J>*S+Ce8S=_$XdI8Bt~m;yjuAZOiM?qZD(CtgZMEc`Hj|QrHelu-S@=* z+ZiDtYpVVWk56hjJ_qR1~{Xu6&DmpPW0R7DCn~NcWL)@!J%e32f-gQaP5n zZvGi-HVt^S;R$7EV@OZiT3JTeb1WXNsIIzz=)ZaQI7@H-veC`v&`c3_iJV2000izL7Ul0;SVNL1w3Cxx&-I9pqn2iReI=m zr4(2jydjj&l&EKap!X>(j=w51tFeWz?@zqcgqQR4ZPil!^ew;1OcTeX53?P-|3lLM z8Me^4cRrRY$XG*b7M^wQcsfb*No|Y=b1GbY#=_}eh$9OBrjngQC_bC;U@_ftjkeQa z+0CslK8r?B5}V{A5txm9yMJ`Mg>JlH2?H|1YWw&VVN=*k45wRaYrw^jb>yqbFvn**Yx zDpR9BH$h<(1I3BO{o*@UWYLMUE7Zsy+sEDXxvP~^ip^i3zVlGBn+{?9vgRchWbh3D z$ODGJ2xGv1>OAgk|03m1&UQKdRS2@c{*%p0b1vz+EP{nbuUv`N&ZkCv9+fHTGl zU|>VMMnN5Ldaaj2Mvvk&kJr5o%*AyYv1nP14Iiy8^f88l?S!w0fB;i)NPxdRwmrja zUcqPa!>n$9$EmR#+Ih5pJY9Zgd}0Pp&i_}_mLbPd>_Bv=GZ;F`M8oR4E-6WuXVD~1 znQ}b7F%ps19dB%uQis>9ZGv>T+tPyN-*Qf&q`h|RMu+mJ*gyaCICy|^AHGh*W8G4_ z!bL~ezJ3O_`2UcpA09DKdRDs}+=MUOBvD8?c4W$x1C4}YrIUH6=lqeZnJ-6P2}HE0BK!R#-XGo_;77Q?S>B+QJd_IV*! z6!fn3QWqm3SDq!#p!{x&8%(Bw_9@Njean0!q<8C+!iw{jwQnDN56e&~h6*UA;+j>+ zd*I1~7!vtPR!XhpM~1cBJXF~1g+r_X&)|AN{ZJaovdFE*Nt~N}m8XSOJ^Adwv(S@Y z*9;v{aBVp}#iMC?p7!h%2?cgD4&0(efUtcK)}P?X19GQJs{G)B^GbYJ!}yY_?TSn0 zW02CCT(rEon&wm7O9$gkkx8gt%EnU1NAc9mdYx$k4k~I!A#YCS$uQi$3HY^Hie{DH zQ7>|b!2S#<*|^{aEfR|i)3Z@?2aZL{(W+5t`YsmYVk9*jb_~5UaUv%PawtFsZc!mH zk#|;YI$GP$4>X+Z!@n);MfI@?rP6=13^`r_?iIYY?VVcGCLU>?<9p%ey_AywG$=_x zH4r*msHL$_Fy%=b|c6cSEuV+T%FHA9R^KAzZ`GzMGo^#6gKs1p#ZDzDA&W`Uwi_d>+G@<-- z6z8Qg%~M86|NW#Rh6tF_H>Bq@nbCo-Yb3eVhV-&YOZq-U-^bN+@p*^N`#ay9H#g!? 
zx|lFXBKfhI05%s@qboix!0yJ@_q;rZFuMfe8@|2*xB+a1=u3|rx z2?~N()3#h_e`myrWF0{u5==0ObGXKgJ@@CTCexp41sJeztA4%Nt9#^B2`T~X-WhLh zMFQk~eKfz+E<5M$|6yoBU51umedKPUJ|I_KaZ{*;jmL_T_zM8+gczuvOajZ8EtD5Q zhJ3vq3!lfPt9H&0P>;5xTSPX~(79cv@=d~3v?tB3GztPFp$yCBO;$iHGvIxa7FnsyF}tWr8?^J^!@D0en%Ay)c}fs`^Xa34 z`(hZy`im)W8qc888Dx#QOZ|9d-hz~}e|&-8YUsxIsNT}yo9|~eZNKmhc|v?kd-N&0 z`iub|t(0Ku=0y%oRm*QojjGz$+o-XvHpe+QH;TBJ>3!%&)U|+(Md;rG^oI)~9x2K> z%04*1e1v?i5`X(hFyY0+3(4&G<;IY=#2I)?*ZIyo>7`_$8V!OG_r)L%*X+0HnA@3B z1sea{MJCW~cSDm11Qs9Is9_z6z)k(iY06#pU3^sL#jZ_mS6iT0;JYoc0M2 zJUi*ZMQ4~-)H8=GxlnEvk9;u@bv^1d3kK$)z;cURHgN=ZM`9T~;mr{L^E|umkB4K> zpJk1v^B<;%Hf2=~XVtFc)?;7Rz3U9sb*Z^wcfI2O3l6p6=!EFukXc6(VW}Gw;s$nY z7KeYFy8x?SzlwRIw}8zzw4*~kGM2SGJGn(~K+8jL8$zI;)iQKA$21EAXnscjy_5CV zZ|#UJdgq3}rQV;`U21VQVloly!cDI3y`n&=Yh&gpu^GqCL&{Kli)!VWPhcu1x<*j` z>pUltI|Gi^N=XxCT0m&Bb2Qj$$5nw__BEsb?qa_ux3bp!<+{M0b)Nd5}>zeuV(vKKL!l8BfP`w4OYp*$see{zZIC zvN%T5hP@E_>*`qhHc#{0i&9u=PTr3ZL)QV#e2TNS%Q;?o}u%ZFU7 z8<_SyKli&tTe#;(T|+b;Tt50Aak>M}Mg2S#XKbShxN=ff@;sh{@p@BPfKkI* zE20kt!j?sdOi90}?|0AxR7&s;=r(1(DnF0uj}j`F7<;vXDp3u;%o7lF74#_PE|DO- zXM~X@Wd8Bo;NULSV46L_-NX$uNTZ^9rHP}$F6E!Bu51iQns?Xy%F5^uId0X{z*5gQ z^T@dI6v+^iP~*BC9V)EiBcXU%WF*+ZP{vuNO}XJ@#+y2+9jl;^KYj}sBn7Cs-SW?R zO3k2J*WfNQrumIFRmFu(*!)$Ql4h?0Exgi*)G2IsKJ?dn3yOiDt7}Pd2jf06sP}7N!&)cC}kM0@vmuv$V zqEvpJnz%PnewZGA$&!DAO(^wY^9ZkE($mDS8$C%Jt>GEVF)ESo+Juc*`=xhdTTm$ z0pkl^AE8~A(?$|Xp~N;eEYB_{YsYrE9z7LhAsN{7VgY};0o(?kg$RQCh=X%;F)F(3 zQL)Pj%qtoppD_VSQ-gm>KFnR3>V2Q_mIh-{e)5FVtx%dqMqK^DyA*?vDSUT7N6O;U z>gpS)7@+TZzt10vQHi4aNwZiz`^|w9!bwTXEf73gRZj)} zXdq;GocI2BkCy>q3L@i}!RU%kQ1;3e+dH%~q zsg);QYA~lB{>rlBAHlF)H`N=NGDXMWS<*(WIpTL?+OFZ!s*}Fda4Kf|wIam?6rRc> zt-&-l3sQ%5h%e~{vLjPooaoU z6c`!~xyA*^sytEOj28FSvQ(9+v?2HAAbTeuwbs1D&UQmdw~PyODGoppr`D9(9o>fo z5=zVg(ANKY9TKh3`7@^qzZnt8Vq!rw5C)aK51jJjzC@0w11?MDOtaEN) zCSQxo)4Xnxwd*%~1)pHV$5HW82|tQM**MsYFEWqEXwf9N6Y|$ z4p4zy!O{R*fhY`A6&!TD#zX3z-kC#c3)#1T;RB3zhlHM9<>-#73pO1K0nHmqOldm5 z7Cmv>yz7^;Oe?jVW_~a_$e}!35XAhL@IgiX>8l|L=2VZV=w957ycAY|!6ghwg<qNw9AceQ>@HVQk9qag>G{tPV2$c*!fe6;0 zDdRw`yOq-us0CULbBiv5;}#hca!27u$dS>b44XviM(ZiU`dP9mUSeG0E#dBBBs52` z+4x9yyIgkv+8*ZX)||8oOC2DRQ!q^KCl4HvXSe*6V5zl_6)eA}Gto!uFzSkYd!P72 z?=+vrUrj5<7}-b|Y*b%6m>z^yO0C}TSVtzbf1mY| zRY3Ph3WQ!I`EjKlqPi->{RiE0LqT>lJ*d%KxB$FguL#I+8GtU7je-#fgnC({qYl{o zbK@udr()l}y+&RgU)bL#&>04LeC!&emP(FFh>+`xZovV>;26H=aPKxL2!p3n;dGMs z9Cu?jgDsk)(&QaX5^#RGy&dIjxw`NTe~P74QCc;bz!C>!FGUja%?cdt{HajfHAb(wIM zI}PSAKx_EYJvLhoLj(O1hBppI3kV^8T{dah2G_)bm;RpI*a zob$$Awg`w295J&j;Sg=#P1V>o0f0|K+z=Q3Uo=dF9E{fel&1>JvX5Xr(Pi5?9-4mMCRGD|>gtzk|5${h;d zuN$CT*3s>>ZBT8`2|r2}TkIFzDDUiH(tQ8?i#^H+S)8B#REyQMQsX<~rO@TCjE}QV z`gLf3svCM8bil}K^O40&c|X+w$Z30jio35rQGQ|#8hc$w0_RTEO9mOypW5)7B9JsU zc61)lB#1g{AGezOAP}yU1sb`vWk(dhJ4@VVvU|x71T5go;YbZ}Le9!er;;@V=7~C+V+s`hnP`MH29c0MW|m1Vqw;7(*>Hz7dd3TY^s>P2#Y8Sp8$Fv;y;i}S_9Ql-d#YgX>9__e`nyBf%j zV$0L&-JWt)gS$Tsn_(*P%8jvwkxRiMR~`ERSr@}-ElP4q-rRMEOV^=9VUC^SxAkrH zQ%5z>Lue{2@_MYY=oqud*a@h1NFy&qxU|eQqlxc1bIW$J{%p3k$L7NPRNT&1%Xl!s z`vOv1(##PTEt%(?PDmmYTj4utqd|}|N3t>%gG#Y)!0nz`B8!-1S)@~qVB%UPJmh^Yt-R$I7(-w} z`HP4xs*0zj`sXlHdS68A03DH4Nivmvc%>rsANW0c3^j9#>S5{M8dX&HBKcJ!Wx9{S zz$AQu*e}+Hg+fM*FaJs=`5B)Vn6s03JiyU%FJk^jK$QnHBgD`8RiQnE;vRGnwxJQd z$SHAfY|+!JkZ-!>GR0Nu*_Kk>Wpj^`l|GDh){EB`x&4UtKibNg=Et%KBA&GFTc*qy ziQ!5l;V3Xm}z!8l7%sq%S$a1%{B%;Q3Gtg1l0#T)#kmU4qLK8Bh zllq+96!e#k#JG&N)Js1}uby`5MQ@V?2La8-E;tZkq8A%${R9C_m#GW4#Eljn-1Cv6 zrEdyz-`32@jsTCCTuv$u54{A6naU-P>#AUW$pO2EeK5>dq8dxcR1`0D2=(P2%P(1{ zPLi|{3g=_7RPujgs^8lLHlC0fQsN4a^kut>5_yn8_qg>LQ4=q?x+|aIW_djP%5Qj{Ngd7nccuLbqprlzj!5T$_e}IGWCfz^{ 
z3S#$Tb`)3XC2fknciE42M0)38xerY{$^pZ3>CcBlMqj_RQkzI z$(ph-I)a30dmuV9bMwFjw-o_XtN;LVidUd?h=`=TC{6uetMH)VBo|Bc8!Xh15w&o7JOx{x4M%o>`GjuSt#aQp0?bR`Am16BIhQK}2#;-h_&uN69wo zcip_W)9^D@@mrI$*xV>JEEFW^l8j^+oa*PT<^xV%RuUY|aO-Lh&ru=}7avPuhryx7dVc5y} zc{g}9M=(AO?l?n5p#7^$= z1rIV7$FJ72SO(+V$!nuU=5z@^@XJbyRvl|4`W+MdrbE6%*5xXd=uX&;)C&@d?XS9g zOsCbeAyv$8FqJ5FlM@~5*~w?o8yOOTC6f4*jsvz3qj1Tn#Fy%{9Ct3V8{H{2-2DaB z(K)Z%D%Ha+3Dy*iNle^J9N3{DU2J2x0KK1tz2{4`0FwSYNFaC&il_YD33eOhS^C9% za7$7Yv~N>qa3^-U)ZlZWpjF4)5gh~a5Oz96!sSspL^8xJJIEYEAuIFfwD+ob%s(b) z40=y=XBxE=`C(9Y`vGD8c|B-WPm|tdBU531F3VGyYQG>DntK$1vE40fimB7u>ygkI zw}`@3)*Jh~8Wu+HJsaU;Ylj9VEy?^0wi6x{(FGi%3&4cUbXJZ{r7Dt63H_316U`}d z1^-?dQ#adq?5Up%FrN(`8bnf?-Q?~YZ)B3<>aQ-?G#mkvqbs|lEz_Bn?C=7 z=qn_C5d{NzuoeQxBKB3ajo@%CQN7zv_-x-QC-8S(SzW}zfB?nfLC)bWx;Zeh%&4Of z!jyGHtESnpV&Xcl=TSFjzdnq>6WWfx`)hG)owoGR5lm2$8I^SC zsa!fyd`15XRd-yB$g9v1@2*v+Ut5oK4k3$xNG*O*YnyZb0w8~>N6DTj_R1FqeHgzg z_IqM_=9QYo)zNY5RaexQffkb;Ctbi2KRl<{qe5=YWHM8k7dAsxj`e#xbq~_{@{D>4 zq=On}LM2UR6{O23$-e0ghqPZI;gW8$^ZY;6Z{Vv;^CC-^drf;vVtr6$Nij!cRi*_u zgGK;|e%z|ee{`XEIx#AlNQcplLSt(rjWum8_j|RE7B2OC@V9&zrUJja7heS?#E9o0 z$KotfMKpz-C?f+G#)lsiRy;LyC2L3GlkQ&1bK%D6kPLYb$D^$bu!yzCK_R3UR|I0~1Aonhnjn1) z6B`8Ujwl}Ec4m<~Nf#s!bbtkgJ07iXFdErvZuLDPAl05ZmFfp<^AU$ik*vesT_ot+ z*&t$XvWkWahXK~VJeBtZW5d~7{wdw}Z8`qLZ*sbfCoZffi_5{ZA#uQ6ioe_0mJOqZ zgE~YN!?v)tn26Ab0AXjpkfaxcl~vvPJueI{Y35&5MJXC51O~RSjvQaCEd&77DTOq7 zW>|T|UfWv2yOT_?$l#9A)i(db|3C{&w4~8#Xc7qj?1#g0=~|@O{qxA)+hscqJlPsZ z2WA*supL9%{ypL5sAOgIvyjjg^1U2GP@Gp`j1#s;qZprTF)I)|Gb6F_9B^&@T;09Q zaA^Pl0cZi98fsV09GK9!DWGf_N4F8NIYf?a_lG|&QCfr!YWF8s4Q;;N^^7c4-hrV3 zHNzr^A*zoeq1JsKe+8nb??<|+QRyw2g`U@M1htG78;pKLFsJimIMrv2ulYG9{E!EH z!$g)ali^zAuBJYjU=gQVrz1L_$S{%t&BSX2^fJbmT^mO6OS#&<4;nQ zE31#peI@@>xK90*6QFNFIS=8*CiG^jovdQiKa;kDZ#oPa?$}L2sccwN-n@AJHp8T9 z2m5N)+{4#zqV#H+C(MQ?YcSg@O3tzj2ZBFZ3#32@8p)KY&lx-wcmAvdIo?QDVr8)u z)eR;g2UYv$hf*m$A>VMXn z*?0f|5T`+!9ZBI2CQ}7GPhV%%l$%ql3PKXnB9DTmbZ@OtN(1l%LtPexqOlIyXvR5i zP_brVScpK#YDdx*#jH{Wa_4+FH%Q*wHb*0}`e_rL+!@y@a)#C71dojn`%2#E1 zDX{4k24N+Ia6}9-=hUPuJ9x!@&nuwMf(^HLUpmTQ`x4?$$?n;}g*TzOl{ z#IVo3D44RCG!1Mg#wowkKlLJMP=IOl2@TYAJTr@f@mnrJ*cG}o;UIAURfAb}s&0sL zzhDyDUMhWn^MRLub;bK&bE+)39zX#J$-UIY2?Z4+C#cy;1=^C&oMF#7eZHwx`*DfY zJL}=CP8DN}#8MOfJ@{(#uR>U`LvK5{ryCK`g$^h|uvo{8S6|=P;WOSnm&#IyUHJ>& zcg3&LhNhjj7DaO)?*QM_T8Jn%w6jekJ|cAfuu+0DL8A=gvRMW8L4vin7@?CvV(sp? 
zmIxu0s?fJ-dzX&aXpb6vlm{zq<%pN?Hhru*DYIWY+BE5jmFYC%emVV6eVl_m`ZH=# zW`{K&i8lDNtEeK5VmcaC%8hkdUD1J^~G^Ng$~c7F`6f>K6-rg4!_K`firK_m z*e3$ZQBaM|WgPRv=)1fL2yQlI9T|FVnbX}=M1Q4Mw&w zoi`4CzoiudShU;ux>PyCSWP?tR2J}s`;G7MXHPXe8!vLJSP0*f$fHVv82=p}i{?V^ z@z1+}0GifDT6KJ|5Z?|f0+>ydD0>s!D5gdPRaSRLZ66Y+$IqhbYGc#x;C%UMDEi-X z-bIr!a90duS21V>?=vOBIC0wJxS~ z-X-kQGnH`QA4FTHDIDr`@XZI_s=L17!&EqeKv*#vlaX-R^C`0YcPF&^C!mTt`_@Aq zX3j&w=^~hYw2bU@KRzy+b(Ro)tOvlZ)F5E5X5@E7b%NW0DIjCFz;>yp^=`DD}m99+Iq5_NRB!#0_vc8$hJSH(tP*M_)r~DM#mX)q+i15dAb$SgN8z zl?#nn?o{B@;#FnYXXl5fBZhLn;Jem$y~8F@jJ5Lo*FmLcc14Q_L3TmZrBfAY#z-*_ zBxA(;4G+GHTeiCSsOpk}l$NHL3P^$4zogDQJK{tGn34)zwee^68G1hU zY2KaX$=HRZJR@sMaLDCoJIy-s<@*EKapH1khX*E-Tn~@~!y?cF`D_03u`rS{ZkZ41q?^kt)Mp18fT61R|r)8W+!)>8N)* zRBAF{E*x+zo>GHXnee)XWtANf%J;j^PdT!q zVm}KI#&BU2DlRP)GBLKEOS3YViD_DM$xI;^TUyoZ%*^zm2bwAJFgPM9L4^0^)1SvtW=rk`cyj z73>3RvFLM{qeIL|0Wcdx8ZjW?7-w|B8Oyh)9P5axH%#z>YTZKZbDM2&P)}I@)9dei zPDf(bmcpaKwsbIzYQ*Bpni5DS{ z2`{GLhI`Z1qV*D;MwTMt+l_wMy~%WXe82Z*;ME``x4&EN-l)R1g{sN0%VF~mseN3P zN{SD_>R#u*pQZFcHo1CkWRM$v}(F z#@xP8qL7~_C5Lqgbl(B##sX?%_e?M*YFeG(YIr}79|5D474JnOm z*X&Ti4v%5WdI22B+7z3*`1CSbDgmW$ZW$l^Jt>wWmxeOQ^+uF|G zQdhVc?&;1s_We0zoz%kJ{b3&Kys^OP#|36v&Y|WcwBf1R98f)>Xwp|L3AjC(zP1t+ zO=P2vl{y}+@GH4{-oZWKVu_4zLkx2Fbd^`APwzWop-(hACKQ!~JKkD$jqWmnxYU6- z#S1ar=Mob)fV8PA4~^|`dhGO7VT0%y2Qwnde|0|77S$l_PH-*P+d~LmpF#0}^kG|N zr^6L%jeg*oC_ob#^_gV%OPe`Yz=w>DfT(v56yrWaT<^45tVmfFv=TR)D**Nca#1p; z<9gxTD5wmL)1vWa#j367a+qwB+`syr#9>@@mjY~#wb{%YWr5@J)C57DOuc4DeiShW zaF5kHo3uc8YYl47Ql*L=+FN^f-zxvdED(yFzkL_Z98@akz^o6mbp|%TlnkveQ*;58 zUP_l|1vnP%hLvAY6<{2}599&*e1>Y=?sw1Q;~rItO+1YUzbSt=u1VE0t7cc}K?d`T z+9eW31+BhsDc#ch$?MDA3LTl>H+TZ}CBBxY4b_;Geav7OXDWPN1>pA;^Z^&44PP-{OdNY->HUG!Lf_n4{c+< zsi>c9L|62fmyUsz9(IX$gIN;HPjFs58_wSe{#INc=ksipv3S7syj8x(#ZXYL$ukND zTk8wBRZIk-sh2MsfC)FTfK0gvS=xiPqxBB9q!_(kmdN_?K&~c(IPnw)xaX=}!!$0~ z(C3$mXwj5P(_$xJvGmb{eM_d8na;s)#|J5rQiJ9~IA^ilSVkd^5O}a&wy~rS6Pi^m z^p`6F;Pm+zd~fv0-!gHEd3!XUw&=T2Jl8@BRfm>C`ER!5X9IUP-vPfunWXhG&$JN6 zcG=YoicFlhuZ(FAxxe_O_STKBz5Hu16Qi!%nEDCwtXMOzLU{pP@$(Fm9_PHJl8*}W z!SS#&!JvW!5;~dQC`mEA7XVW+z&MSGxCs(U8{{3Wc~NYpBT@b~Q-{+;?DeCDH^$rb zJB#G0Qaq&w%td$*AAv9&U9pNMT^&B}47F53zeRl)R4judJ^8R^qX8vk}bv%pP`l|kv`&^C4Z4kHjR6( zDNq4BK;T@>)EjcJBRs+9UK{vv6+Wv4Mrhvlr!9xHD3X2p^`nmIaHiczF*fSs2LooB z4#(`sQ1u+|;9!kEbCiD5#s)3}MA6UAS`f|De|@mWiDzq1Qtsnnhme?@V=kPPa# zrH-Pi6+4*y3V2;H#&q+m5X%=HG7uwsT@4^RQm4EFTLg(V59$Za%sO8C?)n3gg|Q~z zW6&n$Nt=lS$Mk_%TEce7Hwn4JKCTSH=SV-;sE=``noc+m-EZ( zNMqb4%fE?ZU$J@NNODDC5SE+rR+u|Uh#$aE=aXBl`o;j0#htzoH~#O0JMWnX0PC)S zDz*R@)%;&XLJyyj)g&?Ic%|xk)-=T00$u%HPpo^VU8fPyR?)+Jy>n`5?niU zBjJ>TdF|z110wD6B7ha-#|*-PII}SYlI7WfGiGU}$O6fz-D?x{JU@b4DTU`nra63x zkhJ zEB_lY7wr1)B`1@P5SIN_DpOb4hF0Jpv=N_c$aAmB!Koy7T@M|&$|THn z?8mU`PXGV{K>?mTYFEvY`-8-z(d5Mii08y4cv5q|7E#FC5A!bQ@yuyKH0t9xKw(xylT-4<1aL%LHCnDXZ|31}Z-nKd{{s&3;%( zd`yJAe4W^{H$~Modd`7?sx5y1hYS4lopzf$d0pQLv}q~{s;zppaWvP7K?W^* zCZpH|Zh!cM@ZDsi5Z-aU^exl{GbOCSeiAosix&^Ne1vSD3DdJj2BcLhrQho4iW8K| zF@xZpv$6cgI$t&-eCSEe8yC=L|1kcq0nzC z46*obeKS|*aCQ}7G|4|4(o%=U?6+!eXmZ}brqaEhV%a8vx zaAw)davrPc%O_ujO35?n1PEdXocicyZ%8CJpWrl5GmrD9fRo(LgPFEnc44dAP_?1s zaK1SmDOS;;wOqqU^8XhfgBW4LNpX&lb#e}(>hOG;`O-|bZFlxeXcS>MLJ?dY!DZhQ<8a$C()i@aQ2_41Or_OucD@n!!6VFWk!9fv zCuu&a>$8`UqgJ~5I)Ij~TO+*b|2v1it|X0{=21}(7eNo$*i9tb%!VT*9L!flPgqLf zXouQ2MgF6!sIRMtfI6`~^j2cgeR6imrnM0M7hM&2)4jU#1NJ(i{O|G82xS@I-y(h{ z5UzyS?0aHwfig661wkF$0a+vruetCf+etdlW#g`dNhgp6J(*p|W*9)=Z`LBVa1;jm zblWGuDB&(I_B2TNX-ErTT7=duj@au?v9QS0y!O?(J(PgE5uh1p2p6(`OGPUKXC&^# zcV^7}dJ2O3(iT|&HBQLDiX$2+$`p*A+ga2VK>~iKYmw&C7BxocmG(?NGyH?hKYlLt 
zHB776_g29h-?Zv8fAKG!coxL6Y$27Uxa&P1!xy^hfO!S$wZ>W(O+tQ!(A9WtlW+S9s-l#?tQ;%(=8~bR?1>S8w`ICG1WH(kNoDx|x}P+;z>~cAk(0E$VRL%6KZp zo|;|M*x?u4l?PgR1h@lrklwhT6f|AJA*ML9gs(A#JI&DO_rQ&oQE%-Bp%S94Z3;-tJ>HH-su zdh`gAsoOzdgUt$7f0=^jwDd`}cu7S=ejAw=9JC{l@pJvaSQj!?3r4Va@>=|M@{mXg zkHjMnfi?q^DAhL@IOm@p1n><^XqNkd0qT#;vC;0SWp;CX^v7FIFvf!g=)%x7fY}-> zYf_+&eip1JvWQSQO9HiDl2A_#r|?V1=4?uA(am^N4I>2)43BK!G*vZ!(ZUc3;V-mA zs1MQk^@+Qob_af_d1wye9LdT+;%GG8k!m*HsAXQrUV|0m!g1A2z9#+r5LHLm1(R`I z9Q3E#ic|2s54{v<2B?=jplOI~9~t~ojr#(bnGqjY5Sexe;fA@nG#Ck$q_KoJDMQn` zeX(?3Vc0J$D8l`_?FKVX`43g?sld4(38j%fXVcD#h>sA6f`@neVM#+hk>#ID%9{uz zLg0tC7F9Y&qEl1$mmj>?yz|y)u&EfEtV)R;7uUxRX79r6zEgv+#j$dje$Jh?`ak5g zvQJv~bUryMAaTbT+UUhATY095L$R9CE9R|Tv zdh#+}0X$bejp%@vqJT0WwP1MS)FqP`x9HV;hhI#u4*R28nTrsPHB=h=7pe(;I7W_c zXb7u3XVOn5ovZ1TLBcfg23sRY(Y~Q~O_%e$I{azN@aBFgWo>yHF`$c&MNcg!hn^Ps zD&5qo@13dcpiV6}`{hHh%XT)&CBU(PV+V?{88+Zm6e~}|1lRuUgExd*gy#lqm6U2I zjenGs%241R$rSTrX``%Qmf`F$WRDzw(?aAFi~qsjmp= zE6n#?i@zokTPC92%8ERV>QXQmBA&JW5Y$^xHtWc*hw*`7rcEn^qEw#$+;y{8KUs2N zZD+nBXo$<|Ae^m3F`(?}RlVK=HlV=8M1qEGUr+rn*jnB`CMdbzJVpcmxA+mMNLvpPxU$zO;f7RLRttqaLAy=W(A~Jto6$D&^YQ&X z6)R8O5HV45>(*bri|c?~!%H}18Pa^_JGwTZ^g}F)&Z9kIeX^v_ae8=7vIa$|?iPr>QS48Yh}eNON#u zYy>3eMu|SPfC6wtEOcmvz+5)tdkEEnu2f3I?!(@S24hT558M(0coMOEUE~SAlfrH7 zmB=1p%b`9SMu6fk%vwwdtn$h_h}nJyB;|hTTVF2on@~AkB>t-306Q&?1B6BX8rQ_D zqfA2FHK(#bMf4x6T#2=5qV!35YF=OsqO@0%%i+giC_uusG+4AWcI>kQoMPlOhTXKD z|9m`3!aRwpZP~o`%a%DzrJ^FgUNvaG&hVwkEd~__$+8dVy#r~3J|r#2hB}2&qJ>S1 z_hRy04b*?vkuuf}CCR6N`yK%M0`4vDFNmyiht=w(vVrK7n>z|(fE_XWkY|!jlOTZI z1r;2-1);#qo9;ZsHi2z{LTiJiZJ9e4c>L{5k0j@PjyjZNzc{;fM7!cieItQVI_~ZM zBmHu-5DF&Hi5envI^(8_Q|AVya8&Blfu*twjU@(E{{uf@&VSPL`9!1((-uSQsAZfH zi1GI*ZOgP#*eL%7QRcHOcwg)3{z}AA#>!{C$Yn+F ztEpPIszFCtG*G)IK;^%PPId_SH^G`LzC-~@6a-7O#U|&yYs`bDY?4q6?4YWY5I69!z+gbR*J;Al1tg~@y?F73fv-*mK&3Zjsaay zh)xVyVQtR9oO2@Er&EX-N4Iv>2Fw`7D!>*|L&&bN;k8N543HWIqfEH zg?B}SZD=)o8NW|d8*%q@v9?bHc%0(VC1pTJE9%R09+zl4y6|&|#f)@G#gadrcO!%XDdUMucv|edRaY@?2$rFT6scdVWka zdtJl5w44wNbw0h(&4Y`ip)?N}pe#oJQU{?U?Fxs`G(gv$!`720?a6GFAY!PmJhw4% zC`WOm0p61h7Un3K;tf7V*=kVS7qr9B>G^6Pr3W4!l!i|G_`pRa*4wLpSy=)l1JZ#TwzZKB_Wj5ec7p4VnfE3CpYPV{IT{o3 z_v>H)o8IMNcS5n5=i8Ty!-r?H&=X_8XG+1gUx&~9&0Hcpj(wcO(~OiQ=L*0AQ)2#Z zw-s&&DGcW+h2D9PoRbF^*@5j{cYC2IpOLZXB8M_eb?S)t2O}e-*ID#>5 zn1VZEt!YDG1!qQzXZX$NN--enETu&)@KWTqEx3eXBu?DJ$?X#($=SXl(3trhR*^?o zOJv(0R={Tfdi$sf3VfDO*;z}=!3qa~mXwAp<3YiMH1K00&3nGZP6o+eyDa!f`1Ii$ zRP)aw^w`&dV9W=O0jPjQFXF}P2jdQP6rFF{o1yItZYYnKVET25RNkykg2Wn5;JFf5 zCz!JYIBP*6+53|K7`=Nq!I>WI_VT>FelQwiBmm8b-)>XsP-}E z)>Hp*b2HUwx9-DpxZ7SjezLZ>1POmJAQ7z4E5>lG?e^zhIR3@>fi?il_!4tBcYzZo zWcyE!;lHbr0wv7@i$+k3vBT8$iz9+``XwV8NU()HbIesCb7s@w!WC|`@&55}BJaX3 zcdVahDkJ=f-t^0#R(tw>8| zX!VRn=O-SP982`RH}_iLLae1|*1+I& zWn@4@*O~H_j4Qb_oUXYK^%7*^#8fGWyV9rQzG$sxX)tlyLCi9(A2Xl7iCIGNi{3bv zR9_XW<8mrk?>zBv!O9Zb{^Y^g4vBwM$R|u?9)T8X)FzN;ok`l;Am*)(AP=e*D=g_Z zb!Z=fdpmsv3+nd2e84%+5tBD{CpKeOdVN+FR6$=VmKS&8f@lZSB#8@2tQX>7N!T5Z zy#c^6Qo>z6nao|iO9K;t$+21x;9ZqPoB{)4`}bf}V`R<>0_ktKjoRGr;NSuf60fQt zM-|*^?Dh6I2(Jk~?L}vP-FFv_y5zG*aTQgJVUph`KnaYMDdx0=r{`y-y>3WtvT$U*n|DwTlc6NE>sN~+lMBumh#h4Ff)QOQEKm+*e+PVm~H{6kyJVoyRk@K5B2JR7Szid9lzP zc4ui;E`sS6g~`H42*51+f|(&!(T*0xp>>}j=}oOIxZCsJX5TGK;e*@5W+o^x@F?z^ zq+}jK5uOq(5LLkLrUIr5C1I3?s#7tf^J$RkFVDG_!))e?OlsEfw-_bbw z>U$PFw1M9?TI@=*8OZo;BYxO-I|T#GJJJO-`a1gL}??8N4)ouB@r!^ z-^s!|K_24oNAHM9cStL~*PsJFMR^XWZw>)aY=Jx3=3Y_th+&An#N2vyzN zf$GM`_@4WW%ierz?;T{I8MSJ}OaLj}Dt)XZ@T)eTG8{`B{v$HiP@vBMm8hZ!hCl~e z3JmU4;eo)MU|Xm&8V8Kq1K<4BT_n-c+1;k6JOCfhzl#DGJ>}@J!w&MGi#%ubzwj#t za*xX_Z2DpgyYfe{mP4kxE%VU&PtCakd2Q1Z{Wmq*uW;Kzss5WA$6b*1N)`|R3*+@a 
znO^Wz6bzf%>Ca0p_H;sD#_ohIZT$4TA2S7ojg?3lp_RtnY#$lCb`|183&WyOnT<~1 zCXRqH=&{gHtEv=IPSAY#f^3QUgmr|5nLc}l&uq zM;=h~T7erm|2nJ8R`o%0-?e%QqlB4Y2x;Dza;i-5s2Q5OhXN1>njdQUjAn)|*z$r3 z|91p-H7D-zk28$mi?<)&sK=3>n)wjX`Nd#T&I7RF(X}S!f0a*V*OurI^=HCUaSznT za$>&7*mfbuu0%%C0A+yh!b!HL7){pn8yoG#gT9vL-{9^`5M#vAlSS^{o?Zs&+-Da% z4Ok>;NCb>1Pi6R*D_JK+r2_Bsv`An1-f9GU4(S!pX`Qz^d_`AwW-;!)8J7$~@?s6v z9TBO0Vr^yCpnyOztMx1$s@;f=zB0sa>`#{tGRWq(DbYV<;0$B#ImU1Kzt3JEV9R~U zBDrdB<)F@`hum=fOoIDZ^D|4CU(O0BXtR+^Bxuhf*Fs<0L;~RrJxf=k`DTs+Hjjr4 zZ%N(Hj%NGwU)+Ew%m^zUOh!VBq(vz_t-jVb2)!+y`KkCQ+{JfYKzl{CDLSN(urVBl z!VC(wyBe<6K24?qT&@5i`R@Jf$W(SyO2Yov)2{dh)J27AOHXj z)j^s;N#PGBQw2Z&$o?O48K@E7CQA!T37dkHcF0)!?!dXO9!phU%u^Hc{3l~aZRw=( z)-wKu$Zx!!INm;Tqhw1Im*J{>tI2iK_kbjJ9dH27mzd^DRcC>@Q_3%(EKPg_e)vXZFIWe-)QTA!giFwqVHC%%$QY=QevG`*-p2Q+)_Lx?^JCodl6_C- z14=&#u9ed4Cjv*@$Bww}B@VxvMpFv55hia(f)xotNwPSod#~cld=2^*_{B6lL6Ll<# z#rdB;7FH$Nv79M-k`m%14EkbIfj$F{80$0PBwECLMsoD|QKPPFh68}MSeWxbd*Pnw z{5_2GMdMdcj9^Y80IhXi&&wV~z8pNo&(t>`9LVemh0ufeagrk0A;A4B&uudOKrAKe zbH92#It@Ygd;{6?TZ7iVkmORziX>Sh3Vi!AIY?KsQktO1tmZ$@@2nM0Uc; zSZ@%Wr5xzfb1z?Z9$P2ilvb>WRl;}QH`ek2p9f8IRSda0tSsj2hePs7%*nAfqPel| zXM`(jeEW>1EYX6TJeoy{g~{e~h3-@mr^#Cg~qn-3e7zIIEpIqr%`o}p{3nPHTW!2b zIDWJKxK2SbQh!-zOZ+mnY0$`i{2-ba)P^}4cIjnrBFijXU<`+y_})flvNmu^;F=tz zK+HXTnSf#eXEa#~olp7*h)jFC&S858FezSnDw^GF&a)R7>Z5J5A4(oEphz}VsLE1I z{mv#DPxnG4yu!}S#-(2pPaTwCiCT={h5tcrAa(-%p^fDP4jI)a!yw_;3+dW7+fG+Y%QM67#!uv!w%LYN zn)ie}nE~J4*;5rHdEiWQ<2wn415waWSm2)IXdGnF9PiH5G5eZm`03;4sZV`0J`G;3 zLAJR(D?pA}D99l_N}f53Y9Y~J*iuiVP|-u+__~T-;e_vqj_|ExZO~jsSOsbfLj{Z{ z{Yiyz96qklM)GJe2nL!e@Q~;UfXK8wh(f>6uIh6Qdcl{-scEzN>y};i{F5H6)hAM{ z&4r$XZ5{I3mqH{YqE1SFgl}>rlKxwj zB@srpDFRtwW@lxnN2<_~P$f$Tc83y`<2lZ#E$z9DkMi$!6UahNl!pP^#gdWc8m*oL z`e20Pa`+RYkwHOyiD!jWu!ur)3-5EL>?TI0)JoHW>DDklcMrPM_Dp|t4Xv3{eU1c) zvojq+Bsg^9*&JHnCh~bCR>9Sj=6?wj#j3FWlhh=cvppZR740P*4xUg2?nev1gSKkQ zOn`?pXmJ7YlXYJj3PaqKYV6hd1-D9L>g>p>RQB9+G;liI{Lq_Rb%_HXjPja?xN&L5 zq(ea$UUlSVHp6%**iV9y56AC+GO)@%n!s@DD>150!HjRJdiK!v8yOTz>j-mDe>sgE zajZuk_ju7E7saCttY*u#RmNQUrdp`Iz6-=p65OlipdP>a{2740R#`v@r8p|FwxJuo z;VdWnoy!=J;dcTDAg8WK#>`E`Xzw#6U?T0Cu@xtBJQzg7l%8F`8>B)AbhUhUq*PFKQpYux2Jo6+? 
z?x{#vg<9E0Ii*1zaMH6lS~K>RA|DqIY4T`ASwB|+YaOgx7=2HV_+`xD|MRC(7(P{G z#8B1;&1sSuZD4Bo&vaVitKG@s`y$CkyoO^KJldcD`Kie_X;Xa7LSyQ(Q_zSBbc{d% zLvz|!tbF}4cb)_m#9!HOtm8Q$Jv8o@iwWAoC^np)OnU$i)6zT6$yaZEbBfL=mS5voa1(KL_xd^vIMB zqjRBg*DXKA#iM?7^Al{;xs^$SE~uQD|M#eLY%_l&`isVVw6QB0*ET z7Uf*!kd8fW=lM^?{q)}yzbCS}t=b-VQt<8fkL8ZXLGq!Rdc`8ni8qQ4fli`7(Aq2j zJ^%EAD@#qW&r!Zm77e88Pgyf;-ahRS>#6GslP_IF+F570ADIZ1Zcxx;h_1u)4#@9Z85`xIjto&{-?fP-8CLBmUSH#d)=z~+uL>Rs%)(Yx@CDK z?g>YWJFmlNd-H$#J!2|4y{#f9j%r|?BkaHWqQ$Xc^_df!a~eyYChl_}UfXGoM4X4 z;%U>pZ{b7BryKmoC$%Ixy3-)v?Lx`F3eUy;pg(}d z0-9+oFDa2d13=&_S#JZg@KxBit+3KDb=8#ua0=-`oYd>q9eXIBl@?xB_+aBxe>#Nn zZe}#zTkT`S#?o=zWqTWCijmh$`BIhKpkowzbVQ#jfq=xb7q@=G;Bt+XcNr*5v@Og^ zHGPM|iUsQyX<;Z=z_40Qn)=mLVfQmYg8^QTd?-U0NY+K*FVD(SpV=AW01ECQkYMX5 zUlBL^{9^Em+7mG4p{t(G#Kq>rT;Fs!SrA`}N5|eH?E(b|h!l%|%u;c0V02AR81kP@ zvAp25h51~NL(TI7zDr8m%o7iFDcLDP*&;jZ|d{_fZs6;^EF~rrHUQksbV4FEr7V8vTx>P zCi_{264mjA;K1Jr+;h?vBFUXG!v-tr4{<1{#1x!WtMzo96FVj5yc|7i%-o*GjTbN~fZaZa|4Ns-v| z=;+&#c?tWP6{UhP#;%fdKs5zj*2s~LW*Pfm+?h4-3-OKLJ}AL;^!D-cNYr^ymDViI zPW|0OanS!-FwX8VA%p$&n=HpS$%)8g<1|S())=bJ zXa;Y!KO8WC5;}F4?$wr{5d&d$KZkHP5v9n051ox`OwP97TWk?`57a(q_RKoRGWI*a z4nInMzhM9X0saA=U}{bO<-kUWKTyrl`C*_a)3=*Gnr0}jj&cdA)@*6Umv1*v(t%;9 z;{>TFJF~vFjnb0O&tGqxnj~Fa-k;crDiN=DODm-{S^~@t51T0SYt$r0VBb96DtgEx zzFpHQJJ+$yJ?;mrG;qCY`-9JXVfziG+YKJ5h#1E{344R1tY3XtyEMtQj|}K!MjQy* zZ3$)Dp#r&lCpEq>fvG5b4T3BY(4>-CD^R>|iZQ0V*9cf+VafB0+sQ7%n`{&rPS+IO zBLs$h?{6cv>hfl&7ZTbmpO1r{2P^?GZih4K5nR;``me-|u@Qb7BtAs9_cLTAD#*M@ zEqjS5YMxW*iNh0j=T_xd)`H`Y@fWj5^^pdaS|u9(5GO&Nb09&=5b=n~;6(YozhNh+ z>Z837Xw7pEE`f-jQNtwOXhsq0smvXo8i!816MD_ee;q)r8WOisp?K%$Wgdb~^#xs-xmE_UZ?%4wtC0h^7pX z000jmL7HPp;SVNL1wDVfLbtYcdyIJKl!s?bVE_LA^YTjwId~t7t;m2|_rA-pZp5Cr zSjF}mA|YK^8WC-L_YXA&6q?UXjx=F9M}h}=gTE+H_4rJHw6EPh?N>#{j+H?Ywm?wW zm92U5TboSwc1=$uVF11bM^T3f1~HQNqIqqCbaEmI8|(kBzJdXVb5+UL?>4hQ{1*cmH48skEvu-CXPYoxk? zzK1x*+AYogHnggHhuUyJ+=aQJ^O8_>K8I1&8(<=3=ej;`)--7e*W-*OKy?+g61A0CS!4ZfROF)u`A`q!AG_;X>|ki+gS z^wtfWMi*=>-qwE&jpM?;oRHrkEC!v?F%yul$HyS~_28HT*h4>Fi#iqZ$+`@Ef&3zx zt4hOhkjxrk4tUB=Ov+#V|AKmJ&|RQDUc-lKC7zr-d4RUHIvL`8@b>Tb9#`UF5bgn(lHR>-GU8#V=P1DL7pIjD?su3x~6aRD&v zyXf4=3Y=taoNcZo3$5m>9dKcdA)5ibBor7IscWNLe*}OyuGVE)Ao>pFY69QGii~t8 zUObTCq(VG9H*5DUGdzXju=S3FSloc&U3;(JOJN!d;>OB2Folb!$ zcqi-m-75mXZ+d#CF6V>wDWUstH`!d?0TUB2HOBlaxG*F`^SJ4^S-ADAAE9C9w_UDY zn8ZLuO(HmI&Qad?dtXY<7%Ckyjm5l}BQa!KIh5YkgGEHa3Uv8^YNK1xGIPk3r?^K3 zayaW@R|(dtrWjjf#|u}u$pM?AyOLn77`qI#QG@pIXeGB&aB zCl7nlfD?;b)7V@#udOjRBGLl7z5!F}WFge$bur<97(y1twbNUs%>iJ?$9K-!Uz>D~ z$-^^$Ji}o$<7S_W>b6h#|F&Gnm2!*c@0%Mb1e)4dvnjwZHYN%s!V@>gy!;&Im73=$S9hdKU#^0gsx@n4cZ5r#HZ`vLQ z9?Dc~D5qby6W8{iTPe#t{3YDG8M@x8CI`$0aAMr3}u0+?6$6+d=G|6~p#0KqG@o}ymg@@>! z1<5MBs4^8vG`6o;dMrz-fXRrT2jj@QKRzV`a{!4|eEPba1qy1}((hnk5Kv~7D2af* z_vaUCZM=_b}*z zmD0JZQerEz_Q8SUEKFz>T2oQanCQlW&leSG&GBy-VAeV0#W)2AgLSpFMagfu{X@1;*KU2<_Ht+_=!SWbc45Puw61p zp{c(Pr~dzVk|G`pLj;)%VCk!_l*)&9$o?t4j5*>8_1CQ}WpO-!>fo?4dTWi1JH#ST zHjmx!ZJn8wCMJ@@J%?>SrZ{$Tw)EJ1sFK}`YLgWV3OG`@6Uk%_ayY`T5;iCqQh-e> z>2&B1ly5sh$(MQZi98X^HRPSk-W`Iq2%ZhHzUY_`hfxc^bFdUN6q|a5;)=2O|7(Q! z>5#HtB9>gQFn)7<9g{~YPenHRTI8r2&A6KI#eT$Y(=Y=;J1*|UaPjFkVR<_qpjoo8 zuQ#>X6ceXfQHw65$NF{+7vdbiEj(KU;KsPJlPN$%9Urz`Z$26w*oxf(j9PtQOyDw6 zkP4ExNQ(uDE)FNQl|V>rzV!VFsNP7lZS7Q**B=atXsP1ktsDxTa3&bi^n7dJV zl*w3j31(UA8jFhFw68(+ahGeZuh88g+uevdZ0~F)Ev{{uG6Z|a_uw7gjH3sGn38_V z1|bM1jGBQ@AZt57m!Kt;A^(Sr46f^7@Wz6@>y#bA{_9+hm-(~hX_gLB*vUTY9%rRE z>JTr1;-}aURLTK~T6U!vC(ECxPjCthH<2Ix+n*bOW04+Yt%pRf5P-=;yy1b05EhB? 
zP42rrtrYrUq%H)s{CsIQ*l0{rkw~dG%-_*r?UUYs30Wd@DhId?dO#cF zP%%tA7$I|EplL8e08D=`j+smw78O>hc($dI!NdALp7s-gKGDhU*B<-Cg4(UFBjWv# zu4BlMaAX=R!f=b+9(?h%LTN87GM{pin?0ufFYt&)%n8_Wx_@%E_+v)yMG4U9wiAG_ ztI91vb1P_Ch@?b!hgt!y7l|_}g!RuX6_#*yAiJUK9v7weY2<%FqwDQ zP#C757Bl2xxCpJeKDc~}!S>o!w?EFvzGh`0`>%}Eq1Mo<*-92Q()lyf^ZwjtzFJVo z4TpAb9@E(&fH=oiZp3_5J_U5Y{{ut6&q{^^?`4$V>t^QG70Jf+l4`IA!OPa8_x3QEkMnaNyj?!uIMsjbFYpf~YxuBg zNoEgNx$TA=r=}+>h~Ta>)Pnp1E@eKwgvh9xcgbV-UG?#!@U*d7fYwPW`7kGh9(?|; zPDMoM-@)+Bsa|N4GuBjpK7FX@2!yJhyunky4Hl>lE2|9FrK5+PQhI2u2wHiIA(^)QAZ1*N7-;jy z9?uo!nigO^jTVsKY>P@Kpb95wFv^W&SLWg`&Nt^RQw+1TY3!<=D%1x+wDsOyR~EPJ zk(qg$2{lVwe@}*S=1ZTkM%+K*wo>oh!Dy>lY=v>s%VQRh3z&-W&?8f~OXC5B~ zCj}X&_9_`!{ucnXrK$bd#8RmAO(zlAS2_9x53Hy1V_(>ze*{fDBr_UXqt{BG^TjM9 zoQL3gB_X6<*gMvS9)ha~%q2C^%gjEkQ)=ZDg+s_HJOQ7((sZ9m-b`?%a81%hly2^* z(i7&`4MuZV|6^cP6rMSkOsXv5x?{zy(sW3H?XzB5nIe4fG?U6ci2reut~koi4vk$! zufMQ(z-%5_L}Zp^muo|_#?TV8!1NowqSbUFQZs7m`0fYgbf@(y6>-MYC!l15a zDmxQKvSb%QsHJw*dCLwt&#D7AK#sSY``vAp`|k9V_;*Fk#~?^h#)@?#qfM4%chnN; zQS|os4zm*wcay@C7QvF~pj~TaQMw9mLo0KhU8n zVqxQB0w%AhWl)zm??#o|?uLCX( zi7t;>dy}m2Au!cFi>z;!2*k8+i^_q@r!rBjTV#p!Z{X`s@(vr^{M)G6=?c|X;vB`h zeo1qfCK)y!fc3zr=KiWwjg-o;gEZrL%tG`Sg^HY>)vZ%@K8N9FdTK25S;Lq@c(okJ zkl8drtm>7usKTRnxyzaqrUPWKP=e5g|395BW-U}=WL@rIPm}02v!YK~x13`36QPe} z^$4)PWrU=7FMt354#7d1g-PKLCQ}7G-|_pX#bft`a2(k7>~Eo#yuD~3Ij4suQdTC- zjw+)eHf06WO?poT_@vO{hmJRhF8@NZy2`4_u<~Uk-m>FAaB-2AS!U+$ zU34AeM8AKpp#f|av4!4E1y^mbX!3hR&!cf@Vf|{*)gTd4S*p7VMYfAN zvqn0~hXbhkJ-DN4;X+Cg#P^o724$C!r-!WC(ZyGH=2Sb7k! zL?5fBo8Z5HU7QCWc!1kx+f=JfWs(m(0VHmf^!`)+_X*Ut|JDR;^cb81?DGl+t0jR%;yR77%!J*w_JT+~diHI(6>B601mGGlA7#>_qn+;&ODeN3qor`RVL@CY?h$)@ijmtudzA6Ftf82QPns$bh&`#Go#cROjWdU$Oy+yat3V5`Zgv7t>Z~dv zcL2Y!8AJ#W!O~N7_C#_`GWe${lWTbS?UoY~I5V7Han0tSr+^*s`%RVl3huEjRvxp$DJaji4iI`Dm@PieLr^O8VJKRXzMw=lV-b8QyTKTj7SiTWhlT4&9hc#BS-x=n_o zknjPWBq7?59Sn*y`rFqit2vOGn=47ph}^4Xz<913u3tB}_23j^&#J-Xv(5(x%u;7# z1F=F-+1W2TOFvPDms6Xw52*aGeJYpYTvB%|7U_SEA_!jRJ_nQ=KKmBc=?;LdYcunT%y=Jo1KA+)u{AdlHYqzU+Zg_uVXCe3CZ)Q&o#nku@&{gI{iJ1+?$7b zvmTnD&N@_N8R* z0MMgShR()= z3%;qbqO(=!LNW1&BZ6)4Y(umzPt$Yz4N$BDy^v^R%83UV_Ma~T-nX&1e zH+{-8npejq=Tz_6P(1v}Q8&<^YSLB$p@6HaM8I z-jPJOULKj)av;}GfJaP0Vbua8L2A~mbR!0rsEM*z%)^Aa^?!Ps50shK-bA}-N`gNM zL~YE8pL9=JI3Rb%hFZO0>i|razI^X~u;0_F2Q?j9PeAk}y*3jadmng5)??o<8kd%p zGv-Y5+WaMzUMR3I`^_dxk>Eg1Tj&?}DNP1C3soueE@Qr^m+XJ+Z+V3NZEFlw zZ92A;7@-Lc@fqPXJ+M_X+l4n4KPIk6*udt%O|YFTj#SLLNt2sZBtp?D@NPi9Q8SN& zNC%>+bnM9sAnCbb)QyRRHjKS4?(9H#ie_4KT6*gSW} zQPcJpQFnvwgJ5}4H%#Hi#b3Ih#Znhlq{~7DdcEcW?D$b;Ul6fA%V-L$!G4MvDbd#L zS}v=PUUDEMsHq)V$_ul>TjE9@FBmG*C!OX5`@-aL<25nhmUA5Dcze~o{vFwB_Z>-# z>Z#hJK3@b;hzM^#h@R_vVB&j#IqxqN2cLY0m}m_)oRU5yy@@QG#N@D*6v!MLT;|0f zGQAk^4cQF!pOa9P#hyn_fJ0nqS6gJcP(zJd!0Mb77)$k2;=BJ1XCGZG<{BxryS5BG zrN-n&Kh(KjMX;(w9BPqc8e!msCvCj@i^+%~e%SL;NoxiLG)4Vko9JJgn3&c%9o!XK zCd?1#*GfpHoJ+m;eSD-w1~>!?A(RM(_0CCU;-oRty!`2&*!I2DJ)oe3g(yDVSKcW*>Gg|eb#D6$fL z4cfHc{aBFwZEjTtwkhC))oT!4`+xNw%wJP>Ef;0Beg~{BGa}^1L61Px8Ow$URR9_j z&1Fpz2zwaz-iLfDfjH;!4GM3kdc~~MP2eZ^B2INz@~og)HhTev`7&wwo3al^4alC5 ziv5g`5(Kpe^}Ouo+@CDZu#_`9wn^S$G97Zz=P`IZOWTbsJ{aXVP`WcY#Kh3(db5;G zz$(d+Rg|eEixt8bpc#Wy)pHNjGigohXiQelt>1{k`OD{=bR?I-!22r|6@!V(IHk$Y zLB794K2cy%LjvcA*`w{G1JA^1TKz}I74(Xkk@FY}mFla6^jXtad&83{sh(wrnuk4Da5PdW-pkCZk+D}ng?Xu{6e8YTZ5V6r5eLg`)R8K{SRWc8M?ezc zmY^4Nqb-8HLvt;pBpj|E*pRLpx-uw~$f$lJZzsVSMQ}|&)s`nil!JHr|C&}#nx?Fs zlHTg_IGz%p#I3N|39~h#AXo?G9!{^JH&N45{QXA*>5&L>WjxT#{}LIm-Br)XV<<*h zi~K{rVu(lgQMOQz(vcC+t5jA0S%AjG0rWd?O-QEE!!1pf#1+blU2lkqfw=!Ky2Iu+ zd*AtfzS0Ot>_A37P0KlB%o;V~{1{W?Sk6uR%c2jPS3++s)1{)6&gEBij+*S2MA^M* 
zJnPwH@UrJvG>C3@zxPiu;E|WqHoYn+S8|R8i}A*4FMSVEK?^_5y}tq&WF}I0xmJEU zOQt0nbz&RXJkC+K?r^X0cpCH$m1^k{b`#*HQ2q`67oP@Cg7o{K|3Uz7A8(%$5a0W5 zS>~ElaLLVba~MRfH~gI0JR}?L{g{EU$zr_usiK)WO@J&c*QCBSri5*TNJd6Z`{p7sMD>Xb2fdo|*xtN()jlQ&ij(xpuglO2AYc5!|epP(} zD}0_uU?KWvC-QvL;9ZJ^GmcpV%asD7xhMaGKGF*;|945Caz<&sh8@RZ$iDB zV~(qTHKL6$S7`Wz#SvvZX*oQp7I00k_r^2j%$DEfeE_D=7xpuEhxV#D{Y>-O5>XM{ zlF12A!2bPui~3w6Q$#sD7bkeguVN<*KY9|&LUEY*8L)G4f5)^I6)QAyeXU6;Dna2f zp*D{+L3*aXI&M&IV(4oSY@lzut&_9XTtK@+XvPw@o(M&dGjP5=XG{i#Bd@u#S>vS4Ab{W&XTFKXrSeo z^0eur;2<(^a-7k5V&sE8ujFO?vXVrRy?=kt@RFj;GFb48l*B{;;8d?X?&#OhADFF8+a*PX$&lad8n+QoMgYJq?k{d~2k|ALpJ7KIgjT5-tdjn>>MjQV5gS z8XBXLThA3p5J?67paR!7K}VG$rhx%mFv?C+rvH$Mbq8GJ+z1;o4YmT>ZOxE0r-yM29KL@% zzeo=*A2+*1WR>Wcp8e8@!JeZL*qbR(TpAtg3*i61VIAWdOvMXpV3^ovK?W(d(af!z{Vs3}$5j00Ddf zo}_9;+_`BToDjBs3xx<9AmBzI1BfwPLo0nKAXB;$w)w12 zY9lAzhJSF^Y7mjd7%(kD|N~O17^8DXLjlBi`&OD>glu9#VK02T~p} z6O;d|9d=On?suy64pgrO@VBkuvZ+w9G z8To@}i0FtUQkP|)3A#x&G=hxqdu!*nLENRiJ2#KftRg48RF({r9zttkJZ14Pnv5T; zvehJ)7K55fw%crC0C2SsWTkdO;bstqlxO=0L(9LR+7bBR-G1hJz&DiaOC_xNWU&0e zs;P9cMY1j+yIJ%%WJLom$A9rXGtibOO4^LfFo@8r!9|N%5?3>a!XgPVIkfPcO94M$D(di-iW$`k=4KcK3Znys^+fp2XuB;KEHw z{0=hmcMs{j-LOF2O)ov!{BGfs?(O5Ym97fRzpaX#?{{smU$TsC1qpS|)1 z>|n1_;OG$sZ5IGe1+@p}%Eq(NdZ970Vbnc#zu7VPZ{ofaWk+QRy+Z-b_5%A_}1Ya0@gV5OU4Pl7l`Zh#KL8_+wYa5j_? zLFjyarTt34MVZ=Ba6wvl1y27n3Xg_~%dBh$=oCuu!&$u9C(p;mpT1qzBd9P_ zJDwz|!u{sh_g3s7AO(n9jGZ<(*{csVzS*hLERS zl@C)eS4FLMh%kXfgC)vdg(2Q^%*#G#Ce%xr-glNAtz)x-t<fjn}Hxi~-sw(WVa8Wa?}LOW%Gu>@$=F$KVim@~FkZz&L3BdzazY*JkcE z;TnRzFksvG?LW`YvApFebKp$5cvsVkpa2K)TuwwALXFRh)0?>tt1l9(lZRjZL*}=T zkjqsN$07-P;ZLa36hmZ7UB{C*HY8q_^X}dl;`SZbHb4>$Z0*@-c3eW&$*q&V{y|SJ zYL>oU&oT?nr4BV}1Mj!fKjv-FkF^W4P0Sz+Ip$RVN;{`%%=)KukWVu`4%u98DjE8PKaKFh^& zM8HU-^Vdd0bw{Ob7dNGC%YPQi*w$&@;8SXFNzyTt=Q0TDoG<@YPnMFzXUg_~cx_}8 zOshw?Z9aqQ__vgp8bdwP%P&kj4Ih$!Pp8gDxvUvNPEA~r+S=M!o#AD~{mNF5SSAWo z^Xbo711~5-vV;f+c-mv!%M^4jW%Ns~_kG#DU)vDXEEKkZ*>Pk8m8<9C>=egnY6sKb zpsILeNWr{fK5#6?ClKAim6GnXX+$2QW9hDX4a`&(Zb>!wCYv`rTt`h}zAsX;Y2Oa+ z{%Y((+Q~L^a9Zmy3Lb%muyxS(y6YyEqKC^!sgG7K3PWD3*_|Y{ysMjObRQF-QD2J? 
z0znNDn?(x?>+iFK1UU1f#;EsCPVpPvO**Vlj}aTQLN#x^;Oun2JjVa7fv9ALf$`-8@O+RvU zqd-L==Z8W%2VZntCO0o;yL}003?uN||Kf0_w(ITS*i;iQ4*4rJKF_IMR>+X&CHWi9 zHh9!L8K;c!w%=SYa^FGlTXfrG-dE|1HhK{=tp2!;)A6*VmD}(a@xkJOl3I&F6n8tL z_c%*KZD1oS^nxHvJB^byt?B>kOyxiAkW|VR)@UG-L;jQIVChO5sM&*n3Id-iL)^D* zc)#{|8&VIXzO_*8~vyw z>3G+xEBFC6L2yL_6BUrdTV1x>x9={fgyOH=BZ1bO0oF}kZE)qE7;e;sn<^P|zWE-8 zeGZ3l`?%@@%D{%*I7?~=5AVibx%Ei~J6@YEug}Y=I@=E+;8bq0frf@miGn&AT?0AW z5%3pwh6R7DL@12Hkr?e&J+hJJ0bdT6GOiR#ORXs6U$S-{Kjy>)!ZUeoU$#`rnM52P zztk#Y)%RZ*qaG8Unsx;U8P6ki$qST`uU(Q|*ku8Wgf;O2{q9%9Q~3kS8I^pxjYFFjULzrV;7;>aL%LGrc6qg~KkxaZ{H?PQ9cs-fU6ZX@0(!r&k#$ zgbz8mO_vvGKS4aEU3LkseFX7ZUyTy;XEdbdLs;XT06^@C?K!Z7SCOi+v#Q^eSRs@m zM#g_;92BcJqDC;cSdYnWU$6$UFEEo}-+JKwDM>}iU0pq%EUITin@o6kBZ$-GMhi@E z-@&L0M%%qtNKWP3wDXf-kCD`-%*{S&cafVwVZWmIA&_qa3q?z34)Bv74YPn(Ds-qw zO6tS>DdUO+WqJG#eIg_vPH7?~wlNbtp%%d#39I=oj?6?|_K9uexg#shrjJ+{j-pXb zi|$w_LVK$Oq+_KQ_*IQ!xwbi54L;7gFhS7R`*vSSt@QeDi?ufhkD*-8Mgd|yhRo!s z-}%GN=say6d++2vf%_QirHThd(;4JLwkVTp_bbwf~w&ZM;`b zR**hrCa#Eqx6_VF&sn&9aM@_mD}Mm(=1hi9pY692-`cV@ePC|lOSrBiHzAxmKW{HC zE4)q=(7^`)00FE4p2liMfA0Wr)6gCZq>5*4YK_;zFBvc%3(3h3n-%08w@aqc(5Blq zTmq<@_w>jHnKT5elQ|YQ8jciRcq~G;1f~5ppKF6@hWNeSW(DinOB0?2UR> zeUQitpx$Cf9@{RPWvYABnU{6s9bHi~uQ`iULews^BG<{xqXKxfE3w6C-a4Z#dF+B_ ze;q4eZ(mrv2 zcxw9NFZJNFi0eQ2d~<<@i@~RnrPz)#djc)*B;#(ZPcH5nXrDE&=U+HJ!$1gT424C9 zr#TdIyPxaVh;N`t4VF>5PYFAbb|<@m&hOV*ujKP+upQ!7R6r37UTvLS(udpEMbTgUD@q!4!aH-m zeQ;FD4fNR^Ok^Gu*$ApqXGy)iw$=#=VHAUVhj#`U7aqEtZWuR^oo*9B`{H9gM1Xy4h(8N_(VV~)c}4os5oe3g_!wZjF#X-|PVEWM2{T2XZ*FWrY* zz*#3Ggzl|gvizhP6KMhTQ3=1KU`U47K9}q|*Xy@eb9i>c)=#_tg5vm`gSEtCQ((tp z{Mp2f4B(>m;`skK|JMlpjI!o}^HB#@FOXY(6tZFh>EnwuSHXq0Dvx4nRR3K-v?SC1 zdMw;j{ieCm9ZeK>e4&@5u8j~&C{P(C>qR=p<=HpKj6{j^#+J1oBBry6`% zk7_DTP9xG2xz}rkzVcS}{8-c(b=JXKQZf!5*sHC?8sv*enVjxBQHeU*Y2Lii@Z(*6 z@9po>*F$@6Gzngs_snMgU(Ys2LpTb*ygH24GorRge7 zN1h)B`in9uiCA-3<+%sDv!8Kv{0+NBu?jD@^Eh+Uo-be!Mx-`uc;T_gMi&MkAVWro zY3xnuLtq4KAdAzdk+gIP-ePyi;-95LadkX-9$g>HQ$E?oVfagp zxJgWL$t}zZG0|R>21}1-ynPnD&hXC#l&!GJnWNZdHv09C!aEm+;XyJjnIZtr{E@4w zCE#^>hi;#6E(9W@)rh+;^$)hD%%T(c`1x>9CI8ppSJd$|ylAkJ!sd|yn@6^0dI(Y@ z>VjtF#h$91&I!Yriu4TiNZv2u1Gb>Xobm%|l0$fj8rYqvBAbiblz1lQZeGBp`*8$f zkUtC>vZ0#hDi}@^Oof!%{~U-MZ{a-UoY(PLk70ap?I6vr;wGmrE^o@7TKS=33fY(r z7K^Xpm~Xx{`%ls=ffvAtG1-5esz_?rB-*(IY=dhF4nJhb4xWLkQ}OruM%&CqaDVsi)T%LLUGaVILF5LuF>thIsXLE7Mc8tKL=(FY!bEKygD;R&eRFt38@8@Nv=UBd*ctK*}!A9%%o8B zSw%5VMdS8-3Nj&OV^P&_;Ai{z9s(NM4NIf&I2X8zf<~tL=LH!{$d|^mO*H&*90ju= zvC|aYNmwM`(FOZNK9poto`_>|6e6V)mC<3`JH;f6n3mi8^%zB(F(L;YzifEAhStp= z=#Xs;e?;q@Z=)772;pHhc<#Vq0Q<;*tb@8clYxi24E7rUkp<5Gk@+p+1OzlZ6?fI(fn z`5zqkpmR5`SPMdb&vE=rS6f8vx06odTJr7daHJ*?$$9_oMB1?_Impv210MtyqV|y4 z1)xUX%!gz?0Elw1OYVM%?ja*H?O7O<76Qb&I7$i9Ssjem6V10b^~EKtZ58{ zlQh)i>ek&lWwZP-XHcby6#||&_Hn}G0(EVQP%HJ zXX=g}%gL$41y8X|eucbnLGu)G`(U;`xe{u(3~H3NAlIAoy^mV(_6KZwHvken+!Yc) zkHrv}mxZES&Rsu0BrruNEdIjbDG7U}LuT${ace8Q+wh2Mb-rysUqcK4X&CvWi2WJIKCfmN zI?T*bMltC`_v$x>zOUFf1+`a&S{hAJ=;6}Nz7DA*e^3WXh8x;t#mB#DFyGF8Lz*C- z_C{4KgZn45m6VszN_|J zmE0vi=0D|jO8%@)Kh8t_(_PVYbOk!u|%Mu&b6w%@2Jhw z_!m?g1?&^18`pxRp&Vda@?8IxSty@EQwqDr(1@i@D#UUanf zgb;eEr1B2((9!zS+ez&iC;$KfSplBtYDRzV)a|Dla$Iv+cQ!NfSp4GqYcguoX8)uF zQ^ZakE97;!ejG&16LA`CqrJ>}$mz@J63BK>jbgF1%4gbYq8j!5_=97H;`dGj=r#taij;I7X-$#FO2;$TbB#=r*)ac z1oAn0Dpif>VTEA~%Y=fm{tgMpgAYdq)u3^X96{ig{6vNW?M(=%%6O#pdk>9EqBQlx zKKBD$S2%)RZxa1ryxhF*YgoG!`z&XtrQM7+D|1eJS$HFue93JbxQP=qwU zJJ0|C5*tC9>q+4cCQ}7GN4ja7My4fV25*{+lvb{PDiVCG2*6I6^?F{LzLqoX^PJFK0QC2ar50dORJ zu?&*Nk5^dg;59dhz|`^3we;pALzH=8dq68IEGA;?^#&~l530wRo0lsenOZq`OB_7s zbdysQ*jvjgTjhj!!=@nOXYHADYt;?2d>QWb8VlG1plEs;QYTNDEu3L&v(jL)N&=;j 
ztCLvn%h0wyrT0x!gC%_&HlL`#pI$Wi_InBZ^|{8R#^ymm;h$AFg}Q%~KnL&;?F=%b zcej@^@7e)h6{QP;TrKFCJ)NwB!96xX=|PYQ&QL^aqZ&sL zJo-7~_z5U8FmA|t(Pr|_cS$|uMPL{*7e&MiD=mND$1_SA940$bY7ocNe(P>`VMI~n zhOHv>w=}AyDyXcY9$kU>q}^R5!5uNvWI-w|Y%2cVfs7-Tl^mTk=Lf>n$%?y)8fV(; zq8-cPZEOg_RtdA=z1S(|k*74je}Cu!!;7QCL|gez!Cqu}S{ixmvUll#IWW0}lEz{F zCC-&UkC>36XfxK06#4a8#M2^Pg6;rc6IyJ8=;`j>JcH>=crJdtlNqg@xdToRJtN_( z*rwrN#%5O}yw8f}$e$G?M1nc((2=Pr7?yns8Lk?LVb4Ti`fZlV`sq}82Mo*e8uImFil%2)8k^M$D!^w-E>Kfp(4VNESq)y{ zd^kiJiLwHVIDQOTyKQVqfuV-Y;TU5!aco)!y|HxYkE~HgBfl%4`cIzhG+w9!sC@aA ziKA$%9klAx{Q5wnF}21|-}Z)l9;K8d;4}<)V=d)Vbh~zF^20eIA3iB50$4_*{3pvN zjD5=kV#}xo?{&&fjw;lJ?-|kx>EKeEM?)a3!D>CJgKeqx;>-G~uM`;4pfa3_K(;f7 zVP57ptZH8vliu!m@yIgoikpZ~z6`6!r7ynj(csi;Q=|YTS(Y$*R>33t=_gx&gMG{! zOfSicWVXo^N#5@_V5fK$mb`SkrTBM&L-Ga~ttdkIJQv*7koZ7G&5NgU!A{8M|qxF zH9Ra^>;b^TXC$ka4X>!l@@0TMaNn<9o!3F2V|OyiOtCe)E!Q1O9>+2u8hB$7akZ#{ub4^jIcwC^x(bYt0 zW7bhUo7GeJE?6gAz$zpjPx+SdjczT#R);k4o7yNu)o9)*OCVeTm0Y6Fan+=Y6w~V_ zG!Hy7TclaYpEDpi8+RhpN>$BHQMNb5MXPLfmGDRB-mDI`J5p;6+v*Zgf<{`G^m*uV zKL?QiT2Up)6@?c$jH-%rG4HB7oLz_Ml8o8Q<*z8fA@%f81rnE_x_)j45Uasd8(J6N zV-ZPNW<1-)VuR}ki|ozr6uTVXWDTVRvy*ubP>18*lL?6hTmLsk z?=ku6;xusJ9(OmKfn=<}y=njw&)qOo1DQZ+-X>MCZ+S~_Q|#+#z&d0VcYQlIY=xC9{?QTW^)= z0dYsvY3Biw8FpkzUcz}UDi9BQS_Ez16CNGprdpdcu%FYm($KGt$0HwrcVoA%<}D>% z-q*~ko_RG{_&~D7(1Hp%AOf*8xx81R%VIg(X#|{XVh+!P!fLafSh&qDF0O4aRajVO zTe)}0iCchZZRFZ1XuQFAF#FLa5pSS#K#fcBvAx{ZuXSIpk6QC2DLUM(!7)DPiy}s& zG4}72WECp}XhSaEU*sykFclUKEL`(jL7I!@M1JxU!6|m$lUevSK|3+PQ6!{B5>$JU zmXpzg=MNsekRRE3bzI6~?AvoMUb5GrMM&U-mi^NRt0XmEWV7QVdIY&65a0r%a$x0U z28qR7dW2E{l_pkqg@-*Wf2s12F(r^@YMHs6dH+ew4xoahtZs3+ zRDD)A1P%~8d_u?1eh~_15kwbkq?ab$*ZvIdj?U|%@<@Xq=-=@r9;sT?e##>|ZVM{Q zb@kKRaXAS}dD~2*Tq)H_=yAP^uTSmOlu~vcpvxQ|@M*m4u_fGs#wFV@F(Bg=~fUBlHD zb?i879d&+cl`BXM1k4Dp;T9%PmxL_l5(&tQf^7+w>hlB^Mm( zLu8=v2$|eXWZ{DMf7lIdr&zsvs8=F#8All8u@X;zu0qu&VWd`0F6Z#flw{W^CQK$EjrYd-h|4zif;KxgkQM>jyd@+Z62s*Mo07L^`#OBKF!_6yjc_=^Q zv5Fob<+==u`H?+#1JK_4PEhV*Kpk(Fbi4HFFiY)+Fi&)KlPLzHh_Qs!+F8{1s+B{c zok4o~t}7cM?aX3QMSGf#x|kkC1zp^XbRC|2scI4Bm>iw#6IV0AOU$v6{}$+n`bcxa zNuiqdDO$*RnPM-Xk~w>oh3}^9;LIGG!hS045umYq2Ug9N*n-+QR?aXg?9yJjMEzEz zS8Wkij}i4TxF~eY&7h4!qG(mF9$mtxs_**_pC3`#eX(3Ty#&ZtP>$1_MV>gvzZ1Uv z1dvd$2A~tc#1pW@86Haz)Ax)1)88q;v!S#@y2r+o`x4L%EnoJ+Jur2>z|Tp_BGE;y*GJ$VFUtC#@*(Z)ly_g;Z!J zI)rdch*29L%fRwd@m@4wPQ({_0+`^qdxy)5Ml#X?tNO{_+$#sh7oxcZmvIP1rB*j^ z^_0fs5-cd$18Ip26SS|Kgv37MU&4rzV~7r@6K+_#FCYEKpB)I`p?*D&DgtFj&lpQZ9YLq^itsYa)l*B1<4tsT-4Aa3t?T)Q6oXd3gnBHie5s5u z2E&+}(o&=9P#;KjM89~*L6z*KnRDGqPj^m`t2rW*f=l?;Z_26up=_uY@;T}9TFGh) z5)M2r+{fm&MiN$PQEEr`L=$JsOCQ*`_0KfXMRit=fm|QBnh&#rzu+5Lz^provUEn{5_fv3Os zA}FTN#Zw|GNEZESM(TNrbL&}e!XdLspB@)ZaUIkmn7e#dUGWjft3xvxX&vCF4ycn? zgeJV|=R}E&(4txZ62pGX4@$HWYX$rz4i4U8%f0NBo;#GBj?R!Q6iw=MT%`gtBQ_TK zVr|n>OVhE-b}8>5)qU_s&yH7s!HCs&%uiAJHrBj9jc-f1;Y<3_9I&ClZJInjA|6Y| z#^lETdb=+eb9w%ZiXMRCT(szyTqgvAQ?xTVzn19^>ooSts@^rfE65CfqN2^zCdMOi z!qv@8nCb734NUar{_1rzYTsUqbmqg-SYb7I6LB+OlwIC8H;8w zGh>K#kb4OMN4r)BF5~@N+C;%Jj>-%=uaEbN8{>P_K9_YeG|EU%9;pses_Y})&U9sQ zoMR!cNi&M9+gO!IFo(5Wex@B70vgbDJbE`gevl=_1CVUwIn3Kxsda2i7>nydy~fC; zu7)5*NwOEg&@lFqc?Yd%w&;5sAut)1)TaA7J!fGMA-qAGp?p8b2*}24nBSd5NDPtb z*F5qB{y7fI3V%CM5a6Y+B$2%z0rlBjq^6yR(C(Qo=FH90HN&Qw_`DT6EqpviKvjG7 zQZtR|q?;3?mT@jKli_mXy?b++!N7>o%Ej7p?1o!ACU8+xEN7zDUF6{@>r{yyY zDku(DT!4{cOv^`|j{Xl$M_cLY?R0@b+!#&F|Hhns)cxl=9h{-cXJC zeHo;+hW+<2*4)dV_1#Soy$<^S$re|*j6$tfDvIVFB-vN0h1ZMYodEt^G>GSs3e2ml z+S;>`JmH3v$&(M?4|p7EBo*4lMq38ydRZoZ&rY4gwA8V+=?oVBdm5Z&xITJ6ILI7Uc%&? 
zoV7f>01Pi3$Q}a>M13-qn~c{@fwtjSW!Ua|7&e3maz4~{BR>Hb6vEUDV?5b>^f@m0 zJ(W|<0mYmUJ=BJA9gyNNfkx3+ow-lkr$LNp@Jx!|7tk~9rmZo)Jx#G>!ueBu;#V2h zpIY2NQK2ik9*pPsEJ^~iP7Dv}urdr&GinCX{CnU?q2)IBpIuujmkap!$V%+?n;Tx7 zskS{6GtiXVlCmSVf9l98UG;mR9%3y5!EDGUc~ILmCLwSp>F`G4_7FR487{lC`;(P! zIOa9@*abT+;u-nmrc$kSzU`JKA0TRB81_id^8#s8pF0|MHrNfDV&J5XIGZWr zYy++Gq@PJ7+g6{(AqXPftxfg7t6*gO7HUYfjzD6HQ&aPX6HD6X)<@v?N585301Q*)TOOg+ z9`>|^GsPfy|GVi;q7hea-0nw}vz$bMpT|44BwO$%zD=nDE{gws3;^2dwHbQ?L7 z%FpiN2)6>YGCj`UNow`J!7xgsU;5uHd^970|U-8MLpP@_FAdd&qTCOBKwzZ3i~B%qz^%D6rR9M<%|fdPexs9pDC~=>_`O z^L?%AY_Q&!VEUpcMZkcDYRrWvigH)5vd^O|iO_bSGl3ggm#*4UvH%Jd94jW)KU-H< zD21}QZOXy!p=sC!rKn*0DElphBP?MvEHY6+lNb4SS`3@V()<_;PAjCj^fO+5+G&|h zejdHxp=eoJ9}VhM4j{yD{8YLj>$wqzlF!ZY^zvV~oL!$@w%zFQjZVUzZ8N+P)p^y* zLERmuKC`tM_-Pw(9_{<;^#1m-UBqBw2&$r)LwXsCe4!f5ko7}rnB8Q z0jkRiTf<>lG+=)RMUswN&)sPb4;sSp?d+18J6sP#^<)EKq7K3SjTwPxdptS3Ytq6? zK*alVrxLokGN`e!tmf&2yVmM)-qK0fOO@1huQZuC(?M?@(4L$oeOwIUgl|E8juRP& z3s)x>WZl%+DU!=21M|7J1W@vlA7!D0mB_@0Von*yCp6fHg@RtntQUH8n`i8-rnU5_ zz4H6Y6lAxSK`aqOrP(_YF4M`vX(QEkJ@o{igJx2)btJUUNwv|9JD82SFVi5t*a$hR z(bNeL6zE0m{g;D%O1A$g7&lH@effMUI+$GWi>3OH0MG7hFabK5;i|f!W_6fJz%Hg0ra&H+1xL*$bea$tP@9hiam`^(gqkg`s6;;~Y>3k=%g<{L_J`K`U zm}!5N1{3pW3Xn!?7|vi+oR~HF&Q5_c+T{=}nu%hWx`oe& z)aJ7bc}gSejZ?Jn(0FL`u0nezk#FwmEd8rQ*2cTcfg{k+u4UL=3jSm+&X}vBVGCkx zl|a)v#UAIe0cqvGr_p3A?4okHaFj%)z65?QT8^1_$Sm%+9G!&>84UV+{PrE=Pg}3F zbJp!*{2(QjvYG1$Y&rlaLwN2dzi5;_iSY(!KT%ZwxO=U)=iTk}&H^;!goGzob#l+Q z3mR(|gYLZr&Ytd6Vm`a5(3(sH(sskz=$9KVbb4?@3Lz1n)#iDqlK-Ss@(K5Z!=&^T zxgHP%QGI_)+S&jg*=ER$*%058&gxxiA%G~1UN5OV5EZ~u9H!ivEGuu$@bxLrz+>lL zpZ!y6UAIT1qezqT zJaTxN)?IA0!l(Qef{W6<@$ZL)C%qzI*-eaNHjqJ@eq?mxj=Z@v#EpeuUuyYQEZc`Gonk;zgv*)b-GXNE z`*W0#jeRkqBB-(CZ}~!}W)VLYWe;}Lax`c(nEu)n`%Tfn3l;fv6dKl+I z!=u*acYsW&g$}Wu9xApwg4Z86t)R`OZjKx|-e?-a9RycKu5Yayd?;F7+a*{G`hdst7Ub7qV+#A*jQ^HGEh3sc+YSNpYWHB#*=FfgO0%$nG5t{#xAw;BY2ACzA{XB~0|*o1g*&YgH|8=Uz4D zc;esy00CA3pDb!df5+O|X2G>HLer_cr!|&B`93<2GbZDK6F~))2xxQX&C~Sz30Mk6 zK1_ZkzJ%72btgzZ@j`9qi}ACK`*tJJY2+P`L^|06CuhS~uUnNUkQgb4w#{)(7|eV5 z@b&cV>xblZ6z4{K&<>D-e7mTh9r>kcvEV5bIiII2@xy?}lnUIp5n7~7f(UGp$j6Yz zNMfS@dSV`(RIMBhFDB+AnJ_0rG#7G3}IRu@Bw_E$2YX+*hX)eCNG1lu2q$Q{D z5lW1$v;*>fGaIMGMEy=-Aa?2Wiw49)E}rv7l{9_ z+XAn~vIMj6RFe4qDU&3HZa)d1$qR1T;KwYKlc)~6a4T!=(5S=o`)Y{!>suRvc9JOt zjOra9F68|mSPiq27XX;xktU@WJTkibROtJ+i2WFZ2`Q>KF-fX_9wM8nAMqMF8GJa-&&p!bFai6u*-1Q(dDJ*8Xo>S5cLX9n8v^PYm? 
[GIT binary patch data for a binary test fixture omitted]

diff --git a/axolotl/tests/data/datasets/video_dataset_1/tables/learningData.csv b/axolotl/tests/data/datasets/video_dataset_1/tables/learningData.csv
new file mode 100644
index 0000000..3e38c66
--- /dev/null
+++ b/axolotl/tests/data/datasets/video_dataset_1/tables/learningData.csv
@@ -0,0 +1,3 @@
+d3mIndex,video_file,activity_label
+0,Jessica_and_Gregs_Cartwheel_Competition_cartwheel_f_cm_np1_ba_med_1.avi.mp4,cartwheel
+1,April_09_brush_hair_u_nm_np1_ba_goo_0.avi.mp4,brush_hair
diff --git a/axolotl/tests/data/docker/summing/Dockerfile b/axolotl/tests/data/docker/summing/Dockerfile
new file mode 100644
index 0000000..b88d36a
--- /dev/null
+++ b/axolotl/tests/data/docker/summing/Dockerfile
@@ -0,0 +1,17 @@
+FROM registry.gitlab.com/datadrivendiscovery/images/base:ubuntu-bionic-python36
+
+EXPOSE 8000/tcp
+
+RUN apt-get update -q -q && \
+    apt-get install --yes --force-yes runit
+
+COPY ./runsvdir-start /usr/local/sbin/runsvdir-start
+COPY ./requirements.txt /requirements.txt
+COPY ./code /code
+COPY ./etc /etc
+
+RUN pip3 install -r /requirements.txt && rm -f /requirements.txt
+
+WORKDIR /code
+
+ENTRYPOINT ["/usr/local/sbin/runsvdir-start"]
diff --git a/axolotl/tests/data/docker/summing/README.md b/axolotl/tests/data/docker/summing/README.md
new file mode 100644
index 0000000..e47a451
--- /dev/null
+++ b/axolotl/tests/data/docker/summing/README.md
@@ -0,0 +1,3 @@
+A Docker image with a simple HTTP service on port 8000: HTTP POST to it expects
+a pickled numpy array or a list (of lists) as payload and sums all elements
+together into a scalar.
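A minimal client for the service described in this README might look like the following sketch. The host name and the example payload are illustrative assumptions; only the pickled-payload protocol and port 8000 come from the service itself.

#!/usr/bin/env python3
# Sketch of a client for the summing service: pickle a nested list and POST it
# to port 8000 (the port the Dockerfile exposes). "localhost" is assumed here
# for local testing.

import pickle
import urllib.request

payload = pickle.dumps([[1, 2], [3, 4.5]])

request = urllib.request.Request(
    'http://localhost:8000/',
    data=payload,
    method='POST',
)

with urllib.request.urlopen(request) as response:
    # The service replies with the sum rendered as text, e.g. b'10.5'.
    print(response.read().decode('utf-8'))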
diff --git a/axolotl/tests/data/docker/summing/code/server.py b/axolotl/tests/data/docker/summing/code/server.py
new file mode 100755
index 0000000..01a0d1d
--- /dev/null
+++ b/axolotl/tests/data/docker/summing/code/server.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+
+import collections
+import logging
+import pickle
+from http import server
+
+import pandas
+
+logger = logging.getLogger(__name__)
+
+
+class HTTPServer(server.HTTPServer):
+    def handle_error(self, request, client_address):
+        logger.exception("Exception happened during processing of request from %(client_address)s.", {'client_address': client_address})
+
+
+class HTTPRequestHandler(server.BaseHTTPRequestHandler):
+    def do_POST(self):
+        data = self.rfile.read(int(self.headers['Content-Length']))
+        # In the future, we should read here just an ObjectId of data
+        # in Arrow format in Plasma store and read it from there.
+        value = pickle.loads(data)
+        sum = self.sum(value)
+        result = str(sum).encode('utf-8')
+
+        self.send_response(200)
+        self.send_header('Content-Length', len(result))
+        self.end_headers()
+        self.wfile.write(result)
+
+    def sum(self, value):
+        if isinstance(value, pandas.DataFrame):
+            return sum(self.sum(v) for v in value.itertuples(index=False, name=None))
+        if isinstance(value, collections.Iterable):
+            return sum(self.sum(v) for v in value)
+        else:
+            return value
+
+    def log_message(self, message, *args):
+        logger.info(message, *args)
+
+
+if __name__ == '__main__':
+    PORT = 8000
+
+    logging.basicConfig(level=logging.INFO)
+
+    logger.info("Listening on port %(port)s.", {'port': PORT})
+
+    httpd = HTTPServer(('', PORT), HTTPRequestHandler)
+
+    try:
+        httpd.serve_forever()
+    except KeyboardInterrupt:
+        pass
+
+    httpd.server_close()
+    logging.info("Server stopped.")
diff --git a/axolotl/tests/data/docker/summing/etc/service/summing/run b/axolotl/tests/data/docker/summing/etc/service/summing/run
new file mode 100755
index 0000000..5d300c7
--- /dev/null
+++ b/axolotl/tests/data/docker/summing/etc/service/summing/run
@@ -0,0 +1,4 @@
+#!/bin/bash -e
+
+cd /code
+exec chpst -u nobody:nogroup ./server.py 2>&1
diff --git a/axolotl/tests/data/docker/summing/requirements.txt b/axolotl/tests/data/docker/summing/requirements.txt
new file mode 100644
index 0000000..65e7abc
--- /dev/null
+++ b/axolotl/tests/data/docker/summing/requirements.txt
@@ -0,0 +1,2 @@
+pandas==0.21.1
+numpy==1.13.3
diff --git a/axolotl/tests/data/docker/summing/runsvdir-start b/axolotl/tests/data/docker/summing/runsvdir-start
new file mode 100755
index 0000000..a8dec8a
--- /dev/null
+++ b/axolotl/tests/data/docker/summing/runsvdir-start
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+export PATH=/usr/local/bin:/usr/local/sbin:/bin:/sbin:/usr/bin:/usr/sbin:/usr/X11R6/bin
+
+exec runsvdir -P /etc/service 'log: ...........................................................................................................................................................................................................................................................................................................................................................................................................'
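The sum helper in server.py flattens pandas DataFrames and arbitrarily nested iterables down to a scalar. A standalone sketch of the same idea is shown below; note that server.py uses collections.Iterable, which works on the Python 3.6 base image but was removed from the top-level collections module in Python 3.10, where collections.abc.Iterable is required, as in this sketch.

import collections.abc

import pandas


def recursive_sum(value):
    # DataFrames are flattened row by row, then summed like any other iterable;
    # anything that is not iterable is treated as a scalar leaf.
    if isinstance(value, pandas.DataFrame):
        return sum(recursive_sum(v) for v in value.itertuples(index=False, name=None))
    if isinstance(value, collections.abc.Iterable):
        return sum(recursive_sum(v) for v in value)
    return value


print(recursive_sum([[1, 2], [3, 4.5]]))                            # 10.5
print(recursive_sum(pandas.DataFrame({'a': [1, 2], 'b': [3, 4]})))  # 10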
diff --git a/axolotl/tests/data/generate-database-datasets.py b/axolotl/tests/data/generate-database-datasets.py
new file mode 100755
index 0000000..c0e55e2
--- /dev/null
+++ b/axolotl/tests/data/generate-database-datasets.py
@@ -0,0 +1,403 @@
+#!/usr/bin/env python3
+
+import argparse
+import enum
+import json
+import os
+import os.path
+import sys
+
+import numpy
+import pandas
+
+from d3m import container
+
+
+class DatasetType(enum.Enum):
+    COUNTS_PER_USER = 1
+    COMMENTS_PER_POST = 2
+    HAS_USER_MADE_COMMENT_ON_POST = 3
+
+
+def pareto_choice(random_state, array, size):
+    # 80/20 rule.
+    a = 1.161
+
+    p = random_state.pareto(a, size=len(array)) + 1
+    p /= numpy.sum(p)
+
+    return random_state.choice(array, size=size, replace=True, p=p)
+
+
+def generate_main_resources(random_state, resources, size):
+    users_count = size
+    posts_count = size * 10
+    comments_count = size * 10
+
+    user_ids = numpy.array(range(users_count))
+    post_ids = numpy.array(range(posts_count))
+    comment_ids = numpy.array(range(comments_count))
+
+    users = container.DataFrame({
+        'id': user_ids,
+        'name': [f'User {i}' for i in range(users_count)],
+    })
+
+    posts = container.DataFrame({
+        'id': post_ids,
+        'author_id': pareto_choice(random_state, user_ids, posts_count),
+        'post': [f'Post {i}' for i in range(posts_count)],
+    })
+
+    comments = container.DataFrame({
+        'id': comment_ids,
+        'post_id': pareto_choice(random_state, post_ids, comments_count),
+        'author_id': pareto_choice(random_state, user_ids, comments_count),
+        'comment': [f'Comment {i}' for i in range(comments_count)],
+    })
+
+    resources.update({'users': users, 'posts': posts, 'comments': comments})
+
+
+def generate_learning_data_counts_per_user(random_state, resources):
+    user_ids = resources['users'].loc[:, 'id']
+    users_count = len(user_ids)
+    posts = resources['posts']
+    comments = resources['comments']
+
+    learning_data = container.DataFrame({
+        'd3mIndex': numpy.array(range(users_count)),
+        'user_id': user_ids,
+        'posts_count': [(posts.loc[:, 'author_id'] == user_id).sum() for user_id in user_ids],
+        'comments_count': [(comments.loc[:, 'author_id'] == user_id).sum() for user_id in user_ids],
+    })
+
+    resources['learningData'] = learning_data
+
+
+def generate_learning_data_comments_per_post(random_state, resources):
+    post_ids = resources['posts'].loc[:, 'id']
+    posts_count = len(post_ids)
+    comments = resources['comments']
+
+    learning_data = container.DataFrame({
+        'd3mIndex': numpy.array(range(posts_count)),
+        'post_id': post_ids,
+        'comments_count': [(comments.loc[:, 'post_id'] == post_id).sum() for post_id in post_ids],
+    })
+
+    resources['learningData'] = learning_data
+
+
+def generate_learning_data_has_user_made_comment_on_post(random_state, resources):
+    user_ids = resources['users'].loc[:, 'id']
+    post_ids = resources['posts'].loc[:, 'id']
+    users_count = len(user_ids)
+    comments = resources['comments']
+
+    authors_and_posts = comments.loc[:, ['author_id', 'post_id']]
+
+    authors_and_posts_set = set(authors_and_posts.itertuples(index=False, name=None))
+
+    data = {
+        'user_id': [],
+        'post_id': [],
+        'made_comment': [],
+    }
+
+    for author_id, post_id in authors_and_posts.sample(n=users_count, random_state=random_state).itertuples(index=False, name=None):
+        data['user_id'].append(author_id)
+        data['post_id'].append(post_id)
+        data['made_comment'].append('yes')
+
+    for user_id in random_state.permutation(user_ids):
+        for post_id in random_state.permutation(post_ids):
+            if (user_id, post_id) in authors_and_posts_set:
+                continue
+
+            data['user_id'].append(user_id)
+            data['post_id'].append(post_id)
+            data['made_comment'].append('no')
+
+            if len(data['user_id']) == 2 * users_count:
+                break
+
+        if len(data['user_id']) == 2 * users_count:
+            break
+
+    assert len(data['user_id']) == 2 * users_count
+
+    data = container.DataFrame(data)
+    data = data.sample(frac=1.0, random_state=random_state).reset_index(drop=True)
+
+    index = container.DataFrame({
+        'd3mIndex': numpy.array(range(len(data))),
+    })
+
+    resources['learningData'] = container.DataFrame(pandas.concat([index, data], axis=1))
+
+
+def update_metadata_main_resources(dataset, dataset_id, dataset_type, size, random_seed):
+    dataset.metadata = dataset.metadata.update((), {
+        'id': dataset_id,
+        'name': f"Database dataset of type {dataset_type}",
+        'description': f"Database dataset of type {dataset_type}, size {size}, random seed {random_seed}",
+    })
+
+    dataset.metadata = dataset.metadata.update_column(0, {
+        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'http://schema.org/Integer'],
+    }, at=('users',))
+    dataset.metadata = dataset.metadata.update_column(1, {
+        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute', 'http://schema.org/Text'],
+    }, at=('users',))
+
+    dataset.metadata = dataset.metadata.update_column(0, {
+        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'http://schema.org/Integer'],
+    }, at=('posts',))
+    dataset.metadata = dataset.metadata.update_column(1, {
+        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute', 'http://schema.org/Integer'],
+        'foreign_key': {
+            'type': 'COLUMN',
+            'resource_id': 'users',
+            'column_index': 0,
+        },
+    }, at=('posts',))
+    dataset.metadata = dataset.metadata.update_column(2, {
+        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute', 'http://schema.org/Text'],
+    }, at=('posts',))
+
+    dataset.metadata = dataset.metadata.update_column(0, {
+        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'http://schema.org/Integer'],
+    }, at=('comments',))
+    dataset.metadata = dataset.metadata.update_column(1, {
+        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute', 'http://schema.org/Integer'],
+        'foreign_key': {
+            'type': 'COLUMN',
+            'resource_id': 'posts',
+            'column_index': 0,
+        },
+    }, at=('comments',))
+    dataset.metadata = dataset.metadata.update_column(2, {
+        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute', 'http://schema.org/Integer'],
+        'foreign_key': {
+            'type': 'COLUMN',
+            'resource_id': 'users',
+            'column_index': 0,
+        },
+    }, at=('comments',))
+    dataset.metadata = dataset.metadata.update_column(3, {
+        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute', 'http://schema.org/Text'],
+    }, at=('comments',))
+
+
+def update_metadata_counts_per_user(dataset):
+    dataset.metadata = dataset.metadata.update_column(0, {
+        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'http://schema.org/Integer'],
+    }, at=('learningData',))
+    dataset.metadata = dataset.metadata.update_column(1, {
+        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute', 'http://schema.org/Integer'],
+        'foreign_key': {
+            'type': 'COLUMN',
+            'resource_id': 'users',
+            'column_index': 0,
+        },
+    }, at=('learningData',))
+    dataset.metadata = dataset.metadata.update_column(2, {
+        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/SuggestedTarget', 'http://schema.org/Integer'],
'http://schema.org/Integer'], + }, at=('learningData',)) + dataset.metadata = dataset.metadata.update_column(3, { + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/SuggestedTarget', 'http://schema.org/Integer'], + }, at=('learningData',)) + + +def update_metadata_comments_per_post(dataset): + dataset.metadata = dataset.metadata.update_column(0, { + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'http://schema.org/Integer'], + }, at=('learningData',)) + dataset.metadata = dataset.metadata.update_column(1, { + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute', 'http://schema.org/Integer'], + 'foreign_key': { + 'type': 'COLUMN', + 'resource_id': 'posts', + 'column_index': 0, + }, + }, at=('learningData',)) + dataset.metadata = dataset.metadata.update_column(2, { + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/SuggestedTarget', 'http://schema.org/Integer'], + }, at=('learningData',)) + + +def update_metadata_has_user_made_comment_on_post(dataset): + dataset.metadata = dataset.metadata.update_column(0, { + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'http://schema.org/Integer'], + }, at=('learningData',)) + dataset.metadata = dataset.metadata.update_column(1, { + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute', 'http://schema.org/Integer'], + 'foreign_key': { + 'type': 'COLUMN', + 'resource_id': 'users', + 'column_index': 0, + }, + }, at=('learningData',)) + dataset.metadata = dataset.metadata.update_column(2, { + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute', 'http://schema.org/Integer'], + 'foreign_key': { + 'type': 'COLUMN', + 'resource_id': 'posts', + 'column_index': 0, + }, + }, at=('learningData',)) + dataset.metadata = dataset.metadata.update_column(3, { + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/SuggestedTarget', 'http://schema.org/Boolean'], + }, at=('learningData',)) + + +def handler(arguments): + random_state = numpy.random.RandomState(arguments.random_seed) + + resources = {} + generate_main_resources(random_state, resources, arguments.size) + + if arguments.dataset_type == DatasetType.COUNTS_PER_USER: + generate_learning_data_counts_per_user(random_state, resources) + + elif arguments.dataset_type == DatasetType.COMMENTS_PER_POST: + generate_learning_data_comments_per_post(random_state, resources) + + elif arguments.dataset_type == DatasetType.HAS_USER_MADE_COMMENT_ON_POST: + generate_learning_data_has_user_made_comment_on_post(random_state, resources) + + else: + raise ValueError(f"Unknown dataset type: {arguments.dataset_type.name}") + + dataset = container.Dataset(resources, generate_metadata=True) + update_metadata_main_resources(dataset, arguments.dataset_id, arguments.dataset_type.name, arguments.size, arguments.random_seed) + + if arguments.dataset_type == DatasetType.COUNTS_PER_USER: + update_metadata_counts_per_user(dataset) + + elif arguments.dataset_type == DatasetType.COMMENTS_PER_POST: + update_metadata_comments_per_post(dataset) + + elif arguments.dataset_type == DatasetType.HAS_USER_MADE_COMMENT_ON_POST: + update_metadata_has_user_made_comment_on_post(dataset) + + else: + raise ValueError(f"Unknown dataset type: {arguments.dataset_type.name}") + + dataset_output_uri = 'file://' + os.path.join(os.path.abspath(arguments.output_dir), arguments.dataset_id, 'datasetDoc.json') + + dataset.save(dataset_output_uri) + + 
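+    # The matching problem directory and problemDoc.json are generated next.
+    # For illustration only (the dataset/problem IDs and output path below are hypothetical),
+    # a run of this script that reaches this point could look like:
+    #
+    #   ./generate-database-datasets.py --dataset-type COUNTS_PER_USER \
+    #       --dataset-id database_dataset_x --problem-id database_problem_x \
+    #       --size 100 --output-dir /tmp/datasets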
os.makedirs(os.path.join(os.path.abspath(arguments.output_dir), arguments.problem_id)) + + with open(os.path.join(os.path.abspath(arguments.output_dir), arguments.problem_id, 'problemDoc.json'), 'x', encoding='utf8') as problem_file: + if arguments.dataset_type == DatasetType.COUNTS_PER_USER: + task_keywords = ['regression', 'multivariate'] + metric = { + 'metric': 'rootMeanSquaredError', + } + targets = [ + { + 'targetIndex': 0, + 'resID': 'learningData', + 'colIndex': 2, + 'colName': 'posts_count', + }, + { + 'targetIndex': 1, + 'resID': 'learningData', + 'colIndex': 3, + 'colName': 'comments_count', + }, + ] + elif arguments.dataset_type == DatasetType.COMMENTS_PER_POST: + task_keywords = ['regression', 'univariate'] + metric = { + 'metric': 'rootMeanSquaredError', + } + targets = [ + { + 'targetIndex': 0, + 'resID': 'learningData', + 'colIndex': 2, + 'colName': 'comments_count', + }, + ] + elif arguments.dataset_type == DatasetType.HAS_USER_MADE_COMMENT_ON_POST: + task_keywords = ['classification', 'binary'] + metric = { + 'metric': 'f1', + 'posLabel': 'yes', + } + targets = [ + { + 'targetIndex': 0, + 'resID': 'learningData', + 'colIndex': 3, + 'colName': 'made_comment', + }, + ] + + json.dump({ + 'about': { + 'problemID': arguments.problem_id, + 'problemName': f"Database problem of type {arguments.dataset_type.name}", + 'taskKeywords': task_keywords, + 'problemSchemaVersion': '4.0.0', + }, + 'inputs': { + 'data': [ + { + 'datasetID': arguments.dataset_id, + 'targets': targets, + }, + ], + 'performanceMetrics': [ + metric, + ], + }, + 'expectedOutputs': { + 'predictionsFile': 'predictions.csv', + 'scoresFile': 'scores.csv', + }, + }, problem_file, indent=2) + + +def main(argv): + parser = argparse.ArgumentParser(description="Generate database datasets.") + + parser.add_argument( + '--dataset-type', choices=[dataset_type.name for dataset_type in DatasetType], action='store', required=True, + help="what type of dataset to generate", + ) + parser.add_argument( + '--dataset-id', action='store', required=True, + help="dataset ID to use", + ) + parser.add_argument( + '--problem-id', action='store', required=True, + help="problem ID to use", + ) + parser.add_argument( + '--random-seed', type=int, action='store', default=0, + help="random seed to use", + ) + parser.add_argument( + '--size', type=int, action='store', default=1000, + help="size of dataset to generate", + ) + parser.add_argument( + '--output-dir', action='store', default='.', + help="directory where to store generated dataset and problem, default is current directory", + ) + + arguments = parser.parse_args(argv[1:]) + + arguments.dataset_type = DatasetType[arguments.dataset_type] + + handler(arguments) + + +if __name__ == '__main__': + main(sys.argv) diff --git a/axolotl/tests/data/pipelines/data-preparation-no-split.yml b/axolotl/tests/data/pipelines/data-preparation-no-split.yml new file mode 100644 index 0000000..2a1ff87 --- /dev/null +++ b/axolotl/tests/data/pipelines/data-preparation-no-split.yml @@ -0,0 +1,36 @@ +id: fcaddd7f-39fa-49cc-9f31-c3f326b85557 +schema: https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json +source: + name: Daragh +created: "2019-04-29T04:16:39.642266Z" +context: TESTING +name: Data preparation pipeline - no split version +description: | + Data preparation which just passes all data through unchanged +inputs: + - name: folds + - name: full dataset +outputs: + - name: train datasets + data: steps.0.produce + - name: test datasets + data: steps.0.produce + - name: score datasets + data: 
steps.0.produce +steps: + # Step 0. + - type: PRIMITIVE + primitive: + id: 48c683ad-da9e-48cf-b3a0-7394dba5e5d2 + version: 0.1.0 + python_path: d3m.primitives.evaluation.no_split_dataset_split.Common + name: No-split tabular dataset splits + arguments: + inputs: + type: CONTAINER + data: inputs.0 + dataset: + type: CONTAINER + data: inputs.1 + outputs: + - id: produce diff --git a/axolotl/tests/data/pipelines/data-preparation-train-test-split.yml b/axolotl/tests/data/pipelines/data-preparation-train-test-split.yml new file mode 100644 index 0000000..5ff94d6 --- /dev/null +++ b/axolotl/tests/data/pipelines/data-preparation-train-test-split.yml @@ -0,0 +1,37 @@ +id: 0168fd77-5310-472e-a755-1bb89edcbffd +schema: https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json +source: + name: Mitar +created: "2019-05-01T23:54:43.334702Z" +context: TESTING +name: Data preparation pipeline - train-test split version +description: | + Data preparation which does train-test split but does not redact anything +inputs: + - name: folds + - name: full dataset +outputs: + - name: train datasets + data: steps.0.produce + - name: test datasets + data: steps.0.produce_score_data + - name: score datasets + data: steps.0.produce_score_data +steps: + # Step 0. + - type: PRIMITIVE + primitive: + id: 3fcc6dc4-6681-4c86-948e-066d14e7d803 + version: 0.1.0 + python_path: d3m.primitives.evaluation.train_score_dataset_split.Common + name: Train-score tabular dataset splits + arguments: + inputs: + type: CONTAINER + data: inputs.0 + dataset: + type: CONTAINER + data: inputs.1 + outputs: + - id: produce + - id: produce_score_data diff --git a/axolotl/tests/data/pipelines/fake_compute_score.yml b/axolotl/tests/data/pipelines/fake_compute_score.yml new file mode 100644 index 0000000..d73546f --- /dev/null +++ b/axolotl/tests/data/pipelines/fake_compute_score.yml @@ -0,0 +1,31 @@ +id: de6443dd-de0a-4000-9b4e-383920019571 +schema: https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json +source: + name: Mitar +created: "2018-07-27T18:04:43.650608Z" +name: Fake scoring pipeline +description: | + A fake scoring pipeline calling fake_score primitive. +inputs: + - name: predictions + - name: score dataset +outputs: + - name: scores + data: steps.0.produce +steps: + # Step 0. + - type: PRIMITIVE + primitive: + id: 1c4d5cbd-163c-424d-8be5-0f267641ae34 + version: 0.1.0 + python_path: d3m.primitives.evaluation.compute_scores.Test + name: Generate fake scores for testing + arguments: + inputs: + type: CONTAINER + data: inputs.0 + score_dataset: + type: CONTAINER + data: inputs.1 + outputs: + - id: produce diff --git a/axolotl/tests/data/pipelines/increment-dataframe.yml b/axolotl/tests/data/pipelines/increment-dataframe.yml new file mode 100644 index 0000000..95d366b --- /dev/null +++ b/axolotl/tests/data/pipelines/increment-dataframe.yml @@ -0,0 +1,55 @@ +id: 1b6184c1-49ba-44f8-b02d-90fb41e65e1a +schema: https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json +created: "2018-10-08 02:56:45.277695Z" +description: | + Test pipeline used in simple-ta3. It expects a DataFrame as input. 
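+# The steps below: add_semantic_types.Common marks column 0 as http://schema.org/Integer,
+# column_parser.Common then parses the column, and operator.increment.Test increments the
+# parsed values; the final output is steps.2.produce.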
+inputs: + - name: dataframe +outputs: + - data: steps.2.produce +steps: + - type: PRIMITIVE + primitive: + id: d7e14b12-abeb-42d8-942f-bdb077b4fd37 + version: 0.1.0 + python_path: d3m.primitives.data_transformation.add_semantic_types.Common + name: Add semantic types to columns + arguments: + inputs: + type: CONTAINER + data: inputs.0 + outputs: + - id: produce + hyperparams: + columns: + type: VALUE + data: + - 0 + semantic_types: + type: VALUE + data: + - http://schema.org/Integer + - type: PRIMITIVE + primitive: + id: d510cb7a-1782-4f51-b44c-58f0236e47c7 + version: 0.6.0 + python_path: d3m.primitives.data_transformation.column_parser.Common + name: Parses strings into their types + arguments: + inputs: + type: CONTAINER + data: steps.0.produce + outputs: + - id: produce + - type: PRIMITIVE + primitive: + id: 5c9d5acf-7754-420f-a49f-90f4d9d0d694 + version: 0.1.0 + python_path: d3m.primitives.operator.increment.Test + name: Increment Values + arguments: + inputs: + type: CONTAINER + data: steps.1.produce + outputs: + - id: produce diff --git a/axolotl/tests/data/pipelines/multi-input-test.json b/axolotl/tests/data/pipelines/multi-input-test.json new file mode 100644 index 0000000..627ff86 --- /dev/null +++ b/axolotl/tests/data/pipelines/multi-input-test.json @@ -0,0 +1,85 @@ +{ + "created": "2019-03-14T03:22:12.490865Z", + "id": "f52d3690-31fb-458c-ad4c-4c2be11f5f36", + "inputs": [ + { + "name": "inputs1" + }, + { + "name": "inputs0" + } + ], + "outputs": [ + { + "data": "steps.2.produce", + "name": "output" + } + ], + "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", + "steps": [ + { + "type": "PRIMITIVE", + "primitive": { + "id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", + "name": "Extract a DataFrame from a Dataset", + "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", + "version": "0.3.0" + }, + "arguments": { + "inputs": { + "data": "inputs.0", + "type": "CONTAINER" + } + }, + "outputs": [ + { + "id": "produce" + } + ] + }, + { + "type": "PRIMITIVE", + "primitive": { + "id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", + "name": "Extract a DataFrame from a Dataset", + "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", + "version": "0.3.0" + }, + "arguments": { + "inputs": { + "data": "inputs.1", + "type": "CONTAINER" + } + }, + "outputs": [ + { + "id": "produce" + } + ] + }, + { + "type": "PRIMITIVE", + "primitive": { + "id": "b8d0d982-fc53-4a3f-8a8c-a284fdd45bfd", + "name": "Random Classifier", + "python_path": "d3m.primitives.classification.random_classifier.Test", + "version": "0.1.0" + }, + "arguments": { + "inputs": { + "data": "steps.0.produce", + "type": "CONTAINER" + }, + "outputs": { + "data": "steps.1.produce", + "type": "CONTAINER" + } + }, + "outputs": [ + { + "id": "produce" + } + ] + } + ] +} diff --git a/axolotl/tests/data/pipelines/random-classifier.yml b/axolotl/tests/data/pipelines/random-classifier.yml new file mode 100644 index 0000000..79c866e --- /dev/null +++ b/axolotl/tests/data/pipelines/random-classifier.yml @@ -0,0 +1,58 @@ +id: b1e92676-f3c4-4c10-a014-b33a55217540 +schema: https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json +source: + name: Mitar +created: "2019-05-04T09:42:27.443844Z" +context: TESTING +name: A random classifier pipeline for tests +description: | + A simple pipeline which runs Random classifier on tabular data. + It does not depend on TrueTarget and is useful just for testing. 
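+# The steps below: dataset_to_dataframe.Common extracts the tabular resource,
+# column_parser.Common parses column types, and classification.random_classifier.Test
+# produces the predictions exposed as steps.2.produce.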
+inputs: + - name: input dataset +outputs: + - name: predictions + data: steps.2.produce +steps: + # Step 0. + - type: PRIMITIVE + primitive: + id: 4b42ce1e-9b98-4a25-b68e-fad13311eb65 + version: 0.3.0 + python_path: d3m.primitives.data_transformation.dataset_to_dataframe.Common + name: Extract a DataFrame from a Dataset + arguments: + inputs: + type: CONTAINER + data: inputs.0 + outputs: + - id: produce + # Step 1. + - type: PRIMITIVE + primitive: + id: d510cb7a-1782-4f51-b44c-58f0236e47c7 + version: 0.6.0 + python_path: d3m.primitives.data_transformation.column_parser.Common + name: Parses strings into their types + arguments: + inputs: + type: CONTAINER + data: steps.0.produce + outputs: + - id: produce + # Step 2. + - type: PRIMITIVE + primitive: + id: b8d0d982-fc53-4a3f-8a8c-a284fdd45bfd + version: 0.1.0 + python_path: d3m.primitives.classification.random_classifier.Test + name: Random Classifier + arguments: + inputs: + type: CONTAINER + data: steps.1.produce + outputs: + type: CONTAINER + data: steps.1.produce + outputs: + - id: produce diff --git a/axolotl/tests/data/pipelines/random-forest-classifier.yml b/axolotl/tests/data/pipelines/random-forest-classifier.yml new file mode 100644 index 0000000..cfc3062 --- /dev/null +++ b/axolotl/tests/data/pipelines/random-forest-classifier.yml @@ -0,0 +1,74 @@ +id: 9ae0b7f5-613a-4ca2-975f-83cf9317a03c +schema: https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json +source: + name: Mitar +created: "2018-02-28T09:42:27.443844Z" +context: TESTING +name: A random forest classifier pipeline for tests +description: | + A simple pipeline which runs Random Forest classifier on tabular data. + It does not do imputation so not useful as a general pipeline. +inputs: + - name: input dataset +outputs: + - name: predictions + data: steps.3.produce +steps: + # Step 0. + - type: PRIMITIVE + primitive: + id: 4b42ce1e-9b98-4a25-b68e-fad13311eb65 + version: 0.3.0 + python_path: d3m.primitives.data_transformation.dataset_to_dataframe.Common + name: Extract a DataFrame from a Dataset + arguments: + inputs: + type: CONTAINER + data: inputs.0 + outputs: + - id: produce + # Step 1. + - type: PRIMITIVE + primitive: + id: d510cb7a-1782-4f51-b44c-58f0236e47c7 + version: 0.6.0 + python_path: d3m.primitives.data_transformation.column_parser.Common + name: Parses strings into their types + arguments: + inputs: + type: CONTAINER + data: steps.0.produce + outputs: + - id: produce + # Step 2. + - type: PRIMITIVE + primitive: + id: 37c2b19d-bdab-4a30-ba08-6be49edcc6af + version: 0.4.0 + python_path: d3m.primitives.classification.random_forest.Common + name: Random forest classifier + arguments: + inputs: + type: CONTAINER + data: steps.1.produce + outputs: + type: CONTAINER + data: steps.1.produce + outputs: + - id: produce + # Step 3. 
+ - type: PRIMITIVE + primitive: + id: 8d38b340-f83f-4877-baaa-162f8e551736 + version: 0.3.0 + python_path: d3m.primitives.data_transformation.construct_predictions.Common + name: Construct pipeline predictions output + arguments: + inputs: + type: CONTAINER + data: steps.2.produce + reference: + type: CONTAINER + data: steps.1.produce + outputs: + - id: produce diff --git a/axolotl/tests/data/pipelines/random-sample.yml b/axolotl/tests/data/pipelines/random-sample.yml new file mode 100644 index 0000000..abf0756 --- /dev/null +++ b/axolotl/tests/data/pipelines/random-sample.yml @@ -0,0 +1,32 @@ +id: 30e8be4b-3aec-447b-9c36-f9a37c81c3ed +schema: https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json +created: "2018-07-27T15:40:34.012397Z" +inputs: + - name: indices +outputs: + - data: steps.1.produce +steps: + - type: PRIMITIVE + primitive: + id: df3153a1-4411-47e2-bbc0-9d5e9925ad79 + version: 0.1.0 + python_path: d3m.primitives.data_generation.random.Test + name: Random Samples + arguments: + inputs: + type: CONTAINER + data: inputs.0 + outputs: + - id: produce + - type: PRIMITIVE + primitive: + id: 5c9d5acf-7754-420f-a49f-90f4d9d0d694 + version: 0.1.0 + python_path: d3m.primitives.operator.increment.Test + name: Increment Values + arguments: + inputs: + type: CONTAINER + data: steps.0.produce + outputs: + - id: produce diff --git a/axolotl/tests/data/pipelines/semi-standard-pipeline.json b/axolotl/tests/data/pipelines/semi-standard-pipeline.json new file mode 100644 index 0000000..72af83a --- /dev/null +++ b/axolotl/tests/data/pipelines/semi-standard-pipeline.json @@ -0,0 +1,67 @@ +{ + "created": "2019-03-14T03:22:12.490865Z", + "id": "f52d3690-31fb-458c-ad4c-4c2be11f5f36", + "description": "A non-standard pipeline, which still takes a Dataset input, but returns two outputs.", + "inputs": [ + { + "name": "inputs0" + } + ], + "outputs": [ + { + "data": "steps.1.produce", + "name": "output" + }, + { + "data": "steps.0.produce", + "name": "dataframe-output" + } + ], + "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", + "steps": [ + { + "type": "PRIMITIVE", + "primitive": { + "id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", + "name": "Extract a DataFrame from a Dataset", + "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", + "version": "0.3.0" + }, + "arguments": { + "inputs": { + "data": "inputs.0", + "type": "CONTAINER" + } + }, + "outputs": [ + { + "id": "produce" + } + ] + }, + { + "type": "PRIMITIVE", + "primitive": { + "id": "b8d0d982-fc53-4a3f-8a8c-a284fdd45bfd", + "name": "Random Classifier", + "python_path": "d3m.primitives.classification.random_classifier.Test", + "version": "0.1.0" + }, + "arguments": { + "inputs": { + "data": "steps.0.produce", + "type": "CONTAINER" + }, + "outputs": { + "data": "steps.0.produce", + "type": "CONTAINER" + } + }, + "outputs": [ + { + "id": "produce" + } + ] + } + ] +} diff --git a/axolotl/tests/data/primitives/setup.cfg b/axolotl/tests/data/primitives/setup.cfg new file mode 100644 index 0000000..b6a8bc3 --- /dev/null +++ b/axolotl/tests/data/primitives/setup.cfg @@ -0,0 +1,25 @@ +[pycodestyle] +max-line-length = 200 + +[metadata] +description-file = README.md + +[mypy] +warn_redundant_casts = True +# TODO: Enable back once false positives are fixed. 
+# See: https://github.com/python/mypy/issues/4412 +#warn_unused_ignores = True +warn_unused_configs = True +disallow_untyped_defs = True + +# TODO: Remove once this is fixed: https://github.com/python/mypy/issues/4300 +[mypy-d3m.container.list] +ignore_errors = True + +# TODO: Remove once this is fixed: https://github.com/python/mypy/issues/4300 +[mypy-d3m.metadata.hyperparams] +ignore_errors = True + +# TODO: Remove once this is fixed: https://github.com/python/mypy/pull/4384#issuecomment-354033177 +[mypy-d3m.primitive_interfaces.distance] +ignore_errors = True diff --git a/axolotl/tests/data/primitives/setup.py b/axolotl/tests/data/primitives/setup.py new file mode 100644 index 0000000..8eafc35 --- /dev/null +++ b/axolotl/tests/data/primitives/setup.py @@ -0,0 +1,42 @@ +import os +from setuptools import setup, find_packages + +PACKAGE_NAME = 'test_primitives' + + +def read_package_variable(key): + """Read the value of a variable from the package without importing.""" + module_path = os.path.join(PACKAGE_NAME, '__init__.py') + with open(module_path) as module: + for line in module: + parts = line.strip().split(' ') + if parts and parts[0] == key: + return parts[-1].strip("'") + raise KeyError("'{0}' not found in '{1}'".format(key, module_path)) + + +setup( + name=PACKAGE_NAME, + version=read_package_variable('__version__'), + description='Test primitives', + author=read_package_variable('__author__'), + packages=find_packages(exclude=['contrib', 'docs', 'tests*']), + install_requires=[ + 'd3m', + ], + url='https://gitlab.com/datadrivendiscovery/tests-data', + keywords='d3m_primitive', + entry_points={ + 'd3m.primitives': [ + 'regression.monomial.Test = test_primitives.monomial:MonomialPrimitive', + 'operator.increment.Test = test_primitives.increment:IncrementPrimitive', + 'operator.sum.Test = test_primitives.sum:SumPrimitive', + 'data_generation.random.Test = test_primitives.random:RandomPrimitive', + 'operator.primitive_sum.Test = test_primitives.primitive_sum:PrimitiveSumPrimitive', + 'operator.null.TransformerTest = test_primitives.null:NullTransformerPrimitive', + 'operator.null.UnsupervisedLearnerTest = test_primitives.null:NullUnsupervisedLearnerPrimitive', + 'classification.random_classifier.Test = test_primitives.random_classifier:RandomClassifierPrimitive', + 'evaluation.compute_scores.Test = test_primitives.fake_score:FakeScorePrimitive', + ], + }, +) diff --git a/axolotl/tests/data/primitives/test_primitives/__init__.py b/axolotl/tests/data/primitives/test_primitives/__init__.py new file mode 100644 index 0000000..bce4e13 --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/__init__.py @@ -0,0 +1,2 @@ +__author__ = 'Test team' +__version__ = '0.1.0' diff --git a/axolotl/tests/data/primitives/test_primitives/abs_sum.py b/axolotl/tests/data/primitives/test_primitives/abs_sum.py new file mode 100644 index 0000000..0b130dc --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/abs_sum.py @@ -0,0 +1,80 @@ +import os.path +import typing + +import numpy as np # type: ignore + +from d3m import container, utils, exceptions +from d3m.metadata import hyperparams, base as metadata_base +from d3m.primitive_interfaces import base, transformer + +from . import __author__, __version__ + +__all__ = ('AbsSumPrimitive',) + + +Inputs = typing.Union[container.ndarray, container.DataFrame, container.List] +Outputs = container.List + + +class Hyperparams(hyperparams.Hyperparams): + """ + No hyper-parameters for this primitive. 
+ """ + + pass + + +class AbsSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A primitive that sums the absolute value of the elements in a container and returns a list with a single value: the sum. + """ + + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + 'id': '24de67db-aa08-4b66-85b2-b7be97154cf6', + 'version': __version__, + 'name': "Absolute Sum Test Primitive", + 'keywords': ['test primitive'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/abs_sum.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/abs_sum.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + 'python_path': 'd3m.primitives.operator.sum.AbsTest', + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.COMPUTER_ALGEBRA, + ], + 'primitive_family': metadata_base.PrimitiveFamily.OPERATOR, + }) + + def __init__(self, *, hyperparams: Hyperparams) -> None: + super().__init__(hyperparams=hyperparams) + + @base.singleton + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + result = np.abs(self._convert_value(inputs)).sum() + outputs = container.List((result,), generate_metadata=True) + return base.CallResult(outputs) + + def _convert_value(self, value: typing.Any) -> typing.Union[np.ndarray, typing.List, typing.Any]: + if isinstance(value, container.ndarray): + return value.view(np.ndarray) + elif isinstance(value, container.List): + return [self._convert_value(v) for v in value] + elif isinstance(value, container.DataFrame): + return value.values + else: + raise exceptions.InvalidArgumentTypeError('Input value must be an instance of `container.ndarray`, `container.List`, or `container.DataFrame.') diff --git a/axolotl/tests/data/primitives/test_primitives/container_hyperparam.py b/axolotl/tests/data/primitives/test_primitives/container_hyperparam.py new file mode 100644 index 0000000..10b84a5 --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/container_hyperparam.py @@ -0,0 +1,68 @@ +import os.path +import typing + +import numpy as np # type: ignore + +from d3m import container, utils +from d3m.metadata import hyperparams, base as metadata_base +from d3m.primitive_interfaces import base, transformer + +from . 
import __author__, __version__ + +__all__ = ('ContainerHyperparamPrimitive',) + + +Inputs = container.DataFrame +Outputs = container.DataFrame + + +class Hyperparams(hyperparams.Hyperparams): + dataframe = hyperparams.Hyperparameter[container.DataFrame]( + default=container.DataFrame(0, index=np.arange(10), columns=['Values'], generate_metadata=True), + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + description='The values to be added to input, element-wise' + ) + + +class ContainerHyperparamPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A primitive which uses a hyperparam of type container_argument. + """ + + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + 'id': '442b600e-1144-11e9-ab14-d663bd873d93', + 'version': __version__, + 'name': "Container Hyperparam Tester", + 'keywords': ['test primitive'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/container_hyperparam.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/container_hyperparam.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + 'python_path': 'd3m.primitives.operator.sum.ContainerHyperparamTest', + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.COMPUTER_ALGEBRA, + ], + 'primitive_family': metadata_base.PrimitiveFamily.OPERATOR, + }) + + def __init__(self, *, hyperparams: Hyperparams) -> None: + super().__init__(hyperparams=hyperparams) + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + outputs = inputs + self.hyperparams['dataframe'] + return base.CallResult(outputs) diff --git a/axolotl/tests/data/primitives/test_primitives/data_hyperparam.py b/axolotl/tests/data/primitives/test_primitives/data_hyperparam.py new file mode 100644 index 0000000..dea065d --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/data_hyperparam.py @@ -0,0 +1,66 @@ +import os.path +import typing + +from d3m import container, utils +from d3m.metadata import hyperparams, base as metadata_base +from d3m.primitive_interfaces import base, transformer + +from . import __author__, __version__ + +__all__ = ('DataHyperparamPrimitive',) + + +Inputs = container.DataFrame +Outputs = container.DataFrame + + +class Hyperparams(hyperparams.Hyperparams): + value = hyperparams.Hyperparameter[float]( + default=1, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description='The value to be added to input' + ) + + +class DataHyperparamPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A primitive that requires a data argument hyperparam. 
+ """ + + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + 'id': '98582315-33f9-4fe9-91a4-5d768a123aa8', + 'version': __version__, + 'name': "Data Hyperparam Test Primitive", + 'keywords': ['test primitive'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/data_hyperparam.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/data_hyperparam.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + 'python_path': 'd3m.primitives.operator.sum.DataHyperparamTest', + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.COMPUTER_ALGEBRA, + ], + 'primitive_family': metadata_base.PrimitiveFamily.OPERATOR, + }) + + def __init__(self, *, hyperparams: Hyperparams) -> None: + super().__init__(hyperparams=hyperparams) + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + outputs = inputs.add(self.hyperparams['value']) + return base.CallResult(outputs) diff --git a/axolotl/tests/data/primitives/test_primitives/fail.py b/axolotl/tests/data/primitives/test_primitives/fail.py new file mode 100644 index 0000000..10df35b --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/fail.py @@ -0,0 +1,106 @@ +import os.path +import typing + +from d3m import container, exceptions, utils +from d3m.metadata import hyperparams, base as metadata_base +from d3m.primitive_interfaces import base, transformer + +from . import __author__, __version__ + +__all__ = ('FailPrimitive',) + + +Inputs = container.DataFrame +Outputs = container.DataFrame + + +class Hyperparams(hyperparams.Hyperparams): + + method_to_fail = hyperparams.Enumeration[str]( + values=['__init__', 'set_training_data', 'fit', 'produce', 'none'], + default='produce', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], + description="The name of the method the user wants this primitive to fail on.", + ) + + +class IntentionalError(Exception): + """ + Exception raised for testing purposes. + + Parameters + ---------- + class_name : str + Name of the class where the error occurred. + method_name : str + Name of the method where the error occurred. + """ + + def __init__(self, class_name: str, method_name: str) -> None: + message = f"This is an exception raised by a(n) {class_name} object in the {method_name} method" + super().__init__(message) + + +class FailPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A primitive which fails on the requested method (given as hyper-parameter). + + Moreover, primitive does not correctly preserve state so if you pickle + and unpickle it, it does not seen itself as fitted anymore. 
+ """ + + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + 'id': 'd6dfbefa-0fb8-11e9-ab14-d663bd873d93', + 'version': __version__, + 'name': "Failure Tester", + 'keywords': ['test primitive'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/fail.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/fail.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + 'python_path': 'd3m.primitives.operator.null.FailTest', + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.IDENTITY_FUNCTION, + ], + 'primitive_family': metadata_base.PrimitiveFamily.OPERATOR, + }) + + def __init__(self, *, hyperparams: Hyperparams) -> None: + super().__init__(hyperparams=hyperparams) + self._conditional_fail('__init__') + self._fitted = False + + def _conditional_fail(self, method_name: str) -> None: + if self.hyperparams['method_to_fail'] == method_name: + raise IntentionalError(self.__class__.__name__, method_name) + + def set_training_data(self) -> None: # type: ignore + self._conditional_fail('set_training_data') + self._fitted = False + super().set_training_data() + + def fit(self, *, timeout: float = None, iterations: int = None) -> base.CallResult[None]: + self._conditional_fail('fit') + self._fitted = True + return super().fit(timeout=timeout, iterations=iterations) + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + self._conditional_fail('produce') + if not self._fitted: + raise exceptions.PrimitiveNotFittedError("Primitive is not fitted.") + return base.CallResult(inputs) diff --git a/axolotl/tests/data/primitives/test_primitives/fake_score.py b/axolotl/tests/data/primitives/test_primitives/fake_score.py new file mode 100644 index 0000000..f8b1080 --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/fake_score.py @@ -0,0 +1,100 @@ +import os.path +import typing + +from d3m import container, utils +from d3m.metadata import base as metadata_base, hyperparams, problem +from d3m.primitive_interfaces import base, transformer + +from . import __author__, __version__ + +__all__ = ('FakeScorePrimitive',) + +Inputs = container.DataFrame +Outputs = container.DataFrame + + +class Hyperparams(hyperparams.Hyperparams): + pass + + +class FakeScorePrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A primitive that takes a DataFrame and returns hard-coded fake accuracy scores. 
+ """ + + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata( + { + 'id': '1c4d5cbd-163c-424d-8be5-0f267641ae34', + 'version': __version__, + 'name': "Generate fake scores for testing", + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/fake_score.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/fake_score.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + 'python_path': 'd3m.primitives.evaluation.compute_scores.Test', + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.ACCURACY_SCORE, + ], + 'primitive_family': metadata_base.PrimitiveFamily.EVALUATION, + }, + ) + + def produce( # type: ignore + self, *, inputs: Inputs, score_dataset: container.Dataset, timeout: float = None, + iterations: int = None, + ) -> base.CallResult[Outputs]: + outputs: typing.Dict[str, typing.List] = { + 'metric': [problem.PerformanceMetric.ACCURACY.name], + 'value': [1.0], + 'normalized': [1.0], + } + + results = container.DataFrame(data=outputs, columns=list(outputs.keys()), generate_metadata=True) + + results.metadata = results.metadata.add_semantic_type( + (metadata_base.ALL_ELEMENTS, 0), + 'https://metadata.datadrivendiscovery.org/types/PrimaryMultiKey', + ) + results.metadata = results.metadata.add_semantic_type( + (metadata_base.ALL_ELEMENTS, 1), + 'https://metadata.datadrivendiscovery.org/types/Score', + ) + results.metadata = results.metadata.add_semantic_type( + (metadata_base.ALL_ELEMENTS, 2), + 'https://metadata.datadrivendiscovery.org/types/Score', + ) + + return base.CallResult(results) + + def multi_produce( # type: ignore + self, *, produce_methods: typing.Sequence[str], inputs: Inputs, + score_dataset: container.Dataset, timeout: float = None, iterations: int = None, + ) -> base.MultiCallResult: + return self._multi_produce( + produce_methods=produce_methods, timeout=timeout, iterations=iterations, + inputs=inputs, score_dataset=score_dataset, + ) + + def fit_multi_produce( # type: ignore + self, *, produce_methods: typing.Sequence[str], inputs: Inputs, + score_dataset: container.Dataset, timeout: float = None, iterations: int = None + ) -> base.MultiCallResult: + return self._fit_multi_produce( + produce_methods=produce_methods, timeout=timeout, iterations=iterations, + inputs=inputs, score_dataset=score_dataset, + ) diff --git a/axolotl/tests/data/primitives/test_primitives/file_reader.py b/axolotl/tests/data/primitives/test_primitives/file_reader.py new file mode 100644 index 0000000..178ff18 --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/file_reader.py @@ -0,0 +1,71 @@ +import os +import typing + +import numpy # type: ignore +import frozendict # type: ignore + +from d3m import container, utils as d3m_utils +from d3m.base import primitives +from d3m.metadata import base as metadata_base + +from . 
import __author__, __version__ + +__all__ = ('DummyImageReaderPrimitive',) + + +class DummyImageReaderPrimitive(primitives.FileReaderPrimitiveBase): + """ + A primitive which pretends to read columns referencing image files, + but returns just the basename of the file path as dummy value of the file, + wrapped inside a 1x1 ndarray. + """ + + _supported_media_types = ( + 'image/jpeg', + 'image/png', + ) + _file_structural_type = container.ndarray + _file_semantic_types = ('http://schema.org/ImageObject',) + + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata( + { + 'id': '4f6e56b6-4ece-444b-9354-5a2b4e575a13', + 'version': __version__, + 'name': 'Dummy image reader', + 'python_path': 'd3m.primitives.data_preprocessing.image_reader.Test', + 'keywords': ['image', 'reader', 'jpg', 'png'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/file_reader.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.FILE_MANIPULATION, + ], + 'supported_media_types': _supported_media_types, + 'primitive_family': metadata_base.PrimitiveFamily.DATA_PREPROCESSING, + } + ) + + def _read_fileuri(self, metadata: frozendict.FrozenOrderedDict, fileuri: str) -> container.ndarray: + image_array = container.ndarray(numpy.array([[fileuri.split('/')[-1]]], dtype=object), { + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': container.ndarray, + }, generate_metadata=False) + + image_array.metadata = image_array.metadata.update((), { + 'image_reader_metadata': { + 'foobar': 42, + }, + }) + + return image_array diff --git a/axolotl/tests/data/primitives/test_primitives/increment.py b/axolotl/tests/data/primitives/test_primitives/increment.py new file mode 100644 index 0000000..25a7cf2 --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/increment.py @@ -0,0 +1,99 @@ +import os.path +import typing + +from d3m import container, utils +from d3m.metadata import base as metadata_base, hyperparams +from d3m.primitive_interfaces import base, transformer + +from . import __author__, __version__ + +__all__ = ('IncrementPrimitive',) + + +# It is useful to define these names, so that you can reuse it both +# for class type arguments and method signatures. +Inputs = container.DataFrame +Outputs = container.DataFrame + + +class Hyperparams(hyperparams.Hyperparams): + # We can provide a type argument to a Hyperparameter class to signal which + # structural type the Hyperparameter is. If you do not provide it, it is + # automatically detected. + # This is not a tuning parameter but a control parameter which should be decided + # once during pipeline building but then fixed and not changed during hyper-parameter + # tuning. 
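+    # For contrast, a tunable variant could be declared with a bounded range instead; this is a
+    # hypothetical sketch, not something this primitive defines:
+    #
+    #   amount = hyperparams.Bounded[float](
+    #       lower=0.0, upper=10.0, default=1.0,
+    #       semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+    #   )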
+ amount = hyperparams.Hyperparameter[float](default=1, semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']) + + +class IncrementPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + # It is important to provide a docstring because this docstring is used as a description of + # a primitive. Some callers might analyze it to determine the nature and purpose of a primitive. + + """ + A primitive which increments each value by a fixed amount, by default 1. + """ + + # This should contain only metadata which cannot be automatically determined from the code. + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + # Simply an UUID generated once and fixed forever. Generated using "uuid.uuid4()". + 'id': '5c9d5acf-7754-420f-a49f-90f4d9d0d694', + 'version': __version__, + 'name': "Increment Values", + # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable. + 'keywords': ['test primitive'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + # Unstructured URIs. Link to file and link to repo in this case. + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/increment.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + # A list of dependencies in order. These can be Python packages, system packages, or Docker images. + # Of course Python packages can also have their own dependencies, but sometimes it is necessary to + # install a Python package first to be even able to run setup.py of another package. Or you have + # a dependency which is not on PyPi. + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + # URIs at which one can obtain code for the primitive, if available. + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/increment.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + # The same path the primitive is registered with entry points in setup.py. + 'python_path': 'd3m.primitives.operator.increment.Test', + # Choose these from a controlled vocabulary in the schema. If anything is missing which would + # best describe the primitive, make a merge request. + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.COMPUTER_ALGEBRA, + ], + 'primitive_family': metadata_base.PrimitiveFamily.OPERATOR, + # A metafeature about preconditions required for this primitive to operate well. + 'preconditions': [ + # Instead of strings you can also use available Python enumerations. + metadata_base.PrimitivePrecondition.NO_MISSING_VALUES, + metadata_base.PrimitivePrecondition.NO_CATEGORICAL_VALUES, + ] + }) + + def __init__(self, *, hyperparams: Hyperparams) -> None: + super().__init__(hyperparams=hyperparams) + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + # If "inputs" is container.DataFrame, then also result is. + outputs = typing.cast(Outputs, inputs + float(self.hyperparams['amount'])) + + # Metadata might not be preserved through operations, so we make sure and update metadata ourselves. 
+ # Because just values changed (but not structure) and the primitive is a transformation, we can reuse + # inputs metadata, but generate new metadata for new value to assure everything is matching. + outputs.metadata = inputs.metadata.generate(outputs) + + # Wrap it into default "CallResult" object: we are not doing any iterations. + return base.CallResult(outputs) diff --git a/axolotl/tests/data/primitives/test_primitives/monomial.py b/axolotl/tests/data/primitives/test_primitives/monomial.py new file mode 100644 index 0000000..3bf7fa6 --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/monomial.py @@ -0,0 +1,127 @@ +import os.path +import typing + +from d3m import container, utils +from d3m.metadata import base as metadata_base, hyperparams, params +from d3m.primitive_interfaces import base, supervised_learning + +from . import __author__, __version__ + +__all__ = ('MonomialPrimitive',) + + +# It is useful to define these names, so that you can reuse it both +# for class type arguments and method signatures. +Inputs = container.List +Outputs = container.List + + +class Params(params.Params): + a: float + + +class Hyperparams(hyperparams.Hyperparams): + bias = hyperparams.Hyperparameter(default=0.0, semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']) + + +class MonomialPrimitive(supervised_learning.SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): + # It is important to provide a docstring because this docstring is used as a description of + # a primitive. Some callers might analyze it to determine the nature and purpose of a primitive. + + """ + A primitive which fits output = a * input. + """ + + # This should contain only metadata which cannot be automatically determined from the code. + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + # Simply an UUID generated once and fixed forever. Generated using "uuid.uuid4()". + 'id': '4a0336ae-63b9-4a42-860e-86c5b64afbdd', + 'version': __version__, + 'name': "Monomial Regressor", + # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable. + 'keywords': ['test primitive'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + # Unstructured URIs. Link to file and link to repo in this case. + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/monomial.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + # A list of dependencies in order. These can be Python packages, system packages, or Docker images. + # Of course Python packages can also have their own dependencies, but sometimes it is necessary to + # install a Python package first to be even able to run setup.py of another package. Or you have + # a dependency which is not on PyPi. + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + # URIs at which one can obtain code for the primitive, if available. + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/monomial.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + # The same path the primitive is registered with entry points in setup.py. 
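+        # (It matches the "regression.monomial.Test = test_primitives.monomial:MonomialPrimitive"
+        # entry point registered under "d3m.primitives" in this package's setup.py.)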
+ 'python_path': 'd3m.primitives.regression.monomial.Test', + # Choose these from a controlled vocabulary in the schema. If anything is missing which would + # best describe the primitive, make a merge request. + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.LINEAR_REGRESSION, + ], + 'primitive_family': metadata_base.PrimitiveFamily.REGRESSION, + }) + + # Random seed is not needed, but we need it in tests to test which random seed was passed to a primitive. + def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0) -> None: + super().__init__(hyperparams=hyperparams, random_seed=random_seed) + + self._a: float = 0 + self._training_inputs: Inputs = None + self._training_outputs: Outputs = None + self._fitted: bool = False + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + if self._a is None: + raise ValueError("Calling produce before fitting.") + + # We compute the result. We use (...) here and not [...] to create a + # generator and not a list which would then just be copied into "List". + result = (self._a * input + self.hyperparams['bias'] for input in inputs) + + # We convert a regular list to container list which supports metadata attribute. + # Even if the structure of outputs is the same as inputs, conceptually, outputs are different, + # they are new data. So we do not reuse metadata from inputs but generate new metadata. + outputs: container.List = container.List(result, generate_metadata=True) + + # Wrap it into default "CallResult" object: we are not doing any iterations. + return base.CallResult(outputs) + + def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: + self._training_inputs = inputs + self._training_outputs = outputs + self._fitted = False + + def fit(self, *, timeout: float = None, iterations: int = None) -> base.CallResult[None]: + if self._fitted: + return base.CallResult(None) + + if not self._training_inputs or not self._training_outputs: + raise ValueError("Missing training data.") + + quotients = [output / input for output, input in zip(self._training_outputs, self._training_inputs) if input != 0] + self._a = sum(quotients) / len(quotients) + self._fitted = True + + return base.CallResult(None) + + def get_params(self) -> Params: + # You can pass a dict or keyword arguments. + return Params(a=self._a) + + def set_params(self, *, params: Params) -> None: + # Params are just a fancy dict. + self._a = params['a'] diff --git a/axolotl/tests/data/primitives/test_primitives/multi_data_hyperparam.py b/axolotl/tests/data/primitives/test_primitives/multi_data_hyperparam.py new file mode 100644 index 0000000..3bcbc3d --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/multi_data_hyperparam.py @@ -0,0 +1,70 @@ +import os.path +import typing + +import numpy as np # type: ignore + +from d3m import container, utils +from d3m.metadata import hyperparams, base as metadata_base +from d3m.primitive_interfaces import base, transformer + +from . 
import __author__, __version__ + +__all__ = ('MultiDataHyperparamPrimitive',) + + +Inputs = container.DataFrame +Outputs = container.DataFrame + + +class Hyperparams(hyperparams.Hyperparams): + values = hyperparams.Hyperparameter[typing.List[np.float64]]( # type: ignore + default=[np.float64(1)], + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description='The values to be added to input' + ) + + +class MultiDataHyperparamPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A primitive that requires a data argument hyperparam. + """ + + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + 'id': 'ad8b8a35-9023-4f24-a628-a8f41eb2e3b0', + 'version': __version__, + 'name': "Multi Data Hyperparam Test Primitive", + 'keywords': ['test primitive'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/multi_data_hyperparam.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/multi_data_hyperparam.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + 'python_path': 'd3m.primitives.operator.sum.MultiDataHyperparamTest', + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.COMPUTER_ALGEBRA, + ], + 'primitive_family': metadata_base.PrimitiveFamily.OPERATOR, + }) + + def __init__(self, *, hyperparams: Hyperparams) -> None: + super().__init__(hyperparams=hyperparams) + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + outputs = inputs + for value in self.hyperparams['values']: + outputs = outputs + value + return base.CallResult(outputs) diff --git a/axolotl/tests/data/primitives/test_primitives/null.py b/axolotl/tests/data/primitives/test_primitives/null.py new file mode 100644 index 0000000..a153a2b --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/null.py @@ -0,0 +1,219 @@ +import os.path +import typing + +from d3m import container, utils +from d3m.metadata import base as metadata_base, hyperparams, params +from d3m.primitive_interfaces import base, transformer, unsupervised_learning + +from . import __author__, __version__ + +__all__ = ('NullTransformerPrimitive', 'NullUnsupervisedLearnerPrimitive', 'NullDataFrameUnsupervisedLearnerPrimitive') + +Inputs = container.List +Outputs = container.List + + +class Hyperparams(hyperparams.Hyperparams): + pass + + +class Params(params.Params): + pass + + +class NullTransformerPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A primitive which passes through inputs as outputs. + + It does not really care if inputs is list. 
+ """ + + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + 'id': 'e0f83c35-fe3d-4fa6-92cf-f7421408eab5', + 'version': __version__, + 'name': "Produce the same as the input", + 'keywords': ['test primitive'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/null.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/add_primitives.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + 'python_path': 'd3m.primitives.operator.null.TransformerTest', + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.IDENTITY_FUNCTION, + ], + 'primitive_family': metadata_base.PrimitiveFamily.OPERATOR, + }) + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + return base.CallResult( + value=inputs + ) + + +class NullUnsupervisedLearnerPrimitive(unsupervised_learning.UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): + """ + A primitive which passes through inputs as outputs. + + It does not really care if inputs is list. + """ + + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + 'id': '5310d7c4-89a0-4dab-8419-3285e650105a', + 'version': __version__, + 'name': "Produce the same as the input", + 'keywords': ['test primitive'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/null.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/add_primitives.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + 'python_path': 'd3m.primitives.operator.null.UnsupervisedLearnerTest', + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.IDENTITY_FUNCTION, + ], + 'primitive_family': metadata_base.PrimitiveFamily.OPERATOR, + }) + + def set_training_data(self) -> None: # type: ignore + """ + A noop. + + Parameters + ---------- + """ + + return + + def fit(self, *, timeout: float = None, iterations: int = None) -> base.CallResult[None]: + """ + A noop. + """ + + return base.CallResult(None) + + def get_params(self) -> Params: + """ + A noop. + """ + + return Params() + + def set_params(self, *, params: Params) -> None: + """ + A noop. 
+ """ + + return + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + return base.CallResult( + value=inputs + ) + + +DataframeInputs = container.DataFrame +DataframeOutputs = container.DataFrame + + +class NullDataFrameUnsupervisedLearnerPrimitive(unsupervised_learning.UnsupervisedLearnerPrimitiveBase[DataframeInputs, DataframeOutputs, Params, Hyperparams]): + """ + A primitive which passes through inputs as outputs. + + It does not really care if inputs is a Dataframe. + """ + + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + 'id': '0c063f7b-98d8-4d3c-91df-6a56623b9cc3', + 'version': __version__, + 'name': "Produce the same as the input", + 'keywords': ['test primitive'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/null.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/add_primitives.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + 'python_path': 'd3m.primitives.operator.null.DataFrameUnsupervisedLearnerTest', + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.IDENTITY_FUNCTION, + ], + 'primitive_family': metadata_base.PrimitiveFamily.OPERATOR, + }) + + def set_training_data(self) -> None: # type: ignore + """ + A noop. + + Parameters + ---------- + """ + + return + + def fit(self, *, timeout: float = None, iterations: int = None) -> base.CallResult[None]: + """ + A noop. + """ + + return base.CallResult(None) + + def get_params(self) -> Params: + """ + A noop. + """ + + return Params() + + def set_params(self, *, params: Params) -> None: + """ + A noop. + """ + + return + + def produce(self, *, inputs: DataframeInputs, timeout: float = None, iterations: int = None) -> base.CallResult[DataframeOutputs]: + return base.CallResult( + value=inputs + ) diff --git a/axolotl/tests/data/primitives/test_primitives/postgresql.py b/axolotl/tests/data/primitives/test_primitives/postgresql.py new file mode 100644 index 0000000..300de95 --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/postgresql.py @@ -0,0 +1,222 @@ +import tempfile +import os +import os.path +import pwd +import re +import shutil +import signal +import subprocess +import time +import typing + +import prctl # type: ignore +import psycopg2 # type: ignore + +from d3m import container, utils +from d3m.metadata import base as metadata_base, hyperparams +from d3m.primitive_interfaces import base, transformer + +from . import __author__, __version__ + +__all__ = ('PostgreSQLPrimitive',) + + +Inputs = container.List +Outputs = container.List + + +class Hyperparams(hyperparams.Hyperparams): + pass + + +class PostgreSQLPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A primitive which which uses PostgreSQL to compute a value. 
+ """ + + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + 'id': 'f23ea340-ce22-4b15-b2f3-e63885f192b3', + 'version': __version__, + 'name': "PostgreSQL operator", + 'keywords': ['test primitive'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/postgresql.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.UBUNTU, + 'package': 'build-essential', + 'version': '12.4ubuntu1', + }, { + 'type': metadata_base.PrimitiveInstallationType.UBUNTU, + 'package': 'libcap-dev', + 'version': '1:2.25-1.2', + }, { + 'type': metadata_base.PrimitiveInstallationType.UBUNTU, + 'package': 'postgresql-10', + 'version': '10.8-0ubuntu0.18.04.1', + }, { + 'type': metadata_base.PrimitiveInstallationType.UBUNTU, + 'package': 'libpq-dev', + 'version': '10.8-0ubuntu0.18.04.1', + }, { + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package': 'psycopg2', + 'version': '2.8.2', + }, { + # "python-prctl" requires "build-essential" and "libcap-dev". We list it here instead of + # "setup.py" to not have to list these system dependencies for every test primitive (because + # we cannot assure this primitive annotation gets installed first). + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package': 'python-prctl', + 'version': '1.7', + }, { + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/postgresql.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + 'python_path': 'd3m.primitives.operator.postgresql.Test', + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.COMPUTER_ALGEBRA, + ], + 'primitive_family': metadata_base.PrimitiveFamily.OPERATOR, + }) + + def __init__(self, *, hyperparams: Hyperparams, temporary_directory: str = None) -> None: + super().__init__(hyperparams=hyperparams, temporary_directory=temporary_directory) + + # Initialize it early so that "__del__" has access to these attributes. 
+ self._connection: psycopg2.connection = None + self._process: subprocess.Popen = None + self._postgresql_base: str = None + + self._postgresql_base = tempfile.mkdtemp() + os.chmod(self._postgresql_base, 0o755) + + self._config_dir = os.path.join(self._postgresql_base, 'conf') + self._data_dir = os.path.join(self._postgresql_base, 'data') + self._run_dir = os.path.join(self._postgresql_base, 'run') + self._config_file = os.path.join(self._config_dir, 'postgresql.conf') + + shutil.copytree('/etc/postgresql/10/main', self._config_dir) + shutil.copy('/etc/ssl/certs/ssl-cert-snakeoil.pem', os.path.join(self._config_dir, 'server.pem')) + shutil.copy('/etc/ssl/private/ssl-cert-snakeoil.key', os.path.join(self._config_dir, 'server.key')) + os.chmod(os.path.join(self._config_dir, 'server.key'), 0o600) + + with open(self._config_file, 'r', encoding='utf8') as config_file: + config_file_lines = config_file.readlines() + with open(self._config_file, 'w', encoding='utf8') as config_file: + for line in config_file_lines: + line = re.sub('/etc/ssl/certs/ssl-cert-snakeoil.pem', os.path.join(self._config_dir, 'server.pem'), line) + line = re.sub('/etc/ssl/private/ssl-cert-snakeoil.key', os.path.join(self._config_dir, 'server.key'), line) + line = re.sub('/var/lib/postgresql/10/main', self._data_dir, line) + line = re.sub('/etc/postgresql/10/main/pg_hba.conf', os.path.join(self._config_dir, 'pg_hba.conf'), line) + line = re.sub('/etc/postgresql/10/main/pg_ident.conf', os.path.join(self._config_dir, 'pg_ident.conf'), line) + line = re.sub('/var/run/postgresql/10-main.pid', os.path.join(self._run_dir, '10-main.pid'), line) + line = re.sub('/var/run/postgresql/10-main.pg_stat_tmp', os.path.join(self._run_dir, '10-main.pg_stat_tmp'), line) + line = re.sub('/var/run/postgresql', self._run_dir, line) + config_file.write(line) + + with open(os.path.join(self._config_dir, 'conf.d', 'local.conf'), 'w', encoding='utf8') as config_file: + # We disable TCP access. + config_file.write("listen_addresses = ''\n") + + with open(os.path.join(self._config_dir, 'pg_hba.conf'), 'w', encoding='utf8') as config_file: + config_file.write("local all all trust\n") + + # 700 is required by PostgreSQL. + os.mkdir(self._data_dir, mode=0o700) + os.mkdir(self._run_dir) + os.mkdir(os.path.join(self._run_dir, '10-main.pg_stat_tmp')) + + if os.getuid() == 0: + self._username = 'postgres' + + # We have to run PostgreSQL as non-root user. + shutil.chown(self._data_dir, 'postgres', 'postgres') + shutil.chown(self._run_dir, 'postgres', 'postgres') + shutil.chown(os.path.join(self._run_dir, '10-main.pg_stat_tmp'), 'postgres', 'postgres') + shutil.chown(os.path.join(self._config_dir, 'pg_hba.conf'), 'postgres', 'postgres') + shutil.chown(os.path.join(self._config_dir, 'pg_ident.conf'), 'postgres', 'postgres') + shutil.chown(os.path.join(self._config_dir, 'server.key'), 'postgres', 'postgres') + else: + self._username = pwd.getpwuid(os.getuid())[0] + + self._init_and_start_database() + + @staticmethod + def _process_configure() -> None: + if os.getuid() == 0: + os.setgid(shutil._get_gid('postgres')) # type: ignore + os.setuid(shutil._get_uid('postgres')) # type: ignore + + # Setting "pdeathsig" will make the process be killed if our process dies for any reason. 
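+        # Note: python-prctl wraps a Linux-only syscall, so this parent-death cleanup only applies on Linux.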
+ prctl.set_pdeathsig(signal.SIGTERM) + + def _init_and_start_database(self) -> None: + args = [ + '/usr/lib/postgresql/10/bin/initdb', + '-D', + self._data_dir, + '--locale', + 'en_US.UTF-8', + '--encoding', + 'UTF-8', + ] + + try: + subprocess.run( + args, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + encoding='utf8', check=True, preexec_fn=self._process_configure, + ) + except subprocess.CalledProcessError as error: + self.logger.error("Error running initdb: %(stdout)s", {'stdout': error.stdout}) + raise error + + args = [ + '/usr/lib/postgresql/10/bin/postgres', + '-D', + self._data_dir, + '-c', + 'config_file={config_file}'.format(config_file=self._config_file), + ] + + self._process = subprocess.Popen(args, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT, encoding='utf8', preexec_fn=self._process_configure) + + # Waits for 2 seconds. + connection_error = None + for i in range(20): + try: + self._connection = psycopg2.connect(dbname=self._username, user=self._username, host=self._run_dir) + break + except psycopg2.OperationalError as error: + connection_error = error + time.sleep(0.1) + else: + raise connection_error + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + with self._connection.cursor() as cursor: + cursor.execute("SELECT 42;") + return base.CallResult(container.List([cursor.fetchone()[0]], generate_metadata=True)) + + def __del__(self) -> None: + if self._connection is not None: + self._connection.close() + self._connection = None + + if self._process is not None and self._process.poll() is None: + self._process.terminate() + + if self._postgresql_base is not None: + shutil.rmtree(self._postgresql_base, ignore_errors=True) diff --git a/axolotl/tests/data/primitives/test_primitives/primitive_hyperparam.py b/axolotl/tests/data/primitives/test_primitives/primitive_hyperparam.py new file mode 100644 index 0000000..732996a --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/primitive_hyperparam.py @@ -0,0 +1,76 @@ +import os.path +import typing + +import pandas as pd # type: ignore + +from d3m import container, utils +from d3m.metadata import hyperparams, base as metadata_base +from d3m.primitive_interfaces import base, transformer +from test_primitives.increment import IncrementPrimitive, Hyperparams as IncrementPrimitiveHyperparams + +from . import __author__, __version__ + +__all__ = ('PrimitiveHyperparamPrimitive',) + + +Inputs = container.DataFrame +Outputs = container.DataFrame + + +class Hyperparams(hyperparams.Hyperparams): + primitive = hyperparams.Hyperparameter[base.PrimitiveBase]( + default=IncrementPrimitive(hyperparams=IncrementPrimitiveHyperparams.defaults()), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description='The primitive instance to be passed to PrimitiveHyperparamPrimitive' + ) + + +class PrimitiveHyperparamPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A primitive that requires a data argument hyperparam. 
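+    More precisely, it takes another primitive instance as a hyper-parameter (by default an ``IncrementPrimitive``), calls its produce method on the inputs, and adds the first produced value to the inputs.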
+ """ + + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + 'id': 'bd67f49a-bf10-4251-9774-019add57370b', + 'version': __version__, + 'name': "Primitive Hyperparam Test Primitive", + 'keywords': ['test primitive'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/primitive_hyperparam.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/primitive_hyperparam.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + 'python_path': 'd3m.primitives.operator.sum.PrimitiveHyperparamTest', + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.COMPUTER_ALGEBRA, + ], + 'primitive_family': metadata_base.PrimitiveFamily.OPERATOR, + }) + + def __init__(self, *, hyperparams: Hyperparams) -> None: + super().__init__(hyperparams=hyperparams) + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + primitive = self.hyperparams['primitive'] + result = primitive.produce(inputs=inputs) + data = result.value + if isinstance(data, pd.DataFrame): + value = data.iloc[0] + else: + value = data[0] + outputs = inputs + value + return base.CallResult(outputs) diff --git a/axolotl/tests/data/primitives/test_primitives/primitive_sum.py b/axolotl/tests/data/primitives/test_primitives/primitive_sum.py new file mode 100644 index 0000000..b17e81b --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/primitive_sum.py @@ -0,0 +1,139 @@ +import os.path +import time +import typing + +import numpy # type: ignore + +from d3m import container, exceptions, utils +from d3m.metadata import base as metadata_base, hyperparams +from d3m.primitive_interfaces import base, transformer + +from . import __author__, __version__, null + +__all__ = ('PrimitiveSumPrimitive',) + +Inputs = container.List +Outputs = container.List + + +class Hyperparams(hyperparams.Hyperparams): + # These primitives should already be fitted (or be a transformer) and they should accept + # "List" container type as an input, and return a "List" container type as an output. + # TODO: How to define this in the hyper-parameter definition? + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/210 + primitive_1 = hyperparams.Primitive[base.PrimitiveBase]( + default=null.NullTransformerPrimitive, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + ) + primitive_2 = hyperparams.Primitive[base.PrimitiveBase]( + default=null.NullTransformerPrimitive, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + ) + + +class PrimitiveSumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + # It is important to provide a docstring because this docstring is used as a description of + # a primitive. Some callers might analyze it to determine the nature and purpose of a primitive. + + """ + A primitive which element-wise sums the produced results of two other primitives. 
Each of those two primitives + are given inputs (a list of numbers) to this primitive first as their inputs, are expected to return a list + of numbers back, and then those lists are element-wise summed together, to produce the final list. + + This primitive exists just as a demonstration. To sum results you would otherwise just simply + sum the results directly instead of getting an instance of the primitive and call + produce methods on it. But this does allow more complicated ways of interacting with a + primitive and this primitive demonstrates it. + """ + + # This should contain only metadata which cannot be automatically determined from the code. + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + # Simply an UUID generated once and fixed forever. Generated using "uuid.uuid4()". + 'id': '6b061902-5e40-4a7a-9a21-b995dce1b2aa', + 'version': __version__, + 'name': "Sum results of other primitives", + # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable. + 'keywords': ['test primitive'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + # Unstructured URIs. Link to file and link to repo in this case. + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/primitive_sum.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + # A list of dependencies in order. These can be Python packages, system packages, or Docker images. + # Of course Python packages can also have their own dependencies, but sometimes it is necessary to + # install a Python package first to be even able to run setup.py of another package. Or you have + # a dependency which is not on PyPi. + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + # URIs at which one can obtain code for the primitive, if available. + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/add_primitives.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + # The same path the primitive is registered with entry points in setup.py. + 'python_path': 'd3m.primitives.operator.primitive_sum.Test', + # Choose these from a controlled vocabulary in the schema. If anything is missing which would + # best describe the primitive, make a merge request. + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.COMPUTER_ALGEBRA, + ], + 'primitive_family': metadata_base.PrimitiveFamily.OPERATOR, + # A metafeature about preconditions required for this primitive to operate well. + 'preconditions': [ + # Instead of strings you can also use available Python enumerations. 
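+            # (e.g. the string "NO_MISSING_VALUES" would name the same precondition as the enumeration value below)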
+ metadata_base.PrimitivePrecondition.NO_MISSING_VALUES, + metadata_base.PrimitivePrecondition.NO_CATEGORICAL_VALUES, + ] + }) + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + primitive_1 = self.hyperparams['primitive_1'] + primitive_2 = self.hyperparams['primitive_2'] + + results = [] + + if primitive_1 is not None: + start = time.perf_counter() + results.append(primitive_1.produce(inputs=inputs, timeout=timeout, iterations=iterations)) + delta = time.perf_counter() - start + + # Decrease the amount of time available to other calls. This delegates responsibility + # of raising a "TimeoutError" exception to produce methods themselves. It also assumes + # that if one passes a negative timeout value to a produce method, it raises a + # "TimeoutError" exception correctly. + if timeout is not None: + timeout -= delta + + if primitive_2 is not None: + results.append(primitive_2.produce(inputs=inputs, timeout=timeout, iterations=iterations)) + + if not results: + raise exceptions.InvalidArgumentValueError("No primitives provided as hyper-parameters.") + + # Even if the structure of outputs is the same as inputs, conceptually, outputs are different, + # they are new data. So we do not reuse metadata from inputs but generate new metadata. + outputs = container.List([sum(x) for x in zip(*[result.value for result in results])], generate_metadata=True) + + # We return the maximum number of iterations done by any produce method we called. + iterations_done = None + for result in results: + if result.iterations_done is not None: + if iterations_done is None: + iterations_done = result.iterations_done + else: + iterations_done = max(iterations_done, result.iterations_done) + + return base.CallResult( + value=outputs, + has_finished=all(result.has_finished for result in results), + iterations_done=iterations_done, + ) diff --git a/axolotl/tests/data/primitives/test_primitives/random.py b/axolotl/tests/data/primitives/test_primitives/random.py new file mode 100644 index 0000000..6767311 --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/random.py @@ -0,0 +1,154 @@ +import os.path +import typing + +import numpy # type: ignore + +from d3m import container, utils +from d3m.metadata import base as metadata_base, hyperparams +from d3m.primitive_interfaces import base, generator + +from . import __author__, __version__ + +__all__ = ('RandomPrimitive',) + + +Outputs = container.DataFrame + + +class Hyperparams(hyperparams.Hyperparams): + # These hyper-parameters can be both control or tuning parameter depending on their + # role in a pipeline. So it depends how a pipeline is constructed: with them having + # a fixed value or something which can be tuned. So they have two semantic types. + mu = hyperparams.Hyperparameter[float](default=0.0, semantic_types=[ + 'https://metadata.datadrivendiscovery.org/types/ControlParameter', + 'https://metadata.datadrivendiscovery.org/types/TuningParameter' + ]) + sigma = hyperparams.Hyperparameter[float](default=1.0, semantic_types=[ + 'https://metadata.datadrivendiscovery.org/types/ControlParameter', + 'https://metadata.datadrivendiscovery.org/types/TuningParameter' + ]) + + +class RandomPrimitive(generator.GeneratorPrimitiveBase[Outputs, None, Hyperparams]): + # It is important to provide a docstring because this docstring is used as a description of + # a primitive. Some callers might analyze it to determine the nature and purpose of a primitive. 
+ + """ + A primitive which draws random samples from a normal distribution. + """ + + # This should contain only metadata which cannot be automatically determined from the code. + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + # Simply an UUID generated once and fixed forever. Generated using "uuid.uuid4()". + 'id': 'df3153a1-4411-47e2-bbc0-9d5e9925ad79', + 'version': __version__, + 'name': "Random Samples", + # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable. + 'keywords': ['test primitive'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + # Unstructured URIs. Link to file and link to repo in this case. + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/random.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + # A list of dependencies in order. These can be Python packages, system packages, or Docker images. + # Of course Python packages can also have their own dependencies, but sometimes it is necessary to + # install a Python package first to be even able to run setup.py of another package. Or you have + # a dependency which is not on PyPi. + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + # URIs at which one can obtain code for the primitive, if available. + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/random.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + # The same path the primitive is registered with entry points in setup.py. + 'python_path': 'd3m.primitives.data_generation.random.Test', + # Choose these from a controlled vocabulary in the schema. If anything is missing which would + # best describe the primitive, make a merge request. + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.MERSENNE_TWISTER, + metadata_base.PrimitiveAlgorithmType.NORMAL_DISTRIBUTION, + ], + 'primitive_family': metadata_base.PrimitiveFamily.DATA_GENERATION, + }) + + # It is not necessary to limit arguments this way, but we use it in tests to test that it is supported. + def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0) -> None: + super().__init__(hyperparams=hyperparams, random_seed=random_seed) + + def produce(self, *, inputs: container.List, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + # We get as an input a list of non-negative integers, indices into the set of random values. + # For each integer we redraw the number of samples up to that index at which time we return + # the last value, the value for that index. We add one to the index because index can start + # with 0 but we want to draw at least 1 number then. + # TODO: Optimize this if the inputs are a sequence of integers, we could reuse the state. + results = [numpy.random.RandomState(self.random_seed).normal(self.hyperparams['mu'], self.hyperparams['sigma'], i + 1)[-1] for i in inputs] + + # Outputs are different from inputs, so we do not reuse metadata from inputs but create new metadata. + # We convert the list to a container DataFrame which supports metadata attribute. 
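+        # The per-index samples become a single "results" column; generate_metadata=True fills in fresh metadata for the new DataFrame.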
+ outputs = container.DataFrame({'results': results}, generate_metadata=True) + + # Wrap it into default "CallResult" object: we are not doing any iterations. + return base.CallResult(outputs) + + def set_training_data(self) -> None: # type: ignore + """ + A noop. + """ + + return + + def fit(self, *, timeout: float = None, iterations: int = None) -> base.CallResult[None]: + """ + A noop. + """ + + return base.CallResult(None) + + def get_params(self) -> None: + """ + A noop. + """ + + return None + + def set_params(self, *, params: None) -> None: + """ + A noop. + """ + + return + + def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: container.List, timeout: float = None, iterations: int = None) -> base.MultiCallResult: # type: ignore + """ + A method calling ``fit`` and after that multiple produce methods at once. + + Parameters + ---------- + produce_methods : Sequence[str] + A list of names of produce methods to call. + inputs : List + The inputs given to all produce methods. + timeout : float + A maximum time this primitive should take to both fit the primitive and produce outputs + for all produce methods listed in ``produce_methods`` argument, in seconds. + iterations : int + How many of internal iterations should the primitive do for both fitting and producing + outputs of all produce methods. + + Returns + ------- + MultiCallResult + A dict of values for each produce method wrapped inside ``MultiCallResult``. + """ + + return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs) # type: ignore diff --git a/axolotl/tests/data/primitives/test_primitives/random_classifier.py b/axolotl/tests/data/primitives/test_primitives/random_classifier.py new file mode 100644 index 0000000..4db52c4 --- /dev/null +++ b/axolotl/tests/data/primitives/test_primitives/random_classifier.py @@ -0,0 +1,130 @@ +import os +import random +import typing + +from d3m import container, exceptions, utils +from d3m.metadata import base as metadata_base, hyperparams, params +from d3m.primitive_interfaces.base import CallResult, ContinueFitMixin +from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase + +from . import __author__, __version__ + +__all__ = ('RandomClassifierPrimitive',) + +Inputs = container.DataFrame +Outputs = container.DataFrame + + +class Params(params.Params): + classes: typing.Optional[typing.Sequence[typing.Any]] + random_state: typing.Any + + +class Hyperparams(hyperparams.Hyperparams): + pass + + +class RandomClassifierPrimitive(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], + ContinueFitMixin[Inputs, Outputs, Params, Hyperparams]): + """ + A primitive randomly classify a class. For test purposes. + + It uses the first column of ``outputs`` as a target column. 
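+    Predictions are drawn uniformly at random (using a seeded ``random.Random``) from the set of classes seen during fitting.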
+ """ + + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + 'id': 'b8d0d982-fc53-4a3f-8a8c-a284fdd45bfd', + 'version': __version__, + 'name': "Random Classifier", + 'python_path': 'd3m.primitives.classification.random_classifier.Test', + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.BINARY_CLASSIFICATION, + metadata_base.PrimitiveAlgorithmType.MULTICLASS_CLASSIFICATION + ], + 'primitive_family': metadata_base.PrimitiveFamily.CLASSIFICATION, + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/random_classifier.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/random_classifier.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + }) + + def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0) -> None: + super().__init__(hyperparams=hyperparams, random_seed=random_seed) + + self._random: random.Random = random.Random() + self._random.seed(random_seed) + self._training_outputs: Outputs = None + self._fitted = False + self._classes: typing.List = [] + + def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: + self._training_outputs = outputs + self._fitted = False + + def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: + if self._fitted: + return CallResult(None) + + if self._training_outputs is None: + raise exceptions.InvalidStateError("Missing training data.") + + self._classes = sorted(self._training_outputs.iloc[:, 0].unique().tolist()) + + self._fitted = True + + return CallResult(None) + + def continue_fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: + if self._training_outputs is None: + raise exceptions.InvalidStateError("Missing training data.") + + _classes = self._training_outputs.iloc[:, 0].unique().tolist() + self._classes = sorted(set(self._classes + _classes)) + + self._fitted = True + + return CallResult(None) + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: + if not self._fitted: + raise exceptions.PrimitiveNotFittedError("Not fitted.") + + k = len(inputs) + predictions = self._random.choices(self._classes, k=k) # type: ignore + + result = container.DataFrame({'predictions': predictions}, generate_metadata=True) + + return CallResult(result) + + def get_params(self) -> Params: + if self._fitted: + return Params( + classes=self._classes, + random_state=self._random.getstate(), + ) + else: + return Params( + classes=None, + random_state=self._random.getstate(), + ) + + def set_params(self, *, params: Params) -> None: + self._classes = params['classes'] + self._random.setstate(params['random_state']) + if self._classes is not None: + self._fitted = True diff --git a/axolotl/tests/data/primitives/test_primitives/sum.py b/axolotl/tests/data/primitives/test_primitives/sum.py new file mode 100644 index 0000000..c9d9096 --- /dev/null +++ 
b/axolotl/tests/data/primitives/test_primitives/sum.py @@ -0,0 +1,151 @@ +import os.path +import pickle +import typing +from http import client + +import numpy # type: ignore + +from d3m import container, utils +from d3m.metadata import base as metadata_base, hyperparams +from d3m.primitive_interfaces import base, transformer + +from . import __author__, __version__ + +__all__ = ('SumPrimitive',) + + +DOCKER_KEY = 'summing' + +# It is useful to define these names, so that you can reuse it both +# for class type arguments and method signatures. +# This is just an example of how to define a more complicated input type, +# which is in fact more restrictive than what the primitive can really handle. +# One could probably just use "typing.Union[typing.Container]" in this case, if accepting +# a wide range of input types. +Inputs = typing.Union[container.ndarray, container.DataFrame, container.List] +Outputs = container.List + + +class Hyperparams(hyperparams.Hyperparams): + """ + No hyper-parameters for this primitive. + """ + + pass + + +class SumPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + # It is important to provide a docstring because this docstring is used as a description of + # a primitive. Some callers might analyze it to determine the nature and purpose of a primitive. + + """ + A primitive which sums all the values on input into one number. + """ + + # This should contain only metadata which cannot be automatically determined from the code. + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata({ + # Simply an UUID generated once and fixed forever. Generated using "uuid.uuid4()". + 'id': '9c00d42d-382d-4177-a0e7-082da88a29c8', + 'version': __version__, + 'name': "Sum Values", + # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable. + 'keywords': ['test primitive'], + 'source': { + 'name': __author__, + 'contact': 'mailto:author@example.com', + 'uris': [ + # Unstructured URIs. Link to file and link to repo in this case. + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/sum.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + # A list of dependencies in order. These can be Python packages, system packages, or Docker images. + # Of course Python packages can also have their own dependencies, but sometimes it is necessary to + # install a Python package first to be even able to run setup.py of another package. Or you have + # a dependency which is not on PyPi. + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }, { + 'type': metadata_base.PrimitiveInstallationType.DOCKER, + # A key under which information about a running container will be provided to the primitive. + 'key': DOCKER_KEY, + 'image_name': 'registry.gitlab.com/datadrivendiscovery/tests-data/summing', + # Instead of a label, an exact hash of the image is required. This assures reproducibility. + # You can see digests using "docker images --digests". + 'image_digest': 'sha256:f75e21720e44cfa29d8a8e239b5746c715aa7cf99f9fde7916623fabc30d3364', + }], + # URIs at which one can obtain code for the primitive, if available. 
+ 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/sum.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + # The same path the primitive is registered with entry points in setup.py. + 'python_path': 'd3m.primitives.operator.sum.Test', + # Choose these from a controlled vocabulary in the schema. If anything is missing which would + # best describe the primitive, make a merge request. + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.COMPUTER_ALGEBRA, + ], + 'primitive_family': metadata_base.PrimitiveFamily.OPERATOR, + # A metafeature about preconditions required for this primitive to operate well. + 'preconditions': [ + # Instead of strings you can also use available Python enumerations. + metadata_base.PrimitivePrecondition.NO_MISSING_VALUES, + metadata_base.PrimitivePrecondition.NO_CATEGORICAL_VALUES, + ] + }) + + def __init__(self, *, hyperparams: Hyperparams, docker_containers: typing.Dict[str, base.DockerContainer] = None) -> None: + super().__init__(hyperparams=hyperparams, docker_containers=docker_containers) + + # We cannot check for expected ports here because during class construction, a mock value is passed which has empty ports dict. + if not self.docker_containers or DOCKER_KEY not in self.docker_containers: + raise ValueError("Docker key '{docker_key}' missing among provided Docker containers.".format(docker_key=DOCKER_KEY)) + + def _convert_value(self, value: typing.Any) -> typing.Union[numpy.ndarray, typing.List, typing.Any]: + # Server does not know about container types, just standard numpy arrays and lists. + if isinstance(value, container.ndarray): + return value.view(numpy.ndarray) + elif isinstance(value, container.List): + return [self._convert_value(v) for v in value] + else: + return value + + @base.singleton + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + # In the future, we should store here data in Arrow format into + # Plasma store and just pass an ObjectId of data over HTTP. + value = self._convert_value(inputs) + data = pickle.dumps(value) + + # TODO: Retry if connection fails. + # This connection can sometimes fail because the service inside a Docker container + # is not yet ready, despite container itself already running. Primitive should retry + # a few times before aborting. + + # Primitive knows the port the container is listening on. + connection = client.HTTPConnection(self.docker_containers[DOCKER_KEY].address, port=self.docker_containers[DOCKER_KEY].ports['8000/tcp']) + # This simple primitive does not keep any state in the Docker container. + # But if your primitive does have to associate requests with a primitive, consider + # using Python's "id(self)" call to get an identifier of a primitive's instance. + self.logger.debug("HTTP request: container=%(container)s", {'container': self.docker_containers[DOCKER_KEY]}, extra={'data': value}) + connection.request('POST', '/', data, { + 'Content-Type': 'multipart/form-data', + }) + response = connection.getresponse() + self.logger.debug("HTTP response: status=%(status)s", {'status': response.status}, extra={'response': response}) + + if response.status != 200: + raise ValueError("Invalid HTTP response status: {status}".format(status=response.status)) + + result = float(response.read()) + + # Outputs are different from inputs, so we do not reuse metadata from inputs but generate new metadata. 
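+        # The Docker service replies with the sum as plain text; the parsed float is wrapped into a one-element List container here.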
+ outputs = container.List((result,), generate_metadata=True) + + # Wrap it into default "CallResult" object: we are not doing any iterations. + return base.CallResult(outputs) diff --git a/axolotl/tests/data/problems/boston_problem_1/problemDoc.json b/axolotl/tests/data/problems/boston_problem_1/problemDoc.json new file mode 100644 index 0000000..30d7d5d --- /dev/null +++ b/axolotl/tests/data/problems/boston_problem_1/problemDoc.json @@ -0,0 +1,36 @@ +{ + "about": { + "problemID": "boston_problem_1", + "problemName": "Predict median value of a home", + "problemSchemaVersion": "4.0.0", + "problemVersion": "4.0.0", + "taskKeywords": [ + "regression", + "univariate" + ] + }, + "inputs": { + "data": [ + { + "datasetID": "boston_dataset_1", + "targets": [ + { + "targetIndex": 0, + "resID": "learningData", + "colIndex": 14, + "colName": "MEDV" + } + ] + } + ], + "performanceMetrics": [ + { + "metric": "rSquared" + } + ] + }, + "expectedOutputs": { + "predictionsFile": "predictions.csv", + "scoresFile": "scores.csv" + } +} \ No newline at end of file diff --git a/axolotl/tests/data/problems/boston_problem_2/problemDoc.json b/axolotl/tests/data/problems/boston_problem_2/problemDoc.json new file mode 100644 index 0000000..61debd9 --- /dev/null +++ b/axolotl/tests/data/problems/boston_problem_2/problemDoc.json @@ -0,0 +1,36 @@ +{ + "about": { + "problemID": "boston_problem_2", + "problemName": "Predict nitrous oxide level", + "problemSchemaVersion": "4.0.0", + "problemVersion": "4.0.0", + "taskKeywords": [ + "regression", + "univariate" + ] + }, + "inputs": { + "data": [ + { + "datasetID": "boston_dataset_1", + "targets": [ + { + "targetIndex": 0, + "resID": "learningData", + "colIndex": 5, + "colName": "NOX" + } + ] + } + ], + "performanceMetrics": [ + { + "metric": "rSquared" + } + ] + }, + "expectedOutputs": { + "predictionsFile": "predictions.csv", + "scoresFile": "scores.csv" + } +} \ No newline at end of file diff --git a/axolotl/tests/data/problems/database_problem_2/problemDoc.json b/axolotl/tests/data/problems/database_problem_2/problemDoc.json new file mode 100644 index 0000000..05ad512 --- /dev/null +++ b/axolotl/tests/data/problems/database_problem_2/problemDoc.json @@ -0,0 +1,42 @@ +{ + "about": { + "problemID": "database_problem_2", + "problemName": "Database problem of type COUNTS_PER_USER", + "problemSchemaVersion": "4.0.0", + "problemVersion": "4.0.0", + "taskKeywords": [ + "regression", + "multivariate" + ] + }, + "inputs": { + "data": [ + { + "datasetID": "database_dataset_2", + "targets": [ + { + "targetIndex": 0, + "resID": "learningData", + "colIndex": 2, + "colName": "posts_count" + }, + { + "targetIndex": 1, + "resID": "learningData", + "colIndex": 3, + "colName": "comments_count" + } + ] + } + ], + "performanceMetrics": [ + { + "metric": "rootMeanSquaredError" + } + ] + }, + "expectedOutputs": { + "predictionsFile": "predictions.csv", + "scoresFile": "scores.csv" + } +} \ No newline at end of file diff --git a/axolotl/tests/data/problems/database_problem_3/problemDoc.json b/axolotl/tests/data/problems/database_problem_3/problemDoc.json new file mode 100644 index 0000000..1bf5a15 --- /dev/null +++ b/axolotl/tests/data/problems/database_problem_3/problemDoc.json @@ -0,0 +1,36 @@ +{ + "about": { + "problemID": "database_problem_3", + "problemName": "Database problem of type COMMENTS_PER_POST", + "problemSchemaVersion": "4.0.0", + "problemVersion": "4.0.0", + "taskKeywords": [ + "regression", + "univariate" + ] + }, + "inputs": { + "data": [ + { + "datasetID": 
"database_dataset_3", + "targets": [ + { + "targetIndex": 0, + "resID": "learningData", + "colIndex": 2, + "colName": "comments_count" + } + ] + } + ], + "performanceMetrics": [ + { + "metric": "rootMeanSquaredError" + } + ] + }, + "expectedOutputs": { + "predictionsFile": "predictions.csv", + "scoresFile": "scores.csv" + } +} \ No newline at end of file diff --git a/axolotl/tests/data/problems/database_problem_4/problemDoc.json b/axolotl/tests/data/problems/database_problem_4/problemDoc.json new file mode 100644 index 0000000..91cb471 --- /dev/null +++ b/axolotl/tests/data/problems/database_problem_4/problemDoc.json @@ -0,0 +1,37 @@ +{ + "about": { + "problemID": "database_problem_4", + "problemName": "Database problem of type HAS_USER_MADE_COMMENT_ON_POST", + "problemSchemaVersion": "4.0.0", + "problemVersion": "4.0.0", + "taskKeywords": [ + "classification", + "binary" + ] + }, + "inputs": { + "data": [ + { + "datasetID": "database_dataset_4", + "targets": [ + { + "targetIndex": 0, + "resID": "learningData", + "colIndex": 3, + "colName": "made_comment" + } + ] + } + ], + "performanceMetrics": [ + { + "metric": "f1", + "posLabel": "yes" + } + ] + }, + "expectedOutputs": { + "predictionsFile": "predictions.csv", + "scoresFile": "scores.csv" + } +} \ No newline at end of file diff --git a/axolotl/tests/data/problems/image_problem_2/problemDoc.json b/axolotl/tests/data/problems/image_problem_2/problemDoc.json new file mode 100644 index 0000000..f2b94be --- /dev/null +++ b/axolotl/tests/data/problems/image_problem_2/problemDoc.json @@ -0,0 +1,36 @@ +{ + "about": { + "problemID": "image_problem_2", + "problemName": "Multiclass image classification", + "problemDescription": "Multiclass image classification problem. Each image belongs to one of 10 classes. 
Based on 124_120_mnist_problem.", + "problemSchemaVersion": "4.0.0", + "problemVersion": "4.0.0", + "taskKeywords": [ + "classification", + "multiClass" + ] + }, + "inputs": { + "data": [ + { + "datasetID": "image_dataset_2", + "targets": [ + { + "targetIndex": 0, + "resID": "learningData", + "colIndex": 2, + "colName": "label" + } + ] + } + ], + "performanceMetrics": [ + { + "metric": "accuracy" + } + ] + }, + "expectedOutputs": { + "predictionsFile": "predictions.csv" + } +} \ No newline at end of file diff --git a/axolotl/tests/data/problems/iris_problem_1/dataSplits.csv b/axolotl/tests/data/problems/iris_problem_1/dataSplits.csv new file mode 100644 index 0000000..69d033e --- /dev/null +++ b/axolotl/tests/data/problems/iris_problem_1/dataSplits.csv @@ -0,0 +1,151 @@ +d3mIndex,type,repeat,fold +0,TRAIN,0,0 +1,TRAIN,0,0 +2,TEST,0,0 +3,TRAIN,0,0 +4,TEST,0,0 +5,TRAIN,0,0 +6,TRAIN,0,0 +7,TRAIN,0,0 +8,TRAIN,0,0 +9,TRAIN,0,0 +10,TRAIN,0,0 +11,TEST,0,0 +12,TRAIN,0,0 +13,TRAIN,0,0 +14,TRAIN,0,0 +15,TEST,0,0 +16,TRAIN,0,0 +17,TRAIN,0,0 +18,TRAIN,0,0 +19,TRAIN,0,0 +20,TRAIN,0,0 +21,TEST,0,0 +22,TRAIN,0,0 +23,TRAIN,0,0 +24,TEST,0,0 +25,TRAIN,0,0 +26,TEST,0,0 +27,TRAIN,0,0 +28,TEST,0,0 +29,TRAIN,0,0 +30,TRAIN,0,0 +31,TRAIN,0,0 +32,TEST,0,0 +33,TEST,0,0 +34,TRAIN,0,0 +35,TRAIN,0,0 +36,TRAIN,0,0 +37,TRAIN,0,0 +38,TRAIN,0,0 +39,TEST,0,0 +40,TEST,0,0 +41,TRAIN,0,0 +42,TRAIN,0,0 +43,TRAIN,0,0 +44,TEST,0,0 +45,TRAIN,0,0 +46,TRAIN,0,0 +47,TEST,0,0 +48,TRAIN,0,0 +49,TRAIN,0,0 +50,TEST,0,0 +51,TRAIN,0,0 +52,TEST,0,0 +53,TEST,0,0 +54,TEST,0,0 +55,TRAIN,0,0 +56,TRAIN,0,0 +57,TEST,0,0 +58,TRAIN,0,0 +59,TEST,0,0 +60,TRAIN,0,0 +61,TEST,0,0 +62,TRAIN,0,0 +63,TEST,0,0 +64,TRAIN,0,0 +65,TRAIN,0,0 +66,TEST,0,0 +67,TEST,0,0 +68,TRAIN,0,0 +69,TRAIN,0,0 +70,TRAIN,0,0 +71,TRAIN,0,0 +72,TRAIN,0,0 +73,TRAIN,0,0 +74,TRAIN,0,0 +75,TRAIN,0,0 +76,TRAIN,0,0 +77,TRAIN,0,0 +78,TRAIN,0,0 +79,TRAIN,0,0 +80,TEST,0,0 +81,TEST,0,0 +82,TEST,0,0 +83,TRAIN,0,0 +84,TRAIN,0,0 +85,TRAIN,0,0 +86,TRAIN,0,0 +87,TRAIN,0,0 +88,TRAIN,0,0 +89,TRAIN,0,0 +90,TRAIN,0,0 +91,TEST,0,0 +92,TEST,0,0 +93,TRAIN,0,0 +94,TRAIN,0,0 +95,TEST,0,0 +96,TRAIN,0,0 +97,TRAIN,0,0 +98,TRAIN,0,0 +99,TEST,0,0 +100,TRAIN,0,0 +101,TEST,0,0 +102,TEST,0,0 +103,TRAIN,0,0 +104,TRAIN,0,0 +105,TRAIN,0,0 +106,TRAIN,0,0 +107,TRAIN,0,0 +108,TEST,0,0 +109,TEST,0,0 +110,TRAIN,0,0 +111,TRAIN,0,0 +112,TRAIN,0,0 +113,TEST,0,0 +114,TRAIN,0,0 +115,TEST,0,0 +116,TRAIN,0,0 +117,TRAIN,0,0 +118,TRAIN,0,0 +119,TRAIN,0,0 +120,TRAIN,0,0 +121,TEST,0,0 +122,TEST,0,0 +123,TRAIN,0,0 +124,TRAIN,0,0 +125,TEST,0,0 +126,TRAIN,0,0 +127,TEST,0,0 +128,TRAIN,0,0 +129,TEST,0,0 +130,TRAIN,0,0 +131,TRAIN,0,0 +132,TRAIN,0,0 +133,TRAIN,0,0 +134,TEST,0,0 +135,TRAIN,0,0 +136,TRAIN,0,0 +137,TEST,0,0 +138,TRAIN,0,0 +139,TRAIN,0,0 +140,TRAIN,0,0 +141,TRAIN,0,0 +142,TRAIN,0,0 +143,TRAIN,0,0 +144,TEST,0,0 +145,TRAIN,0,0 +146,TRAIN,0,0 +147,TRAIN,0,0 +148,TRAIN,0,0 +149,TRAIN,0,0 diff --git a/axolotl/tests/data/problems/iris_problem_1/problemDoc.json b/axolotl/tests/data/problems/iris_problem_1/problemDoc.json new file mode 100644 index 0000000..7cb357b --- /dev/null +++ b/axolotl/tests/data/problems/iris_problem_1/problemDoc.json @@ -0,0 +1,45 @@ +{ + "about": { + "problemID": "iris_problem_1", + "problemName": "Distinguish Iris flowers", + "problemDescription": "Distinguish Iris flowers of three related species.", + "problemSchemaVersion": "4.0.0", + "problemVersion": "4.0.0", + "taskKeywords": [ + "classification", + "multiClass" + ] + }, + "inputs": { + "data": [ + { + "datasetID": "iris_dataset_1", + "targets": [ 
+ { + "targetIndex": 0, + "resID": "learningData", + "colIndex": 5, + "colName": "species" + } + ] + } + ], + "dataSplits": { + "method": "holdOut", + "testSize": 0.3, + "numFolds": 0, + "stratified": false, + "numRepeats": 0, + "splitsFile": "dataSplits.csv" + }, + "performanceMetrics": [ + { + "metric": "accuracy" + } + ] + }, + "expectedOutputs": { + "predictionsFile": "predictions.csv", + "scoresFile": "scores.csv" + } +} \ No newline at end of file diff --git a/axolotl/tests/data/problems/iris_problem_2/problemDoc.json b/axolotl/tests/data/problems/iris_problem_2/problemDoc.json new file mode 100644 index 0000000..a36588e --- /dev/null +++ b/axolotl/tests/data/problems/iris_problem_2/problemDoc.json @@ -0,0 +1,36 @@ +{ + "about": { + "problemID": "iris_problem_2", + "problemName": "Distinguish Iris flowers", + "problemDescription": "Distinguish Iris flowers of three related species, without datasetID in targets.", + "problemSchemaVersion": "4.0.0", + "problemVersion": "4.0.0", + "taskKeywords": [ + "classification", + "multiClass" + ] + }, + "inputs": { + "data": [ + { + "targets": [ + { + "targetIndex": 0, + "resID": "learningData", + "colIndex": 5, + "colName": "species" + } + ] + } + ], + "performanceMetrics": [ + { + "metric": "accuracy" + } + ] + }, + "expectedOutputs": { + "predictionsFile": "predictions.csv", + "scoresFile": "scores.csv" + } +} diff --git a/axolotl/tests/data/problems/multi_dataset_problem/problemDoc.json b/axolotl/tests/data/problems/multi_dataset_problem/problemDoc.json new file mode 100644 index 0000000..c723d57 --- /dev/null +++ b/axolotl/tests/data/problems/multi_dataset_problem/problemDoc.json @@ -0,0 +1,48 @@ +{ + "about": { + "problemID": "multi_input_problem", + "problemName": "Problem associate with multiple dataset", + "problemDescription": "Distinguish Iris flowers of three related species.", + "problemSchemaVersion": "4.0.0", + "problemVersion": "4.0.0", + "taskKeywords": [ + "classification", + "multiClass" + ] + }, + "inputs": { + "data": [ + { + "datasetID": "iris_dataset_1", + "targets": [ + { + "targetIndex": 0, + "resID": "learningData", + "colIndex": 5, + "colName": "species" + } + ] + }, + { + "datasetID": "boston_dataset_1", + "targets": [ + { + "targetIndex": 0, + "resID": "learningData", + "colIndex": 14, + "colName": "MEDV" + } + ] + } + ], + "performanceMetrics": [ + { + "metric": "accuracy" + } + ] + }, + "expectedOutputs": { + "predictionsFile": "predictions.csv", + "scoresFile": "scores.csv" + } +} \ No newline at end of file diff --git a/axolotl/tests/resources/logistic_regeression.json b/axolotl/tests/resources/logistic_regeression.json new file mode 100644 index 0000000..5f30a0c --- /dev/null +++ b/axolotl/tests/resources/logistic_regeression.json @@ -0,0 +1,146 @@ +{ + "id": "b9cc24a0-30ce-4fe2-adde-77af46987f60", + "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", + "created": "2020-06-26T02:36:34.125148Z", + "inputs": [ + { + "name": "inputs" + } + ], + "outputs": [ + { + "data": "steps.4.produce", + "name": "output predictions" + } + ], + "steps": [ + { + "type": "PRIMITIVE", + "primitive": { + "id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", + "version": "0.3.0", + "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", + "name": "Extract a DataFrame from a Dataset" + }, + "arguments": { + "inputs": { + "type": "CONTAINER", + "data": "inputs.0" + } + }, + "outputs": [ + { + "id": "produce" + } + ] + }, + { + "type": "PRIMITIVE", + "primitive": { + "id": 
"d510cb7a-1782-4f51-b44c-58f0236e47c7", + "version": "0.6.0", + "python_path": "d3m.primitives.data_transformation.column_parser.Common", + "name": "Parses strings into their types" + }, + "arguments": { + "inputs": { + "type": "CONTAINER", + "data": "steps.0.produce" + } + }, + "outputs": [ + { + "id": "produce" + } + ] + }, + { + "type": "PRIMITIVE", + "primitive": { + "id": "d016df89-de62-3c53-87ed-c06bb6a23cde", + "version": "2020.6.24", + "python_path": "d3m.primitives.data_cleaning.imputer.SKlearn", + "name": "sklearn.impute.SimpleImputer" + }, + "arguments": { + "inputs": { + "type": "CONTAINER", + "data": "steps.1.produce" + } + }, + "outputs": [ + { + "id": "produce" + } + ], + "hyperparams": { + "use_semantic_types": { + "type": "VALUE", + "data": true + }, + "return_result": { + "type": "VALUE", + "data": "replace" + } + } + }, + { + "type": "PRIMITIVE", + "primitive": { + "id": "b9c81b40-8ed1-3b23-80cf-0d6fe6863962", + "version": "2020.6.24", + "python_path": "d3m.primitives.classification.logistic_regression.SKlearn", + "name": "sklearn.linear_model.logistic.LogisticRegression" + }, + "arguments": { + "inputs": { + "type": "CONTAINER", + "data": "steps.2.produce" + }, + "outputs": { + "type": "CONTAINER", + "data": "steps.2.produce" + } + }, + "outputs": [ + { + "id": "produce" + } + ], + "hyperparams": { + "use_semantic_types": { + "type": "VALUE", + "data": true + }, + "add_index_columns": { + "type": "VALUE", + "data": true + } + } + }, + { + "type": "PRIMITIVE", + "primitive": { + "id": "8d38b340-f83f-4877-baaa-162f8e551736", + "version": "0.3.0", + "python_path": "d3m.primitives.data_transformation.construct_predictions.Common", + "name": "Construct pipeline predictions output" + }, + "arguments": { + "inputs": { + "type": "CONTAINER", + "data": "steps.3.produce" + }, + "reference": { + "type": "CONTAINER", + "data": "steps.0.produce" + } + }, + "outputs": [ + { + "id": "produce" + } + ] + } + ] +} \ No newline at end of file diff --git a/axolotl/tests/resources/svc_pipeline.json b/axolotl/tests/resources/svc_pipeline.json new file mode 100644 index 0000000..13d532c --- /dev/null +++ b/axolotl/tests/resources/svc_pipeline.json @@ -0,0 +1,146 @@ +{ + "id": "c41cbe88-7caf-45a3-a7e1-77dda65709b5", + "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", + "created": "2020-06-26T02:36:35.138147Z", + "inputs": [ + { + "name": "inputs" + } + ], + "outputs": [ + { + "data": "steps.4.produce", + "name": "output predictions" + } + ], + "steps": [ + { + "type": "PRIMITIVE", + "primitive": { + "id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", + "version": "0.3.0", + "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", + "name": "Extract a DataFrame from a Dataset" + }, + "arguments": { + "inputs": { + "type": "CONTAINER", + "data": "inputs.0" + } + }, + "outputs": [ + { + "id": "produce" + } + ] + }, + { + "type": "PRIMITIVE", + "primitive": { + "id": "d510cb7a-1782-4f51-b44c-58f0236e47c7", + "version": "0.6.0", + "python_path": "d3m.primitives.data_transformation.column_parser.Common", + "name": "Parses strings into their types" + }, + "arguments": { + "inputs": { + "type": "CONTAINER", + "data": "steps.0.produce" + } + }, + "outputs": [ + { + "id": "produce" + } + ] + }, + { + "type": "PRIMITIVE", + "primitive": { + "id": "d016df89-de62-3c53-87ed-c06bb6a23cde", + "version": "2020.6.24", + "python_path": "d3m.primitives.data_cleaning.imputer.SKlearn", + "name": "sklearn.impute.SimpleImputer" + }, + "arguments": { + "inputs": { + 
"type": "CONTAINER", + "data": "steps.1.produce" + } + }, + "outputs": [ + { + "id": "produce" + } + ], + "hyperparams": { + "use_semantic_types": { + "type": "VALUE", + "data": true + }, + "return_result": { + "type": "VALUE", + "data": "replace" + } + } + }, + { + "type": "PRIMITIVE", + "primitive": { + "id": "0ae7d42d-f765-3348-a28c-57d94880aa6a", + "version": "2020.6.24", + "python_path": "d3m.primitives.classification.svc.SKlearn", + "name": "sklearn.svm.classes.SVC" + }, + "arguments": { + "inputs": { + "type": "CONTAINER", + "data": "steps.2.produce" + }, + "outputs": { + "type": "CONTAINER", + "data": "steps.2.produce" + } + }, + "outputs": [ + { + "id": "produce" + } + ], + "hyperparams": { + "use_semantic_types": { + "type": "VALUE", + "data": true + }, + "add_index_columns": { + "type": "VALUE", + "data": true + } + } + }, + { + "type": "PRIMITIVE", + "primitive": { + "id": "8d38b340-f83f-4877-baaa-162f8e551736", + "version": "0.3.0", + "python_path": "d3m.primitives.data_transformation.construct_predictions.Common", + "name": "Construct pipeline predictions output" + }, + "arguments": { + "inputs": { + "type": "CONTAINER", + "data": "steps.3.produce" + }, + "reference": { + "type": "CONTAINER", + "data": "steps.0.produce" + } + }, + "outputs": [ + { + "id": "produce" + } + ] + } + ] +} \ No newline at end of file diff --git a/axolotl/tests/test_algorithms_dummy.py b/axolotl/tests/test_algorithms_dummy.py new file mode 100644 index 0000000..429fac7 --- /dev/null +++ b/axolotl/tests/test_algorithms_dummy.py @@ -0,0 +1,55 @@ +from pathlib import Path +import unittest +import tempfile +import shutil + +from d3m.metadata import problem as problem_module +from d3m import container + +from axolotl.backend.simple import SimpleRunner +from axolotl.algorithms.dummy import DummySearch + + +class SimpleSearch(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def test_search_fit_produce(self): + problem_description, dataset = get_data() + + backend = SimpleRunner(random_seed=42, volumes_dir=None, scratch_dir=self.test_dir) + dummy_search = DummySearch(problem_description=problem_description, backend=backend) + + # check if we were able to find and fit + fitted_pipeline, pipeline_result = dummy_search.search_fit(input_data=[dataset], time_limit=100) + self.assertEqual(pipeline_result.error, None) + + # check first history entry + self.assertEqual(dummy_search.history[0].scores.values.tolist()[0], [ + 'ACCURACY', 0.9133333333333333, 0.9133333333333333, 42, 0]) + + # test if we can produce the same training input + pipeline_result = dummy_search.produce(fitted_pipeline, [dataset]) + self.assertEqual(pipeline_result.error, None) + + +def get_data(dataset_name='iris_dataset_1', problem_name='iris_problem_1'): + if problem_name: + problem_doc_path = Path( + Path(__file__).parent.absolute(), 'data', 'problems', problem_name, 'problemDoc.json' + ).as_uri() + problem_description = problem_module.get_problem(problem_doc_path) + else: + problem_description = None + + dataset_doc_path = Path(Path(__file__).parent.absolute(), 'data', 'datasets', + dataset_name, 'datasetDoc.json').as_uri() + iris_dataset = container.dataset.get_dataset(dataset_doc_path) + return problem_description, iris_dataset + + +if __name__ == '__main__': + unittest.main() diff --git a/axolotl/tests/test_autokeras.py b/axolotl/tests/test_autokeras.py new file mode 100644 index 0000000..174cdef --- /dev/null +++ b/axolotl/tests/test_autokeras.py @@ 
-0,0 +1,82 @@ +import pathlib +import shutil +import sys +import unittest + +import os +import tempfile + +from axolotl.algorithms.autokeras_search import AutoKerasSearch +from axolotl.backend.simple import SimpleRunner + +PROJECT_ROOT = os.path.join(os.path.dirname(__file__), '..') +sys.path.insert(0, PROJECT_ROOT) + +from d3m.metadata import problem as problem_module +from d3m import container as container_module + + +class TestAutoKeras(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + self.backend = SimpleRunner(random_seed=42, volumes_dir=None, scratch_dir=self.test_dir) + + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def test_fit(self): + test_data = os.path.join(PROJECT_ROOT, 'tests', 'data') + dataset_name = 'image_dataset_2' + + dataset_path = os.path.join( + test_data, 'datasets', dataset_name, 'datasetDoc.json') + dataset = self.__get_dataset(dataset_path) + + problem_path = os.path.join( + test_data, 'problems', dataset_name.replace('dataset', 'problem'), 'problemDoc.json') + problem = self.__get_problem(problem_path) + + tuner_base = AutoKerasSearch(problem, backend=self.backend, max_trials=1, directory=self.test_dir) + pipeline_result = tuner_base.search_fit(input_data=[dataset], time_limit=1000) + # TODO https://gitlab.com/datadrivendiscovery/jpl-primitives/-/issues/41 + self.assertNotEqual(pipeline_result.error, None) + + def _fit_cifar10(self): + test_data = os.path.join('/data/d3m/datasets/seed_datasets_current') + dataset_name = '124_174_cifar10_MIN_METADATA' + + dataset_path = os.path.join( + test_data, dataset_name, '{}_dataset'.format(dataset_name), 'datasetDoc.json') + dataset = self.__get_dataset(dataset_path) + + problem_path = os.path.join( + test_data, dataset_name, '{}_problem'.format(dataset_name), 'problemDoc.json') + problem = self.__get_problem(problem_path) + + tuner_base = AutoKerasSearch(problem, backend=self.backend, max_trials=1, directory=self.test_dir) + pipeline_result = tuner_base.search_fit(input_data=[dataset], time_limit=1000) + # TODO https://gitlab.com/datadrivendiscovery/jpl-primitives/-/issues/41 + self.assertNotEqual(pipeline_result.error, None) + + def __get_uri(self, path): + return pathlib.Path(os.path.abspath(path)).as_uri() + + def __get_problem(self, problem_path): + problem_uri = self.__get_uri(problem_path) + problem = problem_module.Problem.load(problem_uri) + return problem + + def __get_dataset(self, dataset_path): + dataset_uri = self.__get_uri(dataset_path) + dataset = container_module.dataset.get_dataset(dataset_uri) + return dataset + + +if __name__ == '__main__': + suite = unittest.TestSuite() + for test_case in ( + 'test_fit', + ): + suite.addTest(TestAutoKeras(test_case)) + unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/axolotl/tests/test_backend_ray.py b/axolotl/tests/test_backend_ray.py new file mode 100644 index 0000000..1eeed1a --- /dev/null +++ b/axolotl/tests/test_backend_ray.py @@ -0,0 +1,105 @@ +import ray +import json +from pathlib import Path +import unittest +import tempfile +import shutil + +from d3m.metadata import problem as problem_module +from d3m import container + +from axolotl.backend.ray import RayRunner +from axolotl.utils import schemas as schemas_utils, pipeline as pipeline_utils + + +class SimpleRunnerTestCase(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def test_fit_produce_pipelines(self): + pipeline = get_classification_pipeline() + problem_description, 
dataset = get_data() + ray_runner = RayRunner(random_seed=42, volumes_dir=None, scratch_dir=self.test_dir, n_workers=1) + result = ray_runner.fit_pipeline(problem_description=problem_description, + pipeline=pipeline, input_data=[dataset]) + + self.assertEqual(result.status, 'COMPLETED') + + result = ray_runner.produce_pipeline(fitted_pipeline_id=result.fitted_pipeline_id, input_data=[dataset]) + self.assertEqual(result.status, 'COMPLETED') + + def test_evaluate_pipeline(self): + pipeline = get_classification_pipeline() + ray_runner = RayRunner(random_seed=42, volumes_dir=None, scratch_dir=self.test_dir, n_workers=1) + problem_description, dataset = get_data() + data_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA") + scoring_pipeline = schemas_utils.get_scoring_pipeline() + + no_split = schemas_utils.DATA_PREPARATION_PARAMS['no_split'] + + result = ray_runner.evaluate_pipeline( + problem_description=problem_description, pipeline=pipeline, + input_data=[dataset], metrics=schemas_utils.MULTICLASS_CLASSIFICATION_METRICS, + data_preparation_pipeline=data_pipeline, scoring_pipeline=scoring_pipeline, + data_preparation_params=no_split + ) + + self.assertEqual(result.error, None) + self.assertEqual(result.scores.values.tolist(), [ + ['ACCURACY', 0.9133333333333333, 0.9133333333333333, 42, 0], + ['F1_MICRO', 0.9133333333333333, 0.9133333333333333, 42, 0], + ['F1_MACRO', 0.9123688388315397, 0.9123688388315397, 42, 0]] + ) + + def test_evaluate_pipelines(self): + pipeline = get_classification_pipeline() + ray_runner = RayRunner(random_seed=42, volumes_dir=None, scratch_dir=self.test_dir, n_workers=1) + problem_description, dataset = get_data() + data_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA") + scoring_pipeline = schemas_utils.get_scoring_pipeline() + + no_split = schemas_utils.DATA_PREPARATION_PARAMS['no_split'] + + results = ray_runner.evaluate_pipelines( + problem_description=problem_description, pipelines=[pipeline] * 3, + input_data=[dataset], metrics=schemas_utils.MULTICLASS_CLASSIFICATION_METRICS, + data_preparation_pipeline=data_pipeline, scoring_pipeline=scoring_pipeline, + data_preparation_params=no_split + ) + + for result in results: + self.assertEqual(result.error, None) + self.assertEqual(result.status, 'COMPLETED') + + +def get_classification_pipeline(): + with open(schemas_utils.PIPELINES_DB_DIR) as file: + default_pipelines = json.load(file) + + return pipeline_utils.load_pipeline(default_pipelines['CLASSIFICATION'][0]) + + +def get_data(dataset_name='iris_dataset_1', problem_name='iris_problem_1'): + if problem_name: + problem_doc_path = Path( + Path(__file__).parent.absolute(), 'data', 'problems', problem_name, 'problemDoc.json' + ).as_uri() + problem_description = problem_module.get_problem(problem_doc_path) + else: + problem_description = None + + dataset_doc_path = Path(Path(__file__).parent.absolute(), 'data', 'datasets', + dataset_name, 'datasetDoc.json').as_uri() + iris_dataset = container.dataset.get_dataset(dataset_doc_path) + return problem_description, iris_dataset + + +if __name__ == '__main__': + ray.init() + unittest.main() + ray.shutdown() + + diff --git a/axolotl/tests/test_backend_simple.py b/axolotl/tests/test_backend_simple.py new file mode 100644 index 0000000..93abe92 --- /dev/null +++ b/axolotl/tests/test_backend_simple.py @@ -0,0 +1,82 @@ +import json +from pathlib import Path +import unittest +import tempfile +import shutil + +from d3m.metadata import problem as problem_module +from d3m import container + +from 
axolotl.backend.simple import SimpleRunner +from axolotl.utils import schemas as schemas_utils, pipeline as pipeline_utils + + +class SimpleRunnerTestCase(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def test_fit_produce_pipelines(self): + pipeline = get_classification_pipeline() + problem_description, dataset = get_data() + simple_runner = SimpleRunner(random_seed=42, volumes_dir=None, scratch_dir=self.test_dir) + result = simple_runner.fit_pipeline(problem_description=problem_description, pipeline=pipeline, + input_data=[dataset]) + self.assertEqual(result.status, 'COMPLETED') + + result = simple_runner.produce_pipeline(fitted_pipeline_id=result.fitted_pipeline_id, input_data=[dataset]) + self.assertEqual(result.status, 'COMPLETED') + + def test_evaluate_pipelines(self): + pipeline = get_classification_pipeline() + simple_runner = SimpleRunner(random_seed=42, volumes_dir=None, scratch_dir=self.test_dir) + problem_description, dataset = get_data() + data_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA") + scoring_pipeline = schemas_utils.get_scoring_pipeline() + + no_split = schemas_utils.DATA_PREPARATION_PARAMS['no_split'] + + result = simple_runner.evaluate_pipeline( + problem_description=problem_description, pipeline=pipeline, + input_data=[dataset], metrics=schemas_utils.MULTICLASS_CLASSIFICATION_METRICS, + data_preparation_pipeline=data_pipeline, scoring_pipeline=scoring_pipeline, + data_preparation_params=no_split + ) + + # result = list(results.values())[0] + self.assertEqual(result.error, None) + self.assertEqual(result.scores.values.tolist(), [ + ['ACCURACY', 0.9133333333333333, 0.9133333333333333, 42, 0], + ['F1_MICRO', 0.9133333333333333, 0.9133333333333333, 42, 0], + ['F1_MACRO', 0.9123688388315397, 0.9123688388315397, 42, 0]] + ) + + +def get_classification_pipeline(): + with open(schemas_utils.PIPELINES_DB_DIR) as file: + default_pipelines = json.load(file) + + return pipeline_utils.load_pipeline(default_pipelines['CLASSIFICATION'][0]) + + +def get_data(dataset_name='iris_dataset_1', problem_name='iris_problem_1'): + if problem_name: + problem_doc_path = Path( + Path(__file__).parent.absolute(), 'data', 'problems', problem_name, 'problemDoc.json' + ).as_uri() + problem_description = problem_module.get_problem(problem_doc_path) + else: + problem_description = None + + dataset_doc_path = Path(Path(__file__).parent.absolute(), 'data', 'datasets', + dataset_name, 'datasetDoc.json').as_uri() + iris_dataset = container.dataset.get_dataset(dataset_doc_path) + return problem_description, iris_dataset + + +if __name__ == '__main__': + unittest.main() + + diff --git a/axolotl/tests/test_bayesian.py b/axolotl/tests/test_bayesian.py new file mode 100644 index 0000000..53bc92a --- /dev/null +++ b/axolotl/tests/test_bayesian.py @@ -0,0 +1,93 @@ +import pathlib + +import ray +import shutil +import sys +import unittest + +import os +import tempfile +from axolotl.backend.ray import RayRunner + +from axolotl.algorithms.bayesian_search import BayesianSearch +from axolotl.backend.simple import SimpleRunner + +PROJECT_ROOT = os.path.join(os.path.dirname(__file__), '..') +sys.path.insert(0, PROJECT_ROOT) + +from d3m.metadata import problem as problem_module +from d3m import container as container_module +from axolotl.utils import pipeline as pipeline_utils + + +class TestBayesianSearch(unittest.TestCase): + def setUp(self): + self.test_data = os.path.join(PROJECT_ROOT, 'tests', 'data') + 
dataset_name = 'iris_dataset_1' + problem = self.__get_problem(dataset_name) + self.problem = problem + self.dataset = self.__get_dataset(dataset_name) + self.test_dir = tempfile.mkdtemp() + backend = SimpleRunner(random_seed=42, volumes_dir=None, scratch_dir=self.test_dir) + self.tuner_base = BayesianSearch(problem, backend=backend, max_trials=10, directory=self.test_dir, + num_initial_points=5) + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def test_fit(self): + _, pipeline_result = self.tuner_base.search_fit(input_data=[self.dataset], time_limit=60) + self.assertEqual(pipeline_result.error, None) + + def test_fit_svc(self): + pipeline_info = os.path.join(os.path.dirname(__file__), 'resources', 'svc_pipeline.json') + pipeline = pipeline_utils.load_pipeline(pipeline_info) + _, pipeline_result = self.tuner_base.search_fit(input_data=[self.dataset], time_limit=60, + pipeline_candidates=[pipeline]) + self.assertEqual(pipeline_result.error, None) + + def test_fit_lr(self): + pipeline_info = os.path.join(os.path.dirname(__file__), 'resources', 'logistic_regeression.json') + pipeline = pipeline_utils.load_pipeline(pipeline_info) + _, pipeline_result = self.tuner_base.search_fit(input_data=[self.dataset], time_limit=60, + pipeline_candidates=[pipeline]) + self.assertEqual(pipeline_result.error, None) + + def test_fit_ray(self): + if not ray.is_initialized(): + ray.init() + backend = RayRunner(random_seed=42, volumes_dir=None, scratch_dir=self.test_dir) + tuner_base = BayesianSearch(self.problem, backend=backend, max_trials=30, directory=self.test_dir, + num_initial_points=5) + _, pipeline_result = tuner_base.search_fit(input_data=[self.dataset], time_limit=100) + self.assertEqual(pipeline_result.error, None) + ray.shutdown() + + def __get_uri(self, path): + return pathlib.Path(os.path.abspath(path)).as_uri() + + def __get_problem(self, dataset_name): + problem_path = os.path.join( + self.test_data, 'problems', dataset_name.replace('dataset', 'problem'), 'problemDoc.json') + problem_uri = self.__get_uri(problem_path) + problem = problem_module.Problem.load(problem_uri) + return problem + + def __get_dataset(self, dataset_name): + dataset_path = os.path.join( + self.test_data, 'datasets', dataset_name, 'datasetDoc.json') + dataset_uri = self.__get_uri(dataset_path) + dataset = container_module.dataset.get_dataset(dataset_uri) + return dataset + + +if __name__ == '__main__': + suite = unittest.TestSuite() + for test_case in ( + 'test_fit', + 'test_fit_ray', + 'test_fit_lr', + 'test_fit_svc', + ): + suite.addTest(TestBayesianSearch(test_case)) + unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/axolotl/tests/test_predefine_pipelines.py b/axolotl/tests/test_predefine_pipelines.py new file mode 100644 index 0000000..5e9991c --- /dev/null +++ b/axolotl/tests/test_predefine_pipelines.py @@ -0,0 +1,85 @@ +import os +import pathlib +import unittest +import sys +PROJECT_ROOT = os.path.join(os.path.dirname(__file__), '..') +sys.path.insert(0, PROJECT_ROOT) + +from d3m.runtime import Runtime +from d3m.metadata import base as metadata_base, problem as problem_module +from d3m import container as container_module + +import axolotl.predefined_pipelines as predefined_pipelines + + +class TestPredefined(unittest.TestCase): + def setUp(self): + self.test_data = os.path.join(PROJECT_ROOT, 'tests', 'data') + + def tearDown(self): + pass + + def test_fetch_from_file(self): + dataset_name = 'iris_dataset_1' + problem = self.__get_problem(dataset_name) + dataset = self.__get_dataset(dataset_name) 
+ predefined_path = os.path.join(PROJECT_ROOT, 'axolotl/utils/resources/default_pipelines.json') + pipelines = predefined_pipelines.fetch_from_file(problem, predefined_path) + self.assertNotEqual(len(pipelines), 0) + result = self.__run_pipeline(pipelines[0], dataset) + result.check_success() + self.assertEqual(result.error, None) + + def test__fetch_from_preprocessors(self): + dataset_name = 'iris_dataset_1' + problem = self.__get_problem(dataset_name) + dataset = self.__get_dataset(dataset_name) + pipelines = predefined_pipelines._fetch_from_preprocessors(dataset, problem) + self.assertNotEqual(len(pipelines), 0) + result = self.__run_pipeline(pipelines[0], dataset) + result.check_success() + self.assertEqual(result.error, None) + + def test_fetch(self): + dataset_name = 'iris_dataset_1' + problem = self.__get_problem(dataset_name) + dataset = self.__get_dataset(dataset_name) + pipelines = predefined_pipelines.fetch(dataset, problem) + self.assertNotEqual(len(pipelines), 0) + result = self.__run_pipeline(pipelines[-1], dataset) + result.check_success() + self.assertEqual(result.error, None) + + def __get_uri(self, path): + return pathlib.Path(os.path.abspath(path)).as_uri() + + def __get_problem(self, dataset_name): + problem_path = os.path.join( + self.test_data, 'problems', dataset_name.replace('dataset', 'problem'), 'problemDoc.json') + problem_uri = self.__get_uri(problem_path) + problem = problem_module.Problem.load(problem_uri) + return problem + + def __get_dataset(self, dataset_name): + dataset_path = os.path.join( + self.test_data, 'datasets', dataset_name, 'datasetDoc.json') + dataset_uri = self.__get_uri(dataset_path) + dataset = container_module.dataset.get_dataset(dataset_uri) + return dataset + + def __run_pipeline(self, pipeline_description, data, volume_dir='/volumes'): + runtime = Runtime(pipeline=pipeline_description, context=metadata_base.Context.TESTING, volumes_dir=volume_dir) + fit_result = runtime.fit([data]) + return fit_result + + +if __name__ == '__main__': + suite = unittest.TestSuite() + for test_case in ( + 'test_fetch_from_file', + 'test__fetch_from_preprocessors', + 'test_fetch', + + ): + suite.addTest(TestPredefined(test_case)) + unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/axolotl/tests/test_preprocessor.py b/axolotl/tests/test_preprocessor.py new file mode 100644 index 0000000..d58110d --- /dev/null +++ b/axolotl/tests/test_preprocessor.py @@ -0,0 +1,246 @@ +import argparse +import pathlib + +import shutil +import sys + +import os +import unittest +from pprint import pprint +PROJECT_ROOT = os.path.join(os.path.dirname(__file__), '..') +sys.path.insert(0, PROJECT_ROOT) + +# from TimeSeriesD3MWrappers.primitives.classification_knn import Kanine +from d3m import container as container_module, index +from d3m.metadata import base as metadata_base, problem as problem_module +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import PrimitiveStep +from d3m.metadata.problem import TaskKeyword +from d3m.runtime import Runtime +from sklearn_wrap.SKLogisticRegression import SKLogisticRegression + +from axolotl.predefined_pipelines import preprocessor +from axolotl.utils import pipeline as pipeline_utils + + +def run_pipeline(pipeline_description, data, volume_dir='/volumes'): + runtime = Runtime(pipeline=pipeline_description, context=metadata_base.Context.TESTING, volumes_dir=volume_dir) + fit_result = runtime.fit([data]) + return fit_result + + +def add_classifier(pipeline_description, dataset_to_dataframe_step, attributes, 
targets): + lr = PrimitiveStep(primitive=SKLogisticRegression) + lr.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, + data_reference=attributes) + lr.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, + data_reference=targets) + lr.add_output('produce') + pipeline_description.add_step(lr) + + construct_pred = PrimitiveStep( + primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common')) + construct_pred.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, + data_reference=pipeline_utils.int_to_step(lr.index)) + construct_pred.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, + data_reference=dataset_to_dataframe_step) + construct_pred.add_output('produce') + pipeline_description.add_step(construct_pred) + # Final Output + pipeline_description.add_output(name='output predictions', + data_reference=pipeline_utils.int_to_step(construct_pred.index)) + + +# def add_time_series_specific_classifier(pipeline_description, attributes, targets): +# k = PrimitiveStep(primitive=Kanine) +# k.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, +# data_reference=attributes) +# k.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, +# data_reference=targets) +# k.add_output('produce') +# pipeline_description.add_step(k) +# pipeline_description.add_output(name='output predictions', +# data_reference=pipeline_utils.int_to_step(k.index)) +# return k + + +def _remove_volatile(target_pipe, predef_pipe): + target_pipe = target_pipe.to_json_structure() + for step in target_pipe['steps']: + del step['primitive']['digest'] + subset = {k: v for k, v in target_pipe.items() if k in predef_pipe} + return subset + + +class TestPreprocessor(unittest.TestCase): + time_series_data: container_module.Dataset = None + temp_dir: str = os.path.join(os.path.dirname(__file__), 'temp') + + @classmethod + def setUpClass(cls) -> None: + cls.maxDiff = None + cls.test_data = os.path.join(PROJECT_ROOT, 'tests', 'data') + # cls.time_series_data = datasets.get('timeseries_dataset_2') + # cls.tabular_classification_data = datasets.get('iris_dataset_1') + # cls.image_data = datasets.get('image_dataset_1') + # cls.audio_dataset = datasets.get('audio_dataset_1') + + @classmethod + def tearDownClass(cls): + for dir_name in ( + # cls.test_dir + 'solutions', + # cls.test_dir + 'fitted_solutions', + ): + if os.path.exists(dir_name): + shutil.rmtree(dir_name) + + # def test_timeseries_tabular(self): + # pp = preprocessor.get_preprocessor(task=metadata_base.PrimitiveFamily.TIME_SERIES_CLASSIFICATION.name, + # treatment=metadata_base.PrimitiveFamily.CLASSIFICATION.name, + # data_types=[TaskKeyword.TIME_SERIES], semi=False, + # inputs_metadata=self.time_series_data.metadata, problem=None, + # main_resource='learningData')[0] + # add_classifier(pp.pipeline_description, pp.dataset_to_dataframe_step, pp.attributes, pp.targets) + # result = run_pipeline(pp.pipeline_description, self.time_series_data) + # result.check_success() + # self.assertEqual(result.error, None) + # + # def test_timeseries_specific(self): + # pp = preprocessor.get_preprocessor(task=metadata_base.PrimitiveFamily.TIME_SERIES_CLASSIFICATION.name, + # treatment=metadata_base.PrimitiveFamily.TIME_SERIES_CLASSIFICATION.name, + # data_types=[TaskKeyword.TIME_SERIES], semi=False, + # inputs_metadata=self.time_series_data.metadata, problem=None, + # main_resource='learningData')[0] + # + # add_time_series_specific_classifier(pp.pipeline_description, 
pp.attributes, pp.targets) + # result = run_pipeline(pp.pipeline_description, self.time_series_data) + # result.check_success() + # self.assertEqual(result.error, None) + + def test_TabularPreprocessor(self): + dataset_name = 'iris_dataset_1' + problem = self.__get_problem(dataset_name) + dataset = self.__get_dataset(dataset_name) + pp = preprocessor.get_preprocessor( + input_data=dataset, + problem=problem, + treatment=metadata_base.PrimitiveFamily.CLASSIFICATION.name, + )[0] + add_classifier(pp.pipeline_description, pp.dataset_to_dataframe_step, pp.attributes, pp.targets) + result = run_pipeline(pp.pipeline_description, dataset) + # pprint(pp.pipeline_description.to_json_structure()) + result.check_success() + self.assertEqual(result.error, None) + + # def test_image_tensor(self): + # pp = preprocessor.get_preprocessor(task=metadata_base.PrimitiveFamily.DIGITAL_IMAGE_PROCESSING.name, + # treatment=metadata_base.PrimitiveFamily.DIGITAL_IMAGE_PROCESSING.name, + # data_types=[TaskKeyword.IMAGE], semi=False, + # inputs_metadata=self.image_data.metadata, problem=None, + # main_resource='learningData')[0] + # add_classifier(pp.pipeline_description, pp.dataset_to_dataframe_step, pp.attributes, pp.targets) + # # pprint(pp.pipeline_description.to_json_structure()) + # result = run_pipeline(pp.pipeline_description, self.image_data) + # result.check_success() + # self.assertEqual(result.error, None) + + # TODO update static files on the CI + # def test_ImageDataFramePreprocessor(self): + # dataset_name = 'image_dataset_2' + # problem = self.__get_problem(dataset_name) + # dataset = self.__get_dataset(dataset_name) + # problem['problem']['task_keywords'].append(TaskKeyword.IMAGE) + # pp = preprocessor.get_preprocessor( + # input_data=dataset, + # problem=problem, + # treatment=metadata_base.PrimitiveFamily.CLASSIFICATION.name, + # )[0] + # volume = os.path.join(PROJECT_ROOT, 'tests') + # add_classifier(pp.pipeline_description, pp.dataset_to_dataframe_step, pp.attributes, pp.targets) + # # pprint(pp.pipeline_description.to_json_structure()) + # result = run_pipeline(pp.pipeline_description, dataset, volume_dir=volume) + # result.check_success() + # self.assertEqual(result.error, None) + + # TODO need to augment text_dataset_1 + # def test_TextPreprocessor(self): + # dataset_name = 'text_dataset_1' + # # No text_problem_1, so I use iris_problem instead + # problem = self.__get_problem('iris_problem_1') + # problem['problem']['task_keywords'] = [TaskKeyword.CLASSIFICATION, TaskKeyword.TEXT] + # dataset = self.__get_dataset(dataset_name) + # # TextSent2VecPreprocessor, TextPreprocessor + # pp = preprocessor.get_preprocessor( + # input_data=dataset, + # problem=problem, + # treatment=metadata_base.PrimitiveFamily.CLASSIFICATION.name, + # )[-1] + # add_classifier(pp.pipeline_description, pp.dataset_to_dataframe_step, pp.attributes, pp.targets) + # pprint(pp.pipeline_description.to_json_structure()) + # result = run_pipeline(pp.pipeline_description, dataset) + # result.check_success() + # self.assertEqual(result.error, None) + + # def test_timeseries_forecasting_tabular(self): + # dataset = datasets.get('timeseries_dataset_1') + # pp = preprocessor.get_preprocessor(task=metadata_base.PrimitiveFamily.TIME_SERIES_FORECASTING.name, + # treatment=metadata_base.PrimitiveFamily.TIME_SERIES_FORECASTING.name, + # data_types=[TaskKeyword.TIME_SERIES.name, TaskKeyword.TABULAR.name], + # semi=False, inputs_metadata=dataset.metadata, problem=None, + # main_resource='learningData')[0] + # + # 
add_classifier(pp.pipeline_description, pp.dataset_to_dataframe_step, pp.attributes, pp.targets) + # result = run_pipeline(pp.pipeline_description, dataset) + # pprint(pp.pipeline_description.to_json_structure()) + # result.check_success() + # self.assertEqual(result.error, None) + + # TODO need to update tests/data/datasets/audio_dataset_1 + # def test_AudioPreprocessor(self): + # dataset_name = 'audio_dataset_1' + # # No audio_problem_1, so I use iris_problem instead + # problem = self.__get_problem('iris_problem_1') + # problem['problem']['task_keywords'] = [TaskKeyword.AUDIO, TaskKeyword.VIDEO] + # dataset = self.__get_dataset(dataset_name) + # pp = preprocessor.get_preprocessor( + # input_data=dataset, + # problem=problem, + # treatment=metadata_base.PrimitiveFamily.DIGITAL_SIGNAL_PROCESSING.name, + # )[-1] + # volume = os.path.join(PROJECT_ROOT, 'tests') + # add_classifier(pp.pipeline_description, pp.dataset_to_dataframe_step, pp.attributes, pp.targets) + # pprint(pp.pipeline_description.to_json_structure()) + # result = run_pipeline(pp.pipeline_description, dataset, volume_dir=volume) + # result.check_success() + # + # self.assertEqual(result.error, None) + + def __get_uri(self, path): + return pathlib.Path(os.path.abspath(path)).as_uri() + + def __get_problem(self, dataset_name): + problem_path = os.path.join( + self.test_data, 'problems', dataset_name.replace('dataset', 'problem'), 'problemDoc.json') + problem_uri = self.__get_uri(problem_path) + problem = problem_module.Problem.load(problem_uri) + return problem + + def __get_dataset(self, dataset_name): + dataset_path = os.path.join( + self.test_data, 'datasets', dataset_name, 'datasetDoc.json') + dataset_uri = self.__get_uri(dataset_path) + dataset = container_module.dataset.get_dataset(dataset_uri) + return dataset + + +# if __name__ == '__main__': +# suite = unittest.TestSuite() +# for test_case in ( +# # 'test_ImageDataFramePreprocessor', +# 'test_TabularPreprocessor', +# # 'test_AudioPreprocessor', +# # 'test_TextPreprocessor', +# +# ): +# suite.addTest(TestPreprocessor(test_case)) +# unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/axolotl/tests/test_random_search.py b/axolotl/tests/test_random_search.py new file mode 100644 index 0000000..6067d39 --- /dev/null +++ b/axolotl/tests/test_random_search.py @@ -0,0 +1,91 @@ +import pathlib + +import ray +import shutil +import sys +import unittest + +import os +import tempfile +from axolotl.backend.ray import RayRunner + +from axolotl.algorithms.random_search import RandomSearch +from axolotl.backend.simple import SimpleRunner + +PROJECT_ROOT = os.path.join(os.path.dirname(__file__), '..') +sys.path.insert(0, PROJECT_ROOT) + +from d3m.metadata import problem as problem_module +from d3m import container as container_module +from axolotl.utils import pipeline as pipeline_utils + + +class TestRandomSearch(unittest.TestCase): + def setUp(self): + self.test_data = os.path.join(PROJECT_ROOT, 'tests', 'data') + dataset_name = 'iris_dataset_1' + problem = self.__get_problem(dataset_name) + self.problem = problem + self.dataset = self.__get_dataset(dataset_name) + self.test_dir = tempfile.mkdtemp() + backend = SimpleRunner(random_seed=42, volumes_dir=None, scratch_dir=self.test_dir) + self.tuner_base = RandomSearch(problem, backend=backend, max_trials=10, directory=self.test_dir) + + def tearDown(self): + shutil.rmtree(self.test_dir) + + def test_fit(self): + _, pipeline_result = self.tuner_base.search_fit(input_data=[self.dataset], time_limit=60) + 
self.assertEqual(pipeline_result.error, None)
+
+    def test_fit_svc(self):
+        pipeline_info = os.path.join(os.path.dirname(__file__), 'resources', 'svc_pipeline.json')
+        pipeline = pipeline_utils.load_pipeline(pipeline_info)
+        _, pipeline_result = self.tuner_base.search_fit(input_data=[self.dataset], time_limit=60,
+                                                        pipeline_candidates=[pipeline])
+        self.assertEqual(pipeline_result.error, None)
+
+    def test_fit_lr(self):
+        pipeline_info = os.path.join(os.path.dirname(__file__), 'resources', 'logistic_regeression.json')
+        pipeline = pipeline_utils.load_pipeline(pipeline_info)
+        _, pipeline_result = self.tuner_base.search_fit(input_data=[self.dataset], time_limit=60,
+                                                        pipeline_candidates=[pipeline])
+        self.assertEqual(pipeline_result.error, None)
+
+    def test_fit_ray(self):
+        if not ray.is_initialized():
+            ray.init()
+        backend = RayRunner(random_seed=42, volumes_dir=None, scratch_dir=self.test_dir)
+        tuner_base = RandomSearch(self.problem, backend=backend, max_trials=30, directory=self.test_dir)
+        _, pipeline_result = tuner_base.search_fit(input_data=[self.dataset], time_limit=60)
+        self.assertEqual(pipeline_result.error, None)
+        ray.shutdown()
+
+    def __get_uri(self, path):
+        return pathlib.Path(os.path.abspath(path)).as_uri()
+
+    def __get_problem(self, dataset_name):
+        problem_path = os.path.join(
+            self.test_data, 'problems', dataset_name.replace('dataset', 'problem'), 'problemDoc.json')
+        problem_uri = self.__get_uri(problem_path)
+        problem = problem_module.Problem.load(problem_uri)
+        return problem
+
+    def __get_dataset(self, dataset_name):
+        dataset_path = os.path.join(
+            self.test_data, 'datasets', dataset_name, 'datasetDoc.json')
+        dataset_uri = self.__get_uri(dataset_path)
+        dataset = container_module.dataset.get_dataset(dataset_uri)
+        return dataset
+
+
+if __name__ == '__main__':
+    suite = unittest.TestSuite()
+    for test_case in (
+            'test_fit',
+            'test_fit_svc',
+            'test_fit_lr',
+            'test_fit_ray',
+    ):
+        suite.addTest(TestRandomSearch(test_case))
+    unittest.TextTestRunner(verbosity=2).run(suite)
diff --git a/d3m/CODE_STYLE.md b/d3m/CODE_STYLE.md
new file mode 100644
index 0000000..7dc0afa
--- /dev/null
+++ b/d3m/CODE_STYLE.md
@@ -0,0 +1,258 @@
+# Code Style
+
+## Python
+
+**Consistency is the main code style guideline** and if in doubt, try to find similar existing code and style
+your code the same. Our code style is very similar to [PEP8](https://www.python.org/dev/peps/pep-0008/) with a few
+more details.
+
+Indent with 4 spaces. Never with tabs. No trailing whitespace.
+
+**Be verbose**. Always fully spell out any part of the function, class, or variable name.
+
+### Blank lines
+
+Use blank lines to organize long code blocks into units of what they do. Often a block is preceded by a
+comment explaining what the block does.
+
+This will help someone new understand the code more quickly when they read it. You are leaving little hints behind
+about which parts of the code to understand as one unit, one step of your algorithm. Imagine you were writing the code
+to be published in an article, trying to make everything as easy to learn as possible. It's the same
+here, because we assume our teammates are going to use the code after us.
+
+Comments always have one blank line before them, except when they are the first line of an indented block of code.
+
+```python
+for item in items:
+    # No new line above this comment.
+    ...
+
+# New line above this comment.
+...
+```
+
+Do not have multiple (two or more) blank lines beyond what is expected by PEP8.
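+
+For example, a longer function might use blank lines and comments to split its body into such units (an
+illustrative sketch; the names below are made up):
+
+```python
+def summarize_orders(orders):
+    # Illustrative only: ``orders`` is assumed to be a sequence of objects
+    # with ``quantity``, ``price``, and ``customer`` attributes.
+
+    # Compute the total value of all orders.
+    total_value = sum(order.quantity * order.price for order in orders)
+
+    # Group order values by customer so that we can report per-customer totals.
+    totals_by_customer = {}
+    for order in orders:
+        totals_by_customer.setdefault(order.customer, 0)
+        totals_by_customer[order.customer] += order.quantity * order.price
+
+    # Assemble the final summary structure.
+    return {
+        'total_value': total_value,
+        'totals_by_customer': totals_by_customer,
+    }
+```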
+
+### Line wrapping
+
+We **do not wrap lines** except when logically reasonable or when it greatly increases readability
+(we still wrap logically and not just at the end of the line).
+
+We do wrap comments at the 120-character right margin. If the comment wraps to two lines, balance the lines
+so they are both approximately the same length.
+
+The closing brace/bracket/parenthesis on multi-line constructs should align with the first character of the
+line that starts the multi-line construct, as in:
+
+```python
+my_list = [
+    1, 2, 3,
+    4, 5, 6,
+]
+result = some_function_that_takes_arguments(
+    'a', 'b', 'c',
+    'd', 'e', 'f',
+)
+```
+
+Always include a trailing comma in such cases.
+
+When defining a function which takes too many arguments to leave all of them in one line, use hanging indentation:
+
+```python
+def some_function_that_takes_arguments(
+    a, b, c,
+    d, e, f,
+):
+    return a + b + c + d + e + f
+```
+
+[Black](https://github.com/python/black) generally formats according to this style so you can use
+it to help you.
+
+### Strings
+
+Use `'single_quote_strings'` for constant strings and `"double_quote_strings"` for any string shown to the
+user (like exception messages or warnings). A general guideline is: if a string might ever be translated to a
+different language, use double quotes for it.
+
+This means all dictionary key names should use single quotes.
+
+Always use keyword-based string formatting. When only simple variable name interpolation is being done,
+[f-Strings](https://realpython.com/python-f-strings/) are the preferred format.
+
+```python
+f"Value is '{value}' and message is '{message}'."
+```
+
+If longer expressions are being computed, then `.format()` should be used, with keywords.
+
+```python
+"This complicated string lists all values: {values}".format(
+    values=[x.lower() for x in values],
+)
+```
+
+Wrap inline values inside messages with `'`. If the value is at the end of the message, there is no
+need for wrapping and also no need for a trailing dot.
+
+When creating logging statements, use `%`-based format, also with keyword-based arguments.
+
+```python
+logger.misc("Step '%(requirement)s'.", {'requirement': requirement})
+```
+
+### Logging
+
+Use the [Python logging facility](https://docs.python.org/3/library/logging.html) for all output and never use
+`print()` (except when used in CLI commands). Obtain the `logger` instance by using `__name__`, at the very
+beginning of the module:
+
+```python
+import logging
+
+logger = logging.getLogger(__name__)
+```
+
+### Imports
+
+Imports should be **just modules** divided into multiple sections, in order from more global to more local, separated by an empty line:
+ * core Python packages
+ * external Python packages
+ * non-local imports (for example, imports from some other top-level `d3m.` module)
+ * local relative imports for the current module and sub-modules
+
+Inside each section, imports should be ordered alphabetically, first based on package name, then on module imported.
+Each package should be on its own line, but importing multiple modules from the same package should be on one line.
+
+Example:
+
+```python
+import os
+import time
+
+import numpy
+from sklearn import metrics, model_selection
+
+from d3m import exceptions
+from d3m.metadata import problem
+
+from . import data
+```
+
+If you are importing multiple modules with the same name from different packages, rename the more global one with a prefix
+of the package:
+
+```python
+from sklearn import metrics as sklearn_metrics
+
+from d3m import metrics
+```
+
+### Docstrings
+
+Every class, method and function has a docstring with a description. Docstrings should be split into multiple lines
+when needed to improve readability. Docstrings should use the [numpy style](https://numpydoc.readthedocs.io/en/latest/format.html#docstring-standard)
+to document arguments, return values and everything else, which means also using [ReST/Sphinx](http://www.sphinx-doc.org/en/stable/rest.html)
+syntax for formatting.
+
+Always separate the docstring from the rest of the code with an empty line, and have `"""` on their own line, even
+for one-line docstrings.
+
+We use custom [Python metaclasses](https://docs.python.org/3/reference/datamodel.html#metaclasses) for d3m classes which
+[automatically inherit or extend docstrings from parent methods](https://github.com/meowklaski/custom_inherit):
+
+```python
+from d3m import utils
+
+
+class MyBaseClass(metaclass=utils.Metaclass):
+    pass
+
+
+class MyAbstractBaseClass(metaclass=utils.AbstractMetaclass):
+    pass
+```
+
+### Comments
+
+Both standalone one-sentence one-line comments and multi-sentence comments should have grammatically correct punctuation.
+For formatting, use [ReST/Sphinx](http://www.sphinx-doc.org/en/stable/rest.html) syntax.
+
+- When you are explaining what the code will do, end the sentence with a dot.
+
+  ```python
+  # Calculate total value.
+  value = quantity * price
+  ```
+- Short after-the-line comments (which should not be sentences) do not have an ending dot:
+
+  ```python
+  sleep(10)  # seconds
+  ```
+
+- Titles that separate sections of code are also not sentences (no dot).
+
+  ```python
+  ### Vector operations ###
+
+  def dot_product(vector1, vector2):
+      ...
+
+  def cross_product(vector1, vector2):
+      ...
+
+  ### Matrix operations ###
+
+  def transform(vector, matrix):
+      ...
+  ```
+
+If a TODO comment cannot be a short one-liner with grammatically correct punctuation, then split it into multiple lines in this way:
+
+```python
+# TODO: Short description of a TODO.
+# A longer description of what we could potentially do here. Maybe we
+# could do X or Y, but Y has these consequences. We should probably
+# wait for the server rendering feature to be implemented.
+# See: https://github.com/example/project/issues/123
+```
+
+Try to keep the formatting of the first line exactly as shown above so that it can be easily parsed by IDEs,
+including the space after `#` and the space after `:`.
+
+## Code repository
+
+Commit often and make sure each commit is a rounded change. Do not squash commits, unless that helps make a set of commits
+into a clearer change. We leave unsuccessful attempts in the repository because maybe in the future we can come back to them
+and use them, maybe in a different context or way.
+
+For almost all changes to the repository, we first make a feature branch from the `devel` branch. We make all necessary changes in
+that new branch, potentially making multiple commits. We make a merge request against the `devel` branch for the change
+to be reviewed and merged. We should make a merge request even before all changes are finished so that others can comment
+and discuss the development. We can continue adding more commits to this branch even after the merge request has been made
+and GitLab will update the merge request automatically. Until a merge request is finished and is deemed ready to be merged
+by its author, the merge request's title should be prefixed with `WIP:` so that it is clear that it is not yet meant
+to be merged (and thoroughly reviewed). Make sure you also include a change to the [changelog](#changelog) in your merge request.
+
+### Changelog
+
+We are maintaining a `HISTORY.md` file where we document changes to the project so that
+everyone involved can have one location where they can see what has changed and what
+they might adapt in their code or the way they are working on the project.
+
+### Commit messages
+
+Commit messages should be descriptive and full sentences, with grammatically correct punctuation.
+If possible, they should reference relevant tickets (by appending something like `See #123.`) or even close them
+(`Fixes #123.`). GitLab recognizes that. If a longer commit message is suitable (which is always a good thing),
+a one-line summary should come first (50 characters is a soft limit), followed by an empty line, followed
+by a multi-line message:
+
+    Added artificially lowering of the height in IE.
+
+    In IE there is a problem when rendering when the user is located
+    higher than 2000m. By artificially lowering the height, rendering
+    now works again.
+
+    Fixes #123.
diff --git a/d3m/HISTORY.md b/d3m/HISTORY.md
new file mode 100644
index 0000000..7989ca6
--- /dev/null
+++ b/d3m/HISTORY.md
@@ -0,0 +1,1823 @@
+## v2020.5.18
+
+### Enhancements
+
+* Scoring primitive and pipeline now accept new hyper-parameter `all_labels`
+  which can be used to provide information about all labels possible in a target
+  column.
+  [#431](https://gitlab.com/datadrivendiscovery/d3m/-/issues/431)
+  [!377](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/377)
+* Added `all_distinct_values` metadata field which can contain all values (labels)
+  which can be in a column. This is meant to be used on target columns to help
+  implementing `ContinueFitMixin` in a primitive which might require knowledge
+  of all possible labels before starting fitting on a subset of data.
+  [#447](https://gitlab.com/datadrivendiscovery/d3m/-/issues/447)
+  [!377](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/377)
+* Reference runtime now does not keep primitive instances in memory anymore
+  but uses `get_params`/`set_params` to retain and reuse only primitive's parameters.
+  This makes memory usage lower and allows additional resource releasing when primitive's
+  object is freed (e.g., releasing GPUs).
+  [#313](https://gitlab.com/datadrivendiscovery/d3m/-/issues/313)
+  [!376](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/376)
+* Added support for version 4.1.0 of D3M dataset schema:
+  * Added `MONTHS` to column's `time_granularity` metadata.
+    [!340](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/340)
+  * Added mean reciprocal rank and hits at k metrics.
+    [!361](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/361)
+  * Added `https://metadata.datadrivendiscovery.org/types/Rank` semantic type
+    and `rank_for` metadata field. `PerformanceMetric` classes now have a
+    `requires_rank` method.
+    [!372](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/372)
+  * Added `NESTED` task keyword.
+    [!372](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/372)
+  * Added `file_columns_count` metadata field and updated `file_columns` metadata field
+    with additional sub-fields.
Also renamed sub-field `name` to `column_name` and added + `column_index` sub-fields to `file_columns` metadata. + [!372](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/372) + **Backwards incompatible.** +* Moved high-level primitive base classes for file readers and dataset splitting + from common primitives to d3m core package. + [!120](https://gitlab.com/datadrivendiscovery/common-primitives/-/merge_requests/120) + [!339](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/339) +* A warning is issued if a primitive uses a global random source + during pipeline execution. Such behavior can make pipeline + execution not reproducible. + [#384](https://gitlab.com/datadrivendiscovery/d3m/-/issues/384) + [!365](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/365) +* CLI accepts `--logging-level` argument to configure which logging + messages are printed to the console. + [!360](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/360) +* Output to stdout/stderr during pipeline execution is now not suppressed + anymore, which makes it possible to debug pipeline execution using pdb. + Stdout/stderr is at the same time still logged to Python logging. + [#270](https://gitlab.com/datadrivendiscovery/d3m/-/issues/270) + [!360](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/360) +* Redirect from stdout to Python logging now operates per lines and + not per write operations, makes logs more readable. + [#168](https://gitlab.com/datadrivendiscovery/d3m/-/issues/168) + [!358](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/358) +* Made sure that multi-label metrics work correctly. + [#370](https://gitlab.com/datadrivendiscovery/d3m/-/issues/370) + [!343](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/343) +* Implemented ROC AUC metrics. They require predictions to include + confidence for all possible labels. + [#317](https://gitlab.com/datadrivendiscovery/d3m/-/issues/317) + [!318](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/318) +* Additional (non-standard) performance metrics can now be registered + using `PerformanceMetric.register_metric` class method. + [#207](https://gitlab.com/datadrivendiscovery/d3m/-/issues/207) + [!348](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/348) +* All D3M enumerations can now be extended with additional values + through `register_value` class method. This allows one to add values + to existing standard values (which come from the metadata schema). + Internally, enumeration values are now represented as strings and not + integers anymore. + [#438](https://gitlab.com/datadrivendiscovery/d3m/-/issues/438) + [!348](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/348) + **Could be backwards incompatible.** +* Added CLI to validate primitive descriptions for metalearning database + (`python3 -m d3m index validate`). + [!333](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/333) +* Raise an exception during dataset loading if `targets.csv` file does + not combine well with the dataset entry point. + [!330](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/330) + +### Bugfixes + +* CLI now displays correct error messages for invalid arguments to subcommands. + [#409](https://gitlab.com/datadrivendiscovery/d3m/-/issues/409) + [!368](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/368) +* Reference runtime does not call `fit` and `produce` + methods in a loop anymore. This mitigates an infinite loop for misbehaving primitives. 
+ [!364](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/364) +* During pipeline execution all Python logging is now recorded in the + pipeline run and it does not depend anymore on logging level otherwise + configured during execution. + [!360](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/360) +* Default sampling code for hyper-parameters now makes sure to return + values in original types and not numpy ones. + [#440](https://gitlab.com/datadrivendiscovery/d3m/-/issues/440) + [!352](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/352) +* We now ensure that file handles opened for CLI commands are flushed + so that data is not lost. + [#436](https://gitlab.com/datadrivendiscovery/d3m/issues/436) + [!335](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/335) +* Fixed saving exposed produced outputs for `fit-score` CLI command when + scoring failed. + [!341](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/341) +* Made sure `time_granularity` metadata is saved when saving a D3M dataset. + [!340](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/340) +* Changed version of GitPython dependency to 3.1.0 to fix older versions + being broken because of its own unconstrained upper dependency. + [!336](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/336) +* Fixed how paths are constructed when exposing and saving produced values. + [!336](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/336) + +### Other + +* Added guides to the documentation. + [!351](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/351) + [!374](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/374) +* Removed type annotations from docstrings. Python type annotations are now used instead when rendering documentation. + [#239](https://gitlab.com/datadrivendiscovery/d3m/-/issues/239) + [!371](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/371) +* Renamed `blacklist` in `d3m.index.load_all` and `primitives_blacklist` in `d3m.metadata.pipeline.Resolver` + to `blocklist` and `primitives_blocklist`, respectively. + **Backwards incompatible.** +* Removed `https://metadata.datadrivendiscovery.org/types/GPUResourcesUseParameter` + semantic type. Added `can_use_gpus` primitive metadata field to signal that + the primitive can use GPUs if available. + [#448](https://gitlab.com/datadrivendiscovery/d3m/-/issues/448) + [!369](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/369) + **Backwards incompatible.** +* Clarified that hyper-parameters using `https://metadata.datadrivendiscovery.org/types/CPUResourcesUseParameter` + should have 1 as default value. + [!369](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/369) +* Clarified that it is not necessary to call `fit` before calling + `continue_fit`. +* `index` CLI command has been renamed to `primitive` CLI command. + [#437](https://gitlab.com/datadrivendiscovery/d3m/-/issues/437) + [!363](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/363) +* `numpy.matrix` has been removed as an allowed container type, as it + was deprecated by NumPy. + [#230](https://gitlab.com/datadrivendiscovery/d3m/-/issues/230) + [!362](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/362) + **Backwards incompatible.** +* CLI has now `--version` command which returns the version of the d3m + core package itself. 
+ [#378](https://gitlab.com/datadrivendiscovery/d3m/-/issues/378) + [!359](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/359) +* Upgraded schemas to JSON Schema draft 7, and upgraded Python `jsonschema` + dependency to version 3. + [#392](https://gitlab.com/datadrivendiscovery/d3m/-/issues/392) + [!342](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/342) +* Added a Primitive Good Citizen Checklist to documentation, documenting + some best practices when writing a primitive. + [#127](https://gitlab.com/datadrivendiscovery/d3m/-/issues/127) + [!347](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/347) + [!355](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/355) +* Updated upper bounds of core dependencies to latest available versions. + [!337](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/337) +* Added to `algorithm_types`: + * `SAMPLE_SELECTION` + * `SAMPLE_MERGING` + * `MOMENTUM_CONTRAST` + * `CAUSAL_ANALYSIS` + + [!332](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/332) + [!357](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/357) + [!373](https://gitlab.com/datadrivendiscovery/d3m/-/merge_requests/373) + +## v2020.1.9 + +### Enhancements + +* Support for D3M datasets with minimal metadata. + [#429](https://gitlab.com/datadrivendiscovery/d3m/issues/429) + [!327](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/327) +* Pipeline runs (and in fact many other input documents) can now be directly used gzipped + in all CLI commands. They have to have filename end with `.gz` for decompression to happen + automatically. + [#420](https://gitlab.com/datadrivendiscovery/d3m/issues/420) + [!317](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/317) +* Made problem descriptions again more readable when converted to JSON. + [!316](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/316) +* Improved YAML handling to encourage faster C implementation. + [#416](https://gitlab.com/datadrivendiscovery/d3m/issues/416) + [!313](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/313) + +### Bugfixes + +* Fixed the error message if all required CLI arguments are not passed to the runtime. + [#411](https://gitlab.com/datadrivendiscovery/d3m/issues/411) + [!319](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/319) +* Removed assumption that all successful pipeline run steps have method calls. + [#422](https://gitlab.com/datadrivendiscovery/d3m/issues/422) + [!320](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/320) +* Fixed "Duplicate problem ID" warnings when multiple problem descriptions + have the same problem ID, but in fact they are the same problem description. + No warning is made in this case anymore. + [#417](https://gitlab.com/datadrivendiscovery/d3m/issues/417) + [!321](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/321) +* Fixed the use of D3M container types in recent versions of Keras and TensorFlow. + [#426](https://gitlab.com/datadrivendiscovery/d3m/issues/426) + [!322](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/322) +* Fixed `validate` CLI commands to work on YAML files. + +### Other + +* Updated upper bounds of core dependencies to latest available versions. + [#427](https://gitlab.com/datadrivendiscovery/d3m/issues/427) + [!325](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/325) +* Refactored default pipeline run parser implementation to make it + easier to provide alternative dataset and problem resolvers. 
+ [!314](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/314) +* Moved out local test primitives into [`tests/data` git submodule](https://gitlab.com/datadrivendiscovery/tests-data). + Now all test primitives are in one place. + [#254](https://gitlab.com/datadrivendiscovery/d3m/issues/254) + [!312](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/312) + +## v2019.11.10 + +* Support for version 4.0.0 of D3M dataset schema has been added. +* D3M core package now supports loading directly datasets from OpenML. +* When saving `Dataset` object to D3M dataset format, metadata is now preserved. +* NetworkX objects are not anymore container types and are not allowed + anymore to be passed as values between primitives. +* "Meta" files are not supported anymore by the runtime. Instead save a + pipeline run with configuration of the run you want, and use the pipeline + run to re-run using that configuration. + +### Enhancements + +* Primitive family `REMOTE_SENSING` has been added. + [!310](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/310) +* Added support for version 4.0.0 of D3M dataset schema: + * There are no more `NODE` and `EDGE` references (used in graph datasets), + but only `NODE_ATTRIBUTE` and `EDGE_ATTRIBUTE`. + * `time_granularity` can now be present on a column. + * `forecasting_horizon` can now be present in a problem description. + * `task_type` and `task_subtype` have been merged into `task_keywords`. + As a consequence, Python `TaskType` and `TaskSubtype` were replaced + with `TaskKeyword`. + + [#401](https://gitlab.com/datadrivendiscovery/d3m/issues/401) + [!310](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/310) + **Backwards incompatible.** + +* Added OpenML dataset loader. Now you can pass an URL to a OpenML dataset + and it will be downloaded and converted to a `Dataset` compatible object, + with including many of available meta-features. Combined with support + for saving datasets, this now allows easy conversion between OpenML + datasets and D3M datasets, e.g., `python3 -m d3m dataset convert -i https://www.openml.org/d/61 -o out/datasetDoc.json`. + [#252](https://gitlab.com/datadrivendiscovery/d3m/issues/252) + [!309](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/309) +* When saving and loading D3M datasets, metadata is now preserved. + [#227](https://gitlab.com/datadrivendiscovery/d3m/issues/227) + [!265](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/265) +* Metadata can now be converted to a JSON compatible structure in a + reversible manner. + [#373](https://gitlab.com/datadrivendiscovery/d3m/issues/373) + [!308](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/308) +* Pipeline run now records if a pipeline was run as a standard pipeline + under `run.is_standard_pipeline` field. + [#396](https://gitlab.com/datadrivendiscovery/d3m/issues/396) + [!249](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/249) +* "meta" files have been replaced with support for rerunning pipeline runs. + Instead of configuring a "meta" file with configuration how to run a + pipeline, simply provide an example pipeline run which demonstrates how + the pipeline was run. Runtime does not have `--meta` argument anymore, + but has now `--input-run` argument instead. + [#202](https://gitlab.com/datadrivendiscovery/d3m/issues/202) + [!249](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/249) + **Backwards incompatible.** +* Changed `LossFunctionMixin` to support multiple loss functions. 
+ [#386](https://gitlab.com/datadrivendiscovery/d3m/issues/386) + [!305](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/305) + **Backwards incompatible.** +* Pipeline equality and hashing functions now have `only_control_hyperparams` + argument which can be set to use only control hyper-parameters when doing + comparisons. + [!289](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/289) +* Pipelines and other YAML files are now recognized with both `.yml` and + `.yaml` file extensions. + [#375](https://gitlab.com/datadrivendiscovery/d3m/issues/375) + [!302](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/302) +* `F1Metric`, `F1MicroMetric`, and `F1MacroMetric` can now operate on + multiple target columns and average scores for all of them. + [#400](https://gitlab.com/datadrivendiscovery/d3m/issues/400) + [!298](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/298) +* Pipelines and pipeline runs can now be serialized with Arrow. + [#381](https://gitlab.com/datadrivendiscovery/d3m/issues/381) + [!290](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/290) +* `describe` CLI commands now accept `--output` argument to control where + their output is saved to. + [!279](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/279) + +### Bugfixes + +* Made exposed outputs be stored even in the case of an exception. + [#380](https://gitlab.com/datadrivendiscovery/d3m/issues/380) + [!304](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/304) +* Fixed `source.from` metadata in datasets and problem descriptions + and its validation for metalearning database. + [#363](https://gitlab.com/datadrivendiscovery/d3m/issues/363) + [!303](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/303) +* Fixed pipeline run references when running the runtime through + evaluation command. + [#395](https://gitlab.com/datadrivendiscovery/d3m/issues/395) + [!294](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/294) +* The core package scoring primitive has been updated to have digest. + This allows the core package scoring pipeline to have it as well. + This changes makes it required for the core package to be installed + in editable mode (`pip3 install -e ...`) when being installed from the + git repository. + [!280](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/280) + **Backwards incompatible.** + +### Other + +* Few top-level runtime functions had some of their arguments moved + to keyword-only arguments: + * `fit`: `problem_description` + * `score`: `scoring_pipeline`, `problem_description`, `metrics`, `predictions_random_seed` + * `prepare_data`: `data_pipeline`, `problem_description`, `data_params` + * `evaluate`: `data_pipeline`, `scoring_pipeline`, `problem_description`, `data_params`, `metrics` + + [#352](https://gitlab.com/datadrivendiscovery/d3m/issues/352) + [!301](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/301) + **Backwards incompatible.** + +* `can_accept` method has been removed from primitive interfaces. + [#334](https://gitlab.com/datadrivendiscovery/d3m/issues/334) + [!300](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/300) + **Backwards incompatible.** +* NetworkX objects are not anymore container types and are not allowed + anymore to be passed as values between primitives. Dataset loader now + does not convert a GML file to a NetworkX object but represents it + as a files collection resource. A primitive should then convert that + resource into a normalized edge-list graph representation. 
+ [#349](https://gitlab.com/datadrivendiscovery/d3m/issues/349) + [!299](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/299) + **Backwards incompatible.** +* `JACCARD_SIMILARITY_SCORE` metric is now a binary metric and requires + `pos_label` parameter. + [!299](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/299) + **Backwards incompatible.** +* Updated core dependencies. Some important packages are now at versions: + * `tensorflow`: 2.0.0 + * `keras`: 2.3.1 + * `torch`: 1.3.0.post2 + * `theano`: 1.0.4 + * `scikit-learn`: 0.21.3 + * `numpy`: 1.17.3 + * `pandas`: 0.25.2 + * `networkx`: 2.4 + * `pyarrow`: 0.15.1 + * `scipy`: 1.3.1 + + [#398](https://gitlab.com/datadrivendiscovery/d3m/issues/398) + [#379](https://gitlab.com/datadrivendiscovery/d3m/issues/379) + [!299](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/299) + +* Primitive family `DIMENSIONALITY_REDUCTION` has been added. + [!284](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/284) +* Added to `algorithm_types`: + * `POLYNOMIAL_REGRESSION` + * `IMAGENET` + * `RETINANET` + + [!306](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/306) + +* `--process-dependency-link` is not anymore suggested to be used when + installing primitives. +* `sample_rate` metadata field inside `dimension` has been renamed to + `sampling_rate` to make it consistent across metadata. This field + should contain a sampling rate used for the described dimension, + when values in the dimension are sampled. + **Backwards incompatible.** + +## v2019.6.7 + +### Enhancements + +* Dataset loading has been optimized for the case when only one file + type exists in a file collection. Metadata is also simplified in this case. + [#314](https://gitlab.com/datadrivendiscovery/d3m/issues/314) + [!277](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/277) +* Support defining unfitted primitives in the pipeline for passing them + to another primitive as a hyper-parameter. Unfitted primitives do not + have any input connected and runtime just creates a primitive instance + but does not fit or produce them. It then passes this primitive instance + to another primitive as a hyper-parameter value. + [!274](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/274) +* When saving datasets, we now use hard-linking of files when possible. + [#368](https://gitlab.com/datadrivendiscovery/d3m/issues/368) + [!271](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/271) + +### Bugfixes + +* Specifying `-E` to the `d3m runtime` CLI now exposes really all outputs + of all steps and not just pipeline outputs. + [#367](https://gitlab.com/datadrivendiscovery/d3m/issues/367) + [!270](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/270) +* Fixed minor issues when loading sklearn example datasets. +* Fixed PyPi metadata of the package. + [!267](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/267) +* When saving D3M dataset, also structural type information is now used to set + column type. + [#339](https://gitlab.com/datadrivendiscovery/d3m/issues/339) + [!255](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/255) +* When saving D3M dataset, update digest of saved dataset to digest of + what has been saved. + [#340](https://gitlab.com/datadrivendiscovery/d3m/issues/340) + [!262](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/262) + +### Other + +* Pipeline's `get_exposable_outputs` method has been renamed to `get_producing_outputs`. 
+ [!270](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/270) +* Updating columns from DataFrame returned from `DataFrame.select_columns` + does not raise a warning anymore. + [!268](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/268) +* Added `scipy==1.2.1` as core dependency. + [!266](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/266) +* Added code style guide to the repository. + [!260](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/260) +* Added to `algorithm_types`: + + * `ITERATIVE_LABELING` + + [!276](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/276) + +## v2019.5.8 + +* This release contains an implementation of `D3MDatasetSaver` so `Dataset` objects + can now be saved using their `save` method into D3M dataset format. +* Additional hyper-parameters classes have been defined and existing improved. + Probably the most useful addition is `List` hyper-parameter which allows + repeated values with order of values (in contrast with `Set`). +* Standard graph representation has been standardized (a nodelist table and an + edge list table) and related semantic types have been added to mark source + and target columns for edges. +* Standard time-series representation has been standardized (a long format) + and related semantic types have been added to identify columns to index + time-series by. +* Feature construction primitive should mark newly constructed attributes + with `https://metadata.datadrivendiscovery.org/types/ConstructedAttribute` + semantic type. +* There are now mixins available to define primitives which can be used to + describe neural networks as pipelines. +* There is now a single command line interface for the core package under + `python3 -m d3m`. + +### Enhancements + +* Runtime now raises an exception if target columns from problem description + could not be found in provided input datasets. + [#281](https://gitlab.com/datadrivendiscovery/d3m/issues/281) + [!155](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/155) +* Core package command line interfaces have been consolidated and revamped + and are now all available under single `python3 -m d3m`. + [#338](https://gitlab.com/datadrivendiscovery/d3m/issues/338) + [!193](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/193) + [!233](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/233) +* Added `--expose-produced-outputs` argument runtime CLI to allow saving + to a directory produced outputs of all primitives from pipeline's run. + Useful for debugging. + [#206](https://gitlab.com/datadrivendiscovery/d3m/issues/206) + [!223](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/223) +* CSVLoader and SklearnExampleLoader dataset loaders now add + `d3mIndex` column if one does not exist already. + [#266](https://gitlab.com/datadrivendiscovery/d3m/issues/266) + [!202](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/202) +* Added `--not-standard-pipeline` argument to `fit`, `produce`, and `fit-produce` + runtime CLI to allow running non-standard pipelines. + [#312](https://gitlab.com/datadrivendiscovery/d3m/issues/312) + [!228](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/228) +* Sampling `Bounded` and base `Hyperparameter` hyper-parameter now issues + a warning that sampling of those hyper-parameters is ill-defined. + [!220](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/220) +* `Bounded` hyper-parameter with both bounds now samples from uniform + distribution. 
+ [!220](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/220) +* Added new hyper-parameter classes: `SortedSet`, `List`, and `SortedList`. + [#236](https://gitlab.com/datadrivendiscovery/d3m/issues/236) + [#292](https://gitlab.com/datadrivendiscovery/d3m/issues/292) + [!219](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/219) +* All bounded hyper-parameter classes now accept additional arguments to + control if bounds are inclusive or exclusive. + [#199](https://gitlab.com/datadrivendiscovery/d3m/issues/199) + [!215](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/215) +* `Dataset` objects can now be saved to D3M dataset format by + calling `save` method on them. + [#31](https://gitlab.com/datadrivendiscovery/d3m/issues/31) + [#344](https://gitlab.com/datadrivendiscovery/d3m/issues/344) + [!96](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/96) + [!217](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/217) + +### Bugfixes + +* Fixed `NormalizeMutualInformationMetric` implementation. + [#357](https://gitlab.com/datadrivendiscovery/d3m/issues/357) + [!257](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/257) +* JSON representation of `Union` hyper-parameter values and other + pickled hyper-parameter values has been changed to assure better + interoperability. + [#359](https://gitlab.com/datadrivendiscovery/d3m/issues/359) + [!256](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/256) + **Backwards incompatible.** +* All d3m schemas are now fully valid according to JSON schema draft v4. + [#79](https://gitlab.com/datadrivendiscovery/d3m/issues/79) + [!233](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/233) +* Fixed an error when saving a fitted pipeline to stdout. + [#353](https://gitlab.com/datadrivendiscovery/d3m/issues/353) + [!250](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/250) +* Hyper-parameters cannot use `NaN` and infinity floating-point values + as their bounds. This assures compatibility with JSON. + [#324](https://gitlab.com/datadrivendiscovery/d3m/issues/324) + [!237](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/237) + **Backwards incompatible.** +* Pipelines are now exported to JSON in strict compliance of the + JSON specification. + [#323](https://gitlab.com/datadrivendiscovery/d3m/issues/323) + [!238](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/238) +* Runtime execution does not fail anymore if predictions cannot be converted + to JSON for pipeline run. A warning is issued instead. + [#347](https://gitlab.com/datadrivendiscovery/d3m/issues/347) + [!227](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/227) +* Better support for running reference runtime without exceptions on non-Linux + operating systems. + [#246](https://gitlab.com/datadrivendiscovery/d3m/issues/246) + [!218](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/218) +* Strict checking of dataset, pipeline and primitive digests against those provided + in metadata are now correctly controlled using `--strict-digest`/`strict_digest` + arguments. + [#346](https://gitlab.com/datadrivendiscovery/d3m/issues/346) + [!213](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/213) +* Fixed error propagation in `evaluate` runtime function, if error + happens during scoring. + [!210](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/210) +* Fixed accessing container DataFrame's `metadata` attribute when + DataFrame also contains a column with the name `metadata`. 
+ [#330](https://gitlab.com/datadrivendiscovery/d3m/issues/330) + [!201](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/201) +* Fixed `.meta` file resolving when `--datasets` runtime argument + is not an absolute path. + [!194](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/194) +* Fixed `get_relations_graph` resolving of column names (used in `Denormalize` + common primitive). + [!196](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/196) + +### Other + +* Other validation functions for metalearning documents. This includes + also CLI to validate. + [#220](https://gitlab.com/datadrivendiscovery/d3m/issues/220) + [!233](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/233) +* Pipeline run schema now requires scoring dataset inputs to be recorded + if a data preparation pipeline has not been used. + [!243](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/243) + **Backwards incompatible.** +* Core package now provides standard scoring primitive and scoring pipeline + which are used by runtime by default. + [#307](https://gitlab.com/datadrivendiscovery/d3m/issues/307) + [!231](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/231) +* Pipeline run can now be generated also for a subset of non-standard + pipelines: those which have all inputs of `Dataset` type. + [!232](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/232) +* Pipeline run now also records a normalized score, if available. + [!230](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/230) +* Pipeline `context` field has been removed from schema and implementation. + [!229](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/229) +* Added `pure_primitive` field to primitive's metadata so that primitives + can mark themselves as not pure (by default all primitives are seen as pure). + [#331](https://gitlab.com/datadrivendiscovery/d3m/issues/331) + [!226](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/226) +* `Metadata` methods `to_json_structure` and `to_simple_structure` has been + modified to not return anymore internal metadata representation but + metadata representation equivalent to what you get from `query` call. + To obtain internal representation use `to_internal_json_structure` + and `to_internal_simple_structure`. + [!225](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/225) + **Backwards incompatible.** +* `NeuralNetworkModuleMixin` and `NeuralNetworkObjectMixin` have been + added to primitive interfaces to support representing neural networks + as pipelines. + [#174](https://gitlab.com/datadrivendiscovery/d3m/issues/174) + [!87](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/87) +* `get_loss_function` has been renamed to `get_loss_metric` in + `LossFunctionMixin`. + [!87](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/87) + **Backwards incompatible.** +* `UniformInt`, `Uniform`, and `LogUniform` hyper-parameter classes now + subclass `Bounded` class. + [!216](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/216) +* Metrics do not have default parameter values anymore, cleaned legacy + parts of code assuming so. 
+ [!212](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/212) +* Added new semantic types: + * `https://metadata.datadrivendiscovery.org/types/EdgeSource` + * `https://metadata.datadrivendiscovery.org/types/DirectedEdgeSource` + * `https://metadata.datadrivendiscovery.org/types/UndirectedEdgeSource` + * `https://metadata.datadrivendiscovery.org/types/SimpleEdgeSource` + * `https://metadata.datadrivendiscovery.org/types/MultiEdgeSource` + * `https://metadata.datadrivendiscovery.org/types/EdgeTarget` + * `https://metadata.datadrivendiscovery.org/types/DirectedEdgeTarget` + * `https://metadata.datadrivendiscovery.org/types/UndirectedEdgeTarget` + * `https://metadata.datadrivendiscovery.org/types/SimpleEdgeTarget` + * `https://metadata.datadrivendiscovery.org/types/MultiEdgeTarget` + * `https://metadata.datadrivendiscovery.org/types/ConstructedAttribute` + * `https://metadata.datadrivendiscovery.org/types/SuggestedGroupingKey` + * `https://metadata.datadrivendiscovery.org/types/GroupingKey` + + [#134](https://gitlab.com/datadrivendiscovery/d3m/issues/134) + [#348](https://gitlab.com/datadrivendiscovery/d3m/issues/348) + [!211](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/211) + [!214](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/214) + +* Updated core dependencies. Some important packages are now at versions: + * `scikit-learn`: 0.20.3 + * `pyarrow`: 0.13.0 + + [!206](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/206) + +* Clarified in primitive interface documentation that if primitive should have been + fitted before calling its produce method, but it has not been, primitive should + raise a ``PrimitiveNotFittedError`` exception. + [!204](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/204) +* Added to `algorithm_types`: + + * `EQUI_JOIN` + * `DATA_RETRIEVAL` + * `DATA_MAPPING` + * `MAP` + * `INFORMATION_THEORETIC_METAFEATURE_EXTRACTION` + * `LANDMARKING_METAFEATURE_EXTRACTION` + * `MODEL_BASED_METAFEATURE_EXTRACTION` + * `STATISTICAL_METAFEATURE_EXTRACTION` + * `VECTORIZATION` + * `BERT` + + [!160](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/160) + [!186](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/186) + [!224](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/224) + [!247](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/247) + +* Primitive family `METAFEATURE_EXTRACTION` has been renamed to `METALEARNING`. + [!160](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/160) + **Backwards incompatible.** + +## v2019.4.4 + +* With this release metadata is not automatically generated anymore when DataFrame or ndarray + is being wrapped into a corresponding container type. Now you have to explicitly set + `generate_metadata` constructor argument to `True` or call `generate` method on metadata + object afterwards. + This has been changed to improve performance of many primitives and operations on + container types which were slowed down because of unnecessary and unexpected + generation of metadata. + This change requires manual inspection of primitive's code to determine what change + is necessary. Some suggestions what to look for: + * `set_for_value` method has been deprecated: generally it can be replaced with `generate` + call, or even removed in some cases: + * `value.metadata = value.metadata.set_for_value(value, generate_metadata=False)` remove. + * `value.metadata = new_metadata.set_for_value(value, generate_metadata=False)` replace with `value.metadata = new_metadata`. 
+ * `value.metadata = new_metadata.set_for_value(value, generate_metadata=True)` replace with `value.metadata = new_metadata.generate(value)`. + * `clear` method has been deprecated: generally you can now instead simply create + a fresh instance of `DataMetadata`, potentially calling `generate` as well: + * `outputs_metadata = inputs_metadata.clear(new_metadata, for_value=outputs, generate_metadata=True)` replace with + `outputs_metadata = metadata_base.DataMetadata(metadata).generate(outputs)`. + * `outputs_metadata = inputs_metadata.clear(for_value=outputs, generate_metadata=False)` replace with + `outputs_metadata = metadata_base.DataMetadata()`. + * Search for all calls to constructors of `container.List`, `container.ndarray`, + `container.Dataset`, `container.DataFrame` container types and explicitly set + `generate_metadata` to `True`. Alternatively, you can also manually update + metadata instead of relying on automatic metadata generation. + * The main idea is that if you are using automatic metadata generation in your primitive, + make sure you generate it only once, just before you return container type from + your primitive. Of course, if you call code which expects metadata from inside your primitive, + you might have to assure or generate metadata before calling that code as well. + +### Enhancements + +* Primitives now get a `temporary_directory` constructor argument pointing + to a directory they can use to store any files for the duration of current pipeline + run phase. The main intent of this temporary directory is to store files referenced + by any ``Dataset`` object your primitive might create and followup primitives in + the pipeline should have access to. To support configuration of the location of these + temporary directories, the reference runtime now has a `--scratch` command line argument + and corresponding `scratch_dir` constructor argument. + [#306](https://gitlab.com/datadrivendiscovery/d3m/issues/306) + [!190](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/190) +* Made sure that number of inputs provided to the runtime has to match the number of inputs a pipeline accepts. + [#301](https://gitlab.com/datadrivendiscovery/d3m/issues/301) + [!183](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/183) +* Supported MIT-LL dataset and problem schemas version 3.3.0. Now all suggested targets and suggested privileged data + columns are now by default also attributes. Runtime makes sure that if any column is marked as problem description's + target it is not marked as an attribute anymore. + [#291](https://gitlab.com/datadrivendiscovery/d3m/issues/291) + [!182](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/182) + **Backwards incompatible.** +* `steps` and `method_calls` made optional in pipeline run schema to allow easier recording of failed pipelines. + [!167](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/167) +* Pipeline run now records also start and end timestamps of pipelines and steps. + [#258](https://gitlab.com/datadrivendiscovery/d3m/issues/258) + [!162](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/162) +* `Metadata` has two new methods to query metadata, `query_field` and `query_field_with_exceptions` + which you can use when you want to query just a field of metadata, and not whole metadata object. + Similarly, `DataMetadata` has a new method `query_column_field`. 
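+
+  For example (a minimal sketch; the signatures `query_field(selector, field)` and
+  `query_column_field(column_index, field)` are assumed here, as this entry does not
+  spell them out):
+
+  ```python
+  from d3m import container
+
+  df = container.DataFrame({'a': [1, 2, 3]}, generate_metadata=True)
+
+  # Query a single top-level metadata field instead of the whole metadata object.
+  structural_type = df.metadata.query_field((), 'structural_type')
+
+  # Query a single field of the metadata of the first column.
+  column_name = df.metadata.query_column_field(0, 'name')
+  ```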
+* `DataMetadata`'s `generate` method now has a `compact` argument to control whether
+  automatically generated metadata is compacted (if all elements of a dimension have equal
+  metadata, it is compacted into an `ALL_ELEMENTS` selector segment) or not (default).
+  There is also a `compact` method available on `Metadata` to compact metadata on demand.
+* Automatically generated metadata is no longer compacted by default
+  (compacting means that when all elements of a dimension have equal metadata, that
+  metadata is moved to the `ALL_ELEMENTS` selector segment).
+* The `generate_metadata` argument of container types' constructors has been switched
+  from a default of `True` to a default of `False` to prevent unnecessary and unexpected
+  generation of metadata, which slowed down execution of primitives. Moreover,
+  `DataMetadata` now has a `generate` method which can be used to explicitly
+  generate and update metadata given a data value.
+  The metadata methods `set_for_value` and `clear` have been deprecated and can
+  generally be replaced with a `generate` call, with creating a new metadata
+  object, or by removing the call.
+  **Backwards incompatible.**
+  [#143](https://gitlab.com/datadrivendiscovery/d3m/issues/143)
+  [!180](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/180)
+* Loading of datasets with many files has been heavily optimized.
+  [#164](https://gitlab.com/datadrivendiscovery/d3m/issues/164)
+  [!136](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/136)
+  [!178](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/178)
+  [!179](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/179)
+* Extended the container `DataFrame.to_csv` method to use, by default,
+  metadata column names for the CSV header instead of the column names of the
+  `DataFrame` itself.
+  [!158](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/158)
+* Problem parsing has been refactored into an extendable system similar to how
+  dataset parsing is done. A simple `d3m.metadata.problem.Problem` class has
+  been defined to contain a problem description. The default implementation supports
+  loading of D3M problems. The `--problem` command line argument to the reference runtime
+  can now be a path or URI to a problem description.
+  [#276](https://gitlab.com/datadrivendiscovery/d3m/issues/276)
+  [!145](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/145)
+* Data metadata is no longer validated at every update, but only when explicitly
+  validated using the `check` method. This improves metadata performance.
+  [!144](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/144)
+
+### Other
+
+* Top-level runtime functions now also return `Result` (or new `MultiResult`)
+  objects instead of raising a special `PipelineRunError` exception (which has been
+  removed) and instead of returning just a pipeline run (which is now available
+  inside `Result`).
+  [#297](https://gitlab.com/datadrivendiscovery/d3m/issues/297)
+  [!192](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/192)
+  **Backwards incompatible.**
+* Metrics have been reimplemented to operate on the whole predictions DataFrame.
+  [#304](https://gitlab.com/datadrivendiscovery/d3m/issues/304)
+  [#311](https://gitlab.com/datadrivendiscovery/d3m/issues/311)
+  [!171](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/171)
+  **Backwards incompatible.**
+* Pipeline run implementation has been refactored to be in a single class to
+  facilitate easier subclassing.
+ [#255](https://gitlab.com/datadrivendiscovery/d3m/issues/255) + [#305](https://gitlab.com/datadrivendiscovery/d3m/issues/305) + [!164](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/164) +* Added new semantic types: + * `https://metadata.datadrivendiscovery.org/types/PrimaryMultiKey` + * `https://metadata.datadrivendiscovery.org/types/BoundingPolygon` + * `https://metadata.datadrivendiscovery.org/types/UnknownType` +* Removed semantic types: + * `https://metadata.datadrivendiscovery.org/types/BoundingBox` + * `https://metadata.datadrivendiscovery.org/types/BoundingBoxXMin` + * `https://metadata.datadrivendiscovery.org/types/BoundingBoxYMin` + * `https://metadata.datadrivendiscovery.org/types/BoundingBoxXMax` + * `https://metadata.datadrivendiscovery.org/types/BoundingBoxYMax` + + **Backwards incompatible.** +* Added to `primitive_family`: + * `SEMISUPERVISED_CLASSIFICATION` + * `SEMISUPERVISED_REGRESSION` + * `VERTEX_CLASSIFICATION` +* Added to `task_type`: + * `SEMISUPERVISED_CLASSIFICATION` + * `SEMISUPERVISED_REGRESSION` + * `VERTEX_CLASSIFICATION` +* Added to `performance_metric`: + * `HAMMING_LOSS` +* Removed from `performance_metric`: + * `ROOT_MEAN_SQUARED_ERROR_AVG` + + **Backwards incompatible.** +* Added `https://metadata.datadrivendiscovery.org/types/GPUResourcesUseParameter` and + `https://metadata.datadrivendiscovery.org/types/CPUResourcesUseParameter` semantic types for + primitive hyper-parameters which control the use of GPUs and CPUs (cores), respectively. + You can use these semantic types to mark which hyper-parameter defines a range of how many + GPUs or CPUs (cores), respectively, a primitive can and should use. + [#39](https://gitlab.com/datadrivendiscovery/d3m/issues/39) + [!177](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/177) +* Added `get_hyperparams` and `get_volumes` helper methods to `PrimitiveMetadata` + so that it is easier to obtain hyper-parameters definitions class of a primitive. + [#163](https://gitlab.com/datadrivendiscovery/d3m/issues/163) + [!175](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/175) +* Pipeline run schema now records the global seed used by the runtime to run the pipeline. + [!187](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/187) +* Core package scores output now includes also a random seed column. + [#299](https://gitlab.com/datadrivendiscovery/d3m/issues/299) + [!185](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/185) +* Metrics in core packages now take as input whole predictions DataFrame + objects and compute scores over them. So `applicability_to_targets` metric + method has been removed, and also code which handles the list of target + columns metric used to compute the score. This is not needed anymore + because now all columns are always used by all metrics. Moreover, + corresponding `dataset_id` and `targets` fields have been removed from + pipeline run schema. +* Core package now requires pip 19 or later to be installed. + `--process-dependency-links` argument when installing the package is not needed + nor supported anymore. + Primitives should not require use of `--process-dependency-links` to install + them either. Instead use link dependencies as described in + [PEP 508](https://www.python.org/dev/peps/pep-0508/). + [#285](https://gitlab.com/datadrivendiscovery/d3m/issues/285) + [!176](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/176) + **Backwards incompatible.** +* `outputs` field in parsed problem description has been removed. 
+ [#290](https://gitlab.com/datadrivendiscovery/d3m/issues/290) + [!174](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/174) + **Backwards incompatible.** +* `Hyperparameter`'s `value_to_json` and `value_from_json` methods have been + renamed to `value_to_json_structure` and `value_from_json_structure`, respectively. + [#122](https://gitlab.com/datadrivendiscovery/d3m/issues/122) + [#173](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/173) +* Moved utility functions from common primitives package to core package: + + * `copy_metadata` to `Metadata.copy_to` method + * `select_columns` to `DataFrame.select_columns` method + * `select_columns_metadata` to `DataMetadata.select_columns` method + * `list_columns_with_semantic_types` to `DataMetadata.list_columns_with_semantic_types` method + * `list_columns_with_structural_types` to `DataMetadata.list_columns_with_structural_types` method + * `remove_columns` to `DataFrame.remove_columns` method + * `remove_columns_metadata` to `DataMetadata.remove_columns` method + * `append_columns` to `DataFrame.append_columns` method + * `append_columns_metadata` to `DataMetadata.append_columns` method + * `insert_columns` to `DataFrame.insert_columns` method + * `insert_columns_metadata` to `DataMetadata.insert_columns` method + * `replace_columns` to `DataFrame.replace_columns` method + * `replace_columns_metadata` to `DataMetadata.replace_columns` method + * `get_index_columns` to `DataMetadata.get_index_columns` method + * `horizontal_concat` to `DataFrame.horizontal_concat` method + * `horizontal_concat_metadata` to `DataMetadata.horizontal_concat` method + * `get_columns_to_use` to `d3m.base.utils.get_columns_to_use` function + * `combine_columns` to `d3m.base.utils.combine_columns` function + * `combine_columns_metadata` to `d3m.base.utils.combine_columns_metadata` function + * `set_table_metadata` to `DataMetadata.set_table_metadata` method + * `get_column_index_from_column_name` to `DataMetadata.get_column_index_from_column_name` method + * `build_relation_graph` to `Dataset.get_relations_graph` method + * `get_tabular_resource` to `d3m.base.utils.get_tabular_resource` function + * `get_tabular_resource_metadata` to `d3m.base.utils.get_tabular_resource_metadata` function + * `cut_dataset` to `Dataset.select_rows` method + + [#148](https://gitlab.com/datadrivendiscovery/d3m/issues/148) + [!172](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/172) + +* Updated core dependencies. Some important packages are now at versions: + * `pyarrow`: 0.12.1 + + [!156](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/156) + +## v2019.2.18 + +### Bugfixes + +* JSON schema for problem descriptions has been fixed to allow loading + D3M problem descriptions with data augmentation fields. + [#284](https://gitlab.com/datadrivendiscovery/d3m/issues/284) + [!154](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/154) +* Utils now contains representers to encode numpy float and integer numbers + for YAML. Importing `utils` registers them. + [#275](https://gitlab.com/datadrivendiscovery/d3m/issues/275) + [!148](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/148) +* Made sure all JSON files are read with UTF-8 encoding, so that we do + not depend on the encoding of the environment. 
+ [!150](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/150) + [!153](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/153) + +## v2019.2.12 + +### Enhancements + +* Runtime now makes sure that target columns are never marked as attributes. + [#265](https://gitlab.com/datadrivendiscovery/d3m/issues/265) + [!131](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/131) +* When using runtime CLI, pipeline run output is made even in the case of an + exception. Moreover, exception thrown from `Result.check_success` contains + associated pipeline runs in its `pipeline_runs` attribute. + [#245](https://gitlab.com/datadrivendiscovery/d3m/issues/245) + [!120](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/120) +* Made additional relaxations when reading D3M datasets and problem descriptions + to not require required fields which have defaults. + [!128](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/128) +* When loading D3M datasets and problem descriptions, package now just warns + if they have an unsupported schema version and continues to load them. + [#247](https://gitlab.com/datadrivendiscovery/d3m/issues/247) + [!119](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/119) +* Added to `primitive_family`: + + * `NATURAL_LANGUAGE_PROCESSING` + + [!125](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/125) + +### Bugfixes + +* Fixed an unexpected exception when running a pipeline using reference + runtime but not requesting to return output values. + [#260](https://gitlab.com/datadrivendiscovery/d3m/issues/260) + [!127](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/127) +* Fixed infinite recursion loop which happened if Python logging was + configured inside primitive's method call. Moreover, recording of + logging records for pipeline run changed so that it does not modify + the record itself while recording it. + [#250](https://gitlab.com/datadrivendiscovery/d3m/issues/250) + [#123](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/123) +* Correctly populate `volumes` primitive constructor argument. + Before it was not really possible to use primitive static files with + reference runtime. + [!132](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/132) +* Fixed runtime/pipeline run configuration through environment variables. + Now it reads them without throwing an exception. + [#274](https://gitlab.com/datadrivendiscovery/d3m/issues/274) + [!118](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/118) + [!137](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/137) + +## v2019.1.21 + +* Some enumeration classes were moved and renamed: + * `d3m.metadata.pipeline.ArgumentType` to `d3m.metadata.base.ArgumentType` + * `d3m.metadata.pipeline.PipelineContext` to `d3m.metadata.base.Context` + * `d3m.metadata.pipeline.PipelineStep` to `d3m.metadata.base.PipelineStepType` + + **Backwards incompatible.** + +* Added `pipeline_run.json` JSON schema which describes the results of running a + pipeline as described by the `pipeline.json` JSON schema. Also implemented + a reference pipeline run output for reference runtime. + [#165](https://gitlab.com/datadrivendiscovery/d3m/issues/165) + [!59](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/59) +* When computing primitive digests, primitive's ID is included in the + hash so that digest is not the same for all primitives from the same + package. 
+ [#154](https://gitlab.com/datadrivendiscovery/d3m/issues/154) +* When datasets are loaded, digest of their metadata and data can be + computed. To control when this is done, `compute_digest` argument + to `Dataset.load` can now take the following `ComputeDigest` + enumeration values: `ALWAYS`, `ONLY_IF_MISSING` (default), and `NEVER`. + [!75](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/75) +* Added `digest` field to pipeline descriptions. Digest is computed based + on the pipeline document and it helps differentiate between pipelines + with same `id`. When loading a pipeline, if there + is a digest mismatch a warning is issued. You can use + `strict_digest` argument to request an exception instead. + [#190](https://gitlab.com/datadrivendiscovery/d3m/issues/190) + [!75](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/75) +* Added `digest` field to problem description metadata. + This `digest` field is computed based on the problem description document + and it helps differentiate between problem descriptions with same `id`. + [#190](https://gitlab.com/datadrivendiscovery/d3m/issues/190) + [!75](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/75) +* Moved `id`, `version`, `name`, `other_names`, and `description` fields + in problem schema to top-level of the problem description. Moreover, made + `id` required. This aligns it more with the structure of other descriptions we have. + [!75](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/75) + **Backwards incompatible.** +* Pipelines can now provide multiple inputs to the same primitive argument. + In such case runtime wraps those inputs into a `List` container type, and then + passes the list to the primitive. + [#200](https://gitlab.com/datadrivendiscovery/d3m/issues/200) + [!112](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/112) +* Primitives now have a method `fit_multi_produce` which primitive author can + override to implement an optimized version of both fitting and producing a primitive on same data. + The default implementation just calls `set_training_data`, `fit` and produce methods. + If your primitive has non-standard additional arguments in its `produce` method(s) then you + will have to implement `fit_multi_produce` method to accept those additional arguments + as well, similarly to how you have had to do for `multi_produce`. + [#117](https://gitlab.com/datadrivendiscovery/d3m/issues/117) + [!110](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/110) + **Could be backwards incompatible.** +* `source`, `timestamp`, and `check` arguments to all metadata functions and container types' + constructors have been deprecated. You do not have to and should not be providing them anymore. + [#171](https://gitlab.com/datadrivendiscovery/d3m/issues/171) + [#172](https://gitlab.com/datadrivendiscovery/d3m/issues/172) + [#173](https://gitlab.com/datadrivendiscovery/d3m/issues/173) + [!108](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/108) + [!109](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/109) +* Primitive's constructor is not run anymore during importing of primitive's class + which allows one to use constructor to load things and do any resource + allocation/reservation. Constructor is now the preferred place to do so. 
+  [#158](https://gitlab.com/datadrivendiscovery/d3m/issues/158)
+  [!107](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/107)
+* `foreign_key` metadata has been extended with a `RESOURCE` type which allows
+  referencing another resource in the same dataset.
+  [#221](https://gitlab.com/datadrivendiscovery/d3m/issues/221)
+  [!105](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/105)
+* Updated the supported D3M dataset and problem schemas, both to version 3.2.0.
+  Problem description parsing supports data augmentation metadata.
+  A new approach for LUPI datasets and problems is now supported,
+  including runtime support.
+  Moreover, if a dataset's resource name is `learningData`, it is marked as a
+  dataset entry point.
+  [#229](https://gitlab.com/datadrivendiscovery/d3m/issues/229)
+  [#225](https://gitlab.com/datadrivendiscovery/d3m/issues/225)
+  [#226](https://gitlab.com/datadrivendiscovery/d3m/issues/226)
+  [!97](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/97)
+* Added support for "raw" datasets.
+  [#217](https://gitlab.com/datadrivendiscovery/d3m/issues/217)
+  [!94](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/94)
+* A warning is issued if a primitive does not provide a description through
+  its docstring.
+  [#167](https://gitlab.com/datadrivendiscovery/d3m/issues/167)
+  [!101](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/101)
+* A warning is now issued if an installable primitive is lacking contact or bug
+  tracker URI metadata.
+  [#178](https://gitlab.com/datadrivendiscovery/d3m/issues/178)
+  [!81](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/81)
+* The `Pipeline` class now also has `equals` and `hash` methods which can help
+  determine whether two pipelines are equal in the sense of isomorphism.
+  [!53](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/53)
+* The `Pipeline` and pipeline step classes now have a `get_all_hyperparams`
+  method to return all hyper-parameters defined for a pipeline and its steps.
+  [#222](https://gitlab.com/datadrivendiscovery/d3m/issues/222)
+  [!104](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/104)
+* Implemented a check for primitive Python paths to assure that they adhere
+  to the new standard requiring them to be of the form `d3m.primitives.primitive_family.primitive_name.kind`
+  (e.g., `d3m.primitives.classification.random_forest.SKLearn`).
+  Currently there is a warning if a primitive has a different Python path,
+  and after January 2019 it will be an error.
+  For the `primitive_name` segment there is a [`primitive_names.py`](./d3m/metadata/primitive_names.py)
+  file containing a list of all allowed primitive names.
+  Everyone is encouraged to help curate this list and suggest improvements (merging, removals, additions)
+  to the values in that list. The initial version was mostly automatically generated from an existing list of
+  values used by current primitives.
+ [#3](https://gitlab.com/datadrivendiscovery/d3m/issues/3) + [!67](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/67) +* Added to semantic types: + * `https://metadata.datadrivendiscovery.org/types/TokenizableIntoNumericAndAlphaTokens` + * `https://metadata.datadrivendiscovery.org/types/TokenizableByPunctuation` + * `https://metadata.datadrivendiscovery.org/types/AmericanPhoneNumber` + * `https://metadata.datadrivendiscovery.org/types/UnspecifiedStructure` + * `http://schema.org/email` + * `http://schema.org/URL` + * `http://schema.org/address` + * `http://schema.org/State` + * `http://schema.org/City` + * `http://schema.org/Country` + * `http://schema.org/addressCountry` + * `http://schema.org/postalCode` + * `http://schema.org/latitude` + * `http://schema.org/longitude` + + [!62](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/62) + [!95](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/95) + [!94](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/94) + +* Updated core dependencies. Some important packages are now at versions: + * `scikit-learn`: 0.20.2 + * `numpy`: 1.15.4 + * `pandas`: 0.23.4 + * `networkx`: 2.2 + * `pyarrow`: 0.11.1 + + [#106](https://gitlab.com/datadrivendiscovery/d3m/issues/106) + [#175](https://gitlab.com/datadrivendiscovery/d3m/issues/175) + +* Added to `algorithm_types`: + * `IDENTITY_FUNCTION` + * `DATA_SPLITTING` + * `BREADTH_FIRST_SEARCH` +* Moved a major part of README to Sphinx documentation which is built + and available at [http://docs.datadrivendiscovery.org/](http://docs.datadrivendiscovery.org/). +* Added a `produce_methods` argument to `Primitive` hyper-parameter class + which allows one to limit matching primitives only to those providing all + of the listed produce methods. + [#124](https://gitlab.com/datadrivendiscovery/d3m/issues/124) + [!56](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/56) +* Fixed `sample_multiple` method of the `Hyperparameter` class. + [#157](https://gitlab.com/datadrivendiscovery/d3m/issues/157) + [!50](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/50) +* Fixed pickling of `Choice` hyper-parameter. + [!49](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/49) + [!51](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/51) +* Added `Constant` hyper-parameter class. + [#186](https://gitlab.com/datadrivendiscovery/d3m/issues/186) + [!90](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/90) +* Added `count` to aggregate values in metafeatures. 
+ [!52](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/52) +* Clarified and generalized some metafeatures, mostly renamed so that it can be + used on attributes as well: + * `number_of_classes` to `number_distinct_values` + * `class_entropy` to `entropy_of_values` + * `majority_class_ratio` to `value_probabilities_aggregate.max` + * `minority_class_ratio` to `value_probabilities_aggregate.min` + * `majority_class_size` to `value_counts_aggregate.max` + * `minority_class_size` to `value_counts_aggregate.min` + * `class_probabilities` to `value_probabilities_aggregate` + * `target_values` to `values_aggregate` + * `means_of_attributes` to `mean_of_attributes` + * `standard_deviations_of_attributes` to `standard_deviation_of_attributes` + * `categorical_joint_entropy` to `joint_entropy_of_categorical_attributes` + * `numeric_joint_entropy` to `joint_entropy_of_numeric_attributes` + * `pearson_correlation_of_attributes` to `pearson_correlation_of_numeric_attributes` + * `spearman_correlation_of_attributes` to `spearman_correlation_of_numeric_attributes` + * `canonical_correlation` to `canonical_correlation_of_numeric_attributes` + + [!52](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/52) + +* Added metafeatures: + * `default_accuracy` + * `oner` + * `jrip` + * `naive_bayes_tree` + * `number_of_string_attributes` + * `ratio_of_string_attributes` + * `number_of_other_attributes` + * `ratio_of_other_attributes` + * `attribute_counts_by_structural_type` + * `attribute_ratios_by_structural_type` + * `attribute_counts_by_semantic_type` + * `attribute_ratios_by_semantic_type` + * `value_counts_aggregate` + * `number_distinct_values_of_discrete_attributes` + * `entropy_of_discrete_attributes` + * `joint_entropy_of_discrete_attributes` + * `joint_entropy_of_attributes` + * `mutual_information_of_discrete_attributes` + * `equivalent_number_of_discrete_attributes` + * `discrete_noise_to_signal_ratio` + + [!21](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/21) + [!52](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/52) + +* Added special handling when reading scoring D3M datasets (those with true targets in a separate + file `targets.csv`). When such dataset is detected, the values from the separate file are now + merged into the dataset, and its ID is changed to finish with `SCORE` suffix. Similarly, an + ID of a scoring problem description gets its suffix changed to `SCORE`. + [#176](https://gitlab.com/datadrivendiscovery/d3m/issues/176) +* Organized semantic types and add to some of them parent semantic types to organize/structure + them better. New parent semantic types added: `https://metadata.datadrivendiscovery.org/types/ColumnRole`, + `https://metadata.datadrivendiscovery.org/types/DimensionType`, `https://metadata.datadrivendiscovery.org/types/HyperParameter`. +* Fixed that `dateTime` column type is mapped to `http://schema.org/DateTime` semantic + type and not `https://metadata.datadrivendiscovery.org/types/Time`. + **Backwards incompatible.** +* Updated generated [site for metadata](https://metadata.datadrivendiscovery.org/) and + generate sites describing semantic types. + [#33](https://gitlab.com/datadrivendiscovery/d3m/issues/33) + [#114](https://gitlab.com/datadrivendiscovery/d3m/issues/114) + [!37](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/37) +* Optimized resolving of primitives in `Resolver` to not require loading of + all primitives when loading a pipeline, in the common case. 
+ [#162](https://gitlab.com/datadrivendiscovery/d3m/issues/162) + [!38](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/38) +* Added `NotFoundError`, `AlreadyExistsError`, and `PermissionDeniedError` + exceptions to `d3m.exceptions`. +* `Pipeline`'s `to_json_structure`, `to_json`, and `to_yaml` now have `nest_subpipelines` + argument which allows conversion with nested sub-pipelines instead of them + being only referenced. +* Made sure that Arrow serialization of metadata does not pickle also linked + values (`for_value`). +* Made sure enumerations are picklable. +* `PerformanceMetric` class now has `best_value` and `worst_value` which + return the range of possible values for the metric. Moreover, `normalize` + method normalizes the metric's value to a range between 0 and 1. +* Load D3M dataset qualities only after data is loaded. This fixes + lazy loading of datasets with qualities which was broken before. +* Added `load_all_primitives` argument to the default pipeline `Resolver` + which allows one to control loading of primitives outside of the resolver. +* Added `primitives_blacklist` argument to the default pipeline `Resolver` + which allows one to specify a collection of primitive path prefixes to not + (try to) load. +* Fixed return value of the `fit` method in `TransformerPrimitiveBase`. + It now correctly returns `CallResult` instead of `None`. +* Fixed a typo and renamed `get_primitive_hyparparams` to `get_primitive_hyperparams` + in `PrimitiveStep`. + **Backwards incompatible.** +* Additional methods were added to the `Pipeline` class and step classes, + to support runtime and easier manipulation of pipelines programmatically + (`get_free_hyperparams`, `get_input_data_references`, `has_placeholder`, + `replace_step`, `get_exposable_outputs`). +* Added reference implementation of the runtime. It is available + in the `d3m.runtime` module. This module also has an extensive + command line interface you can access through `python3 -m d3m.runtime`. + [#115](https://gitlab.com/datadrivendiscovery/d3m/issues/115) + [!57](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/57) + [!72](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/72) +* `GeneratorPrimitiveBase` interface has been changed so that `produce` method + accepts a list of non-negative integers as an input instead of a list of `None` values. + This allows for batching and control by the caller which outputs to generate. + Previously outputs would depend on number of calls to `produce` and number of outputs + requested in each call. Now these integers serve as an index into the set of potential + outputs. + **Backwards incompatible.** +* We now try to preserve metadata log in default implementation of `can_accept`. +* Added `sample_rate` field to `dimension` metadata. +* `python3 -m d3m.index download` command now accepts `--prefix` argument to limit the + primitives for which static files are downloaded. Useful for testing. +* Added `check` argument to `DataMetadata`'s `update` and `remove` methods which allows + one to control if selector check against `for_value` should be done or not. When + it is known that selector is valid, not doing the check can speed up those methods. +* Defined metadata field `file_columns` which allows to store known columns metadata for + tables referenced from columns. This is now used by a D3M dataset reader to store known + columns metadata for collections of CSV files. Previously, this metadata was lost despite + being available in Lincoln Labs dataset metadata. 
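+
+As a small illustration of the `check` argument added to `DataMetadata`'s `update` and
+`remove` methods above (a minimal sketch; the exact keyword-argument form is assumed here
+and may differ between versions):
+
+```python
+from d3m import container
+from d3m.metadata import base as metadata_base
+
+df = container.DataFrame({'a': [1, 2, 3]}, generate_metadata=True)
+
+# Update metadata of the first column; passing check=False skips the selector
+# check against the linked value, which can speed up repeated updates.
+df.metadata = df.metadata.update(
+    (metadata_base.ALL_ELEMENTS, 0),
+    {'name': 'renamed_column'},
+    check=False,
+)
+```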
+
+## v2018.7.10
+
+* Made sure that the `OBJECT_DETECTION_AVERAGE_PRECISION` metric supports operation on
+  a vectorized target column.
+  [#149](https://gitlab.com/datadrivendiscovery/d3m/issues/149)
+* Files in D3M dataset collections are now listed recursively to support datasets
+  with files split into directories.
+  [#146](https://gitlab.com/datadrivendiscovery/d3m/issues/146)
+* When a parameter value for `Params` fails to type check, the name of the parameter is now
+  reported as well.
+  [#135](https://gitlab.com/datadrivendiscovery/d3m/issues/135)
+* `python3 -m d3m.index` now has an additional command `download` which downloads all static
+  files needed by available primitives. Those files are then exposed through the `volumes`
+  constructor argument to primitives by the TA2/runtime. Files are stored into an output
+  directory in a standard way where each volume is stored under a file or directory name
+  based on its digest.
+  [#102](https://gitlab.com/datadrivendiscovery/d3m/issues/102)
+* Fixed the standard return type of the `log_likelihoods`, `log_likelihood`, `losses`, and `loss`
+  primitive methods to support multi-target primitives.
+* Clarified that `can_accept` receives primitive arguments and not just method arguments.
+* Added `https://metadata.datadrivendiscovery.org/types/FilesCollection` for resources which are
+  file collections. Also moved the main semantic type of a file collection's values to the column.
+* Fixed conversion of a simple list to a DataFrame.
+* Added the `https://metadata.datadrivendiscovery.org/types/Confidence` semantic type for columns
+  representing confidence, and `confidence_for` metadata which lets a confidence column refer
+  to the target column it is the confidence for.
+* Fixed the default `can_accept` implementation to return the type unwrapped from `CallResult`.
+* Fixed `DataMetadata.remove` to preserve the `for_value` value (and allow it to be set through the call).
+* Fixed a case where automatically generated metadata overrode explicitly set existing metadata.
+  [!25](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/25)
+* Fixed the `query_with_exceptions` metadata method to correctly return exceptions for
+  deeper selectors.
+* Added to `primitive_family`:
+  * `SCHEMA_DISCOVERY`
+  * `DATA_AUGMENTATION`
+* Added to `algorithm_types`:
+  * `HEURISTIC`
+  * `MARKOV_RANDOM_FIELD`
+  * `LEARNING_USING_PRIVILEGED_INFORMATION`
+  * `APPROXIMATE_DATA_AUGMENTATION`
+* Added `PrimitiveNotFittedError`, `DimensionalityMismatchError`, and `MissingValueError`
+  exceptions to `d3m.exceptions`.
+  [!22](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/22)
+* Fixed setting semantic types for boundary columns.
+  [#126](https://gitlab.com/datadrivendiscovery/d3m/issues/126) [!23](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/23)
+* Added the `video/avi` media type to lists of known media types.
+* Fixed a type check which prevented an additional primitive argument from being of `Union` type.
+* Fixed erroneous removal of empty dicts (`{}`) from metadata when empty dicts were
+  explicitly stored in metadata.
+  [#118](https://gitlab.com/datadrivendiscovery/d3m/issues/118)
+* Made sure that conflicting entry points are resolved in a deterministic way.
+* Made sure primitive metadata's `python_path` matches the path under which
+  a primitive is registered under `d3m.primitives`. This also prevents
+  a primitive from being registered twice at different paths in the namespace.
+ [#4](https://gitlab.com/datadrivendiscovery/d3m/issues/4) +* Fixed a bug which prevented registration of primitives at deeper levels + (e.g., `d3m.primitives...`). + [#121](https://gitlab.com/datadrivendiscovery/d3m/issues/121) + +## v2018.6.5 + +* `Metadata` class got additional methods to manipulate metadata: + * `remove(selector)` removes metadata at `selector`. + * `query_with_exceptions(selector)` to return metadata for selectors which + have metadata which differs from that of `ALL_ELEMENTS`. + * `add_semantic_type`, `has_semantic_type`, `remove_semantic_type`, + `get_elements_with_semantic_type` to help with semantic types. + * `query_column`, `update_column`, `remove_column`, `get_columns_with_semantic_type` + to make it easier to work with tabular data. + + [#55](https://gitlab.com/datadrivendiscovery/d3m/issues/55) + [#78](https://gitlab.com/datadrivendiscovery/d3m/issues/78) + +* Container `List` now inherits from a regular Python `list` and not from `typing.List`. + It does not have anymore a type variable. Typing information is stored in `metadata` + anyway (`structural_type`). This simplifies type checking (and improves performance) + and fixes pickling issues. + **Backwards incompatible.** +* `Hyperparams` class' `defaults` method now accepts optional `path` argument which + allows one to fetch defaults from nested hyper-parameters. +* `Hyperparameters` class and its subclasses now have `get_default` method instead + of a property `default`. + **Backwards incompatible.** +* `Hyperparams` class got a new method `replace` which makes it easier to modify + hyper-parameter values. +* `Set` hyper-parameter can now accept also a hyper-parameters configuration as elements + which allows one to define a set of multiple hyper-parameters per each set element. + [#94](https://gitlab.com/datadrivendiscovery/d3m/issues/94) +* Pipeline's `check` method now checks structural types of inputs and outputs and assures + they match. + [!19](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/19) +* `Set` hyper-parameter now uses tuple of unique elements instead of set to represent the set. + This assures that the order of elements is preserved to help with reproducibility when + iterating over a set. + **Backwards incompatible.** + [#109](https://gitlab.com/datadrivendiscovery/d3m/issues/109) +* `Set` hyper-parameter can now be defined without `max_samples` argument to allow a set + without an upper limit on the number of elements. + `min_samples` and `max_samples` arguments to `Set` constructor have been switched as + a consequence, to have a more intuitive order. + Similar changes have been done to `sample_multiple` method of hyper-parameters. + **Backwards incompatible.** + [#110](https://gitlab.com/datadrivendiscovery/d3m/issues/110) +* Core dependencies have been upgraded: `numpy==1.14.3`. `pytypes` is now a released version. +* When converting a numpy array with more than 2 dimensions to a DataFrame, higher dimensions are + automatically converted to nested numpy arrays inside a DataFrame. + [#80](https://gitlab.com/datadrivendiscovery/d3m/issues/80) +* Metadata is now automatically preserved when converting between container types. + [#76](https://gitlab.com/datadrivendiscovery/d3m/issues/76) +* Basic metadata for data values is now automatically generated when using D3M container types. + Value is traversed over its structure and `structural_type` and `dimension` with its `length` + keys are populated. 
Some `semantic_types` are added in simple cases, and `dimension`'s + `name` as well. In some cases analysis of all data to generate metadata can take time, + so you might consider disabling automatic generation by setting `generate_metadata` + to `False` in container's constructor or `set_for_value` calls and then manually populating + necessary metadata. + [#35](https://gitlab.com/datadrivendiscovery/d3m/issues/35) + [!6](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/6) + [!11](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/11) +* When reading D3M datasets, `media_types` metadata now includes proper media types + for the column, and also media type for each particular row (file). +* D3M dataset and problem description parsing has been updated to 3.1.2 version: + * `Dataset` class now supports loading `edgeList` resources. + * `primitive_family` now includes `OBJECT_DETECTION`. + * `task_type` now includes `OBJECT_DETECTION`. + * `performance_metrics` now includes `PRECISION`, `RECALL`, `OBJECT_DETECTION_AVERAGE_PRECISION`. + * `targets` of a problem description now includes `clusters_number`. + * New metadata `boundary_for` can now describe for which other column + a column is a boundary for. + * Support for `realVector`, `json` and `geojson` column types. + * Support for `boundingBox` column role. + * New semantic types: + * `https://metadata.datadrivendiscovery.org/types/EdgeList` + * `https://metadata.datadrivendiscovery.org/types/FloatVector` + * `https://metadata.datadrivendiscovery.org/types/JSON` + * `https://metadata.datadrivendiscovery.org/types/GeoJSON` + * `https://metadata.datadrivendiscovery.org/types/Interval` + * `https://metadata.datadrivendiscovery.org/types/IntervalStart` + * `https://metadata.datadrivendiscovery.org/types/IntervalEnd` + * `https://metadata.datadrivendiscovery.org/types/BoundingBox` + * `https://metadata.datadrivendiscovery.org/types/BoundingBoxXMin` + * `https://metadata.datadrivendiscovery.org/types/BoundingBoxYMin` + * `https://metadata.datadrivendiscovery.org/types/BoundingBoxXMax` + * `https://metadata.datadrivendiscovery.org/types/BoundingBoxYMax` + + [#99](https://gitlab.com/datadrivendiscovery/d3m/issues/99) + [#107](https://gitlab.com/datadrivendiscovery/d3m/issues/107) + +* Unified the naming of attributes/features metafeatures to attributes. + **Backwards incompatible.** + [!13](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/13) +* Unified the naming of categorical/nominal metafeatures to categorical. 
+ **Backwards incompatible.** + [!12](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/12) +* Added more metafeatures: + * `pca` + * `random_tree` + * `decision_stump` + * `naive_bayes` + * `linear_discriminant_analysis` + * `knn_1_neighbor` + * `c45_decision_tree` + * `rep_tree` + * `categorical_joint_entropy` + * `numeric_joint_entropy` + * `number_distinct_values_of_numeric_features` + * `class_probabilities` + * `number_of_features` + * `number_of_instances` + * `canonical_correlation` + * `entropy_of_categorical_features` + * `entropy_of_numeric_features` + * `equivalent_number_of_categorical_features` + * `equivalent_number_of_numeric_features` + * `mutual_information_of_categorical_features` + * `mutual_information_of_numeric_features` + * `categorical_noise_to_signal_ratio` + * `numeric_noise_to_signal_ratio` + + [!10](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/10) + [!14](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/14) + [!17](https://gitlab.com/datadrivendiscovery/d3m/merge_requests/17) + +* Added metafeatures for present values: + * `number_of_instances_with_present_values` + * `ratio_of_instances_with_present_values` + * `number_of_present_values` + * `ratio_of_present_values` + + [#84](https://gitlab.com/datadrivendiscovery/d3m/issues/84) + +* Implemented interface for saving datasets. + [#31](https://gitlab.com/datadrivendiscovery/d3m/issues/31) +* To remove a key in metadata, instead of using `None` value one should now use + special `NO_VALUE` value. + **Backwards incompatible.** +* `None` is now serialized to JSON as `null` instead of string `"None"`. + **Could be backwards incompatible.** +* Unified naming and behavior of methods dealing with JSON and JSON-related + data. Now across the package: + * `to_json_structure` returns a structure with values fully compatible with JSON and serializable with default JSON serializer + * `to_simple_structure` returns a structure similar to JSON, but with values left as Python values + * `to_json` returns serialized value as JSON string + + **Backwards incompatible.** + +* Hyper-parameters are now required to specify at least one + semantic type from: `https://metadata.datadrivendiscovery.org/types/TuningParameter`, + `https://metadata.datadrivendiscovery.org/types/ControlParameter`, + `https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter`, + `https://metadata.datadrivendiscovery.org/types/MetafeatureParameter`. + **Backwards incompatible.** +* Made type strings in primitive annotations deterministic. + [#93](https://gitlab.com/datadrivendiscovery/d3m/issues/93) +* Reimplemented primitives loading code to load primitives lazily. + [#74](https://gitlab.com/datadrivendiscovery/d3m/issues/74) +* `d3m.index` module now has new and modified functions: + * `search` now returns a list of Python paths of all potential + primitives defined through entry points (but does not load them + or checks if entry points are valid) + * `get_primitive` loads and returns a primitive given its Python path + * `get_primitive_by_id` returns a primitive given its ID, but a primitive + has to be loaded beforehand + * `get_loaded_primitives` returns a list of all currently loaded primitives + * `load_all` tries to load all primitives + * `register_primitive` now accepts full Python path instead of just suffix + + **Backwards incompatible.** + [#74](https://gitlab.com/datadrivendiscovery/d3m/issues/74) + +* Defined `model_features` primitive metadata to describe features supported + by an underlying model. 
This is useful to allow easy matching between
+  a problem's subtypes and relevant primitives.
+  [#88](https://gitlab.com/datadrivendiscovery/d3m/issues/88)
+* Made the hyper-parameter space of an existing `Hyperparams` subclass immutable.
+  [#91](https://gitlab.com/datadrivendiscovery/d3m/issues/91)
+* The `d3m.index describe` command now accepts a `-s`/`--sort-keys` argument which
+  makes all keys in the JSON output sorted, making the output JSON easier to
+  diff and compare.
+* `can_accept` now gets a `hyperparams` object with hyper-parameters under
+  which to check a method call. This allows `can_accept` to return a result
+  based on control hyper-parameters.
+  **Backwards incompatible.**
+  [#81](https://gitlab.com/datadrivendiscovery/d3m/issues/81)
+* Documented that all docstrings should be written according to the
+  [numpy docstring format](https://numpydoc.readthedocs.io/en/latest/format.html).
+  [#85](https://gitlab.com/datadrivendiscovery/d3m/issues/85)
+* Added to semantic types:
+    * `https://metadata.datadrivendiscovery.org/types/MissingData`
+    * `https://metadata.datadrivendiscovery.org/types/InvalidData`
+    * `https://metadata.datadrivendiscovery.org/types/RedactedTarget`
+    * `https://metadata.datadrivendiscovery.org/types/RedactedPrivilegedData`
+* Added to `primitive_family`:
+    * `TIME_SERIES_EMBEDDING`
+* Added to `algorithm_types`:
+    * `IVECTOR_EXTRACTION`
+* Removed `SparseDataFrame` from standard container types because it is being
+  deprecated in Pandas.
+  **Backwards incompatible.**
+  [#95](https://gitlab.com/datadrivendiscovery/d3m/issues/95)
+* Defined an `other_names` metadata field for any other names a value might have.
+* Optimized primitives loading time.
+  [#87](https://gitlab.com/datadrivendiscovery/d3m/issues/87)
+* Reduced pickling of values when a hyper-parameter has a `Union` structural type.
+  [#83](https://gitlab.com/datadrivendiscovery/d3m/issues/83)
+* `DataMetadata.set_for_value` now first checks the new value against the metadata, by default.
+  **Could be backwards incompatible.**
+* Added the `NO_NESTED_VALUES` primitive precondition and effect.
+  This allows a primitive to specify that it cannot handle values where a container value
+  contains other nested values with dimensions.
+
+## v2018.4.18
+
+* Added a `pipeline.json` JSON schema to this package, and a `problem.json` JSON schema
+  describing the parsed problem description's schema. There is also a `d3m.metadata.pipeline`
+  parser for pipelines in this schema and a Python object to represent a pipeline.
+  [#53](https://gitlab.com/datadrivendiscovery/d3m/issues/53)
+* Updated the README to make it explicit that for tabular data the first dimension
+  is always rows and the second always columns, even in the case of a DataFrame
+  container type.
+  [#54](https://gitlab.com/datadrivendiscovery/d3m/issues/54)
+* Made the `Dataset` container type return a Pandas `DataFrame` instead of a numpy `ndarray`
+  and, in general, suggest using a Pandas `DataFrame` as the default container type.
+  **Backwards incompatible.**
+  [#49](https://gitlab.com/datadrivendiscovery/d3m/issues/49)
+* Added the `UniformBool` hyper-parameter class.
+* Renamed `FeaturizationPrimitiveBase` to `FeaturizationLearnerPrimitiveBase`.
+  **Backwards incompatible.**
+* Defined `ClusteringTransformerPrimitiveBase` and renamed `ClusteringPrimitiveBase`
+  to `ClusteringLearnerPrimitiveBase`.
+  **Backwards incompatible.**
+  [#20](https://gitlab.com/datadrivendiscovery/d3m/issues/20)
+* Added the `inputs_across_samples` decorator to mark which method arguments
+  are inputs which compute across samples.
+ [#19](https://gitlab.com/datadrivendiscovery/d3m/issues/19) +* Converted `SingletonOutputMixin` to a `singleton` decorator. This allows + each produce method separately to be marked as a singleton produce method. + **Backwards incompatible.** + [#17](https://gitlab.com/datadrivendiscovery/d3m/issues/17) +* `can_accept` can also raise an exception with information why it cannot accept. + [#13](https://gitlab.com/datadrivendiscovery/d3m/issues/13) +* Added `Primitive` hyper-parameter to describe a primitive or primitives. + Additionally, documented in docstrings better how to define hyper-parameters which + use primitives for their values and how should such primitives-as-values be passed + to primitives as their hyper-parameters. + [#51](https://gitlab.com/datadrivendiscovery/d3m/issues/51) +* Hyper-parameter values can now be converted to and from JSON-compatible structure + using `values_to_json` and `values_from_json` methods. Non-primitive values + are pickled and stored as base64 strings. + [#67](https://gitlab.com/datadrivendiscovery/d3m/issues/67) +* Added `Choice` hyper-parameter which allows one to define + combination of hyper-parameters which should exists together. + [#28](https://gitlab.com/datadrivendiscovery/d3m/issues/28) +* Added `Set` hyper-parameter which samples multiple times another hyper-parameter. + [#52](https://gitlab.com/datadrivendiscovery/d3m/issues/52) +* Added `https://metadata.datadrivendiscovery.org/types/MetafeatureParameter` + semantic type for hyper-parameters which control which meta-features are + computed by the primitive. + [#41](https://gitlab.com/datadrivendiscovery/d3m/issues/41) +* Added `supported_media_types` primitive metadata to describe + which media types a primitive knows how to manipulate. + [#68](https://gitlab.com/datadrivendiscovery/d3m/issues/68) +* Renamed metadata property `mime_types` to `media_types`. + **Backwards incompatible.** +* Made pyarrow dependency a package extra. You can depend on it using + `d3m[arrow]`. + [#66](https://gitlab.com/datadrivendiscovery/d3m/issues/66) +* Added `multi_produce` method to primitive interface which allows primitives + to optimize calls to multiple produce methods they might have. + [#21](https://gitlab.com/datadrivendiscovery/d3m/issues/21) +* Added `d3m.utils.redirect_to_logging` context manager which can help + redirect primitive's output to stdout and stderr to primitive's logger. + [#65](https://gitlab.com/datadrivendiscovery/d3m/issues/65) +* Primitives can now have a dependency on static files and directories. + One can use `FILE` and `TGZ` entries in primitive's `installation` + metadata to ask the caller to provide paths those files and/or + extracted directories through new `volumes` constructor argument. + [#18](https://gitlab.com/datadrivendiscovery/d3m/issues/18) +* Core dependencies have been upgraded: `numpy==1.14.2`, `networkx==2.1`. +* LUPI quality in D3M datasets is now parsed into + `https://metadata.datadrivendiscovery.org/types/SuggestedPrivilegedData` + semantic type for a column. + [#61](https://gitlab.com/datadrivendiscovery/d3m/issues/61) +* Support for primitives using Docker containers has been put on hold. + We are keeping a way to pass information about running containers to a + primitive and defining dependent Docker images in metadata, but currently + it is not expected that any runtime running primitives will run + Docker containers for a primitive. 
+ [#18](https://gitlab.com/datadrivendiscovery/d3m/issues/18) +* Primitives do not have to define all constructor arguments anymore. + This allows them to ignore arguments they do not use, e.g., + `docker_containers`. + On the other side, when creating an instance of a primitive, one + has now to check which arguments the constructor accepts, which is + available in primitive's metadata: + `primitive.metadata.query()['primitive_code'].get('instance_methods', {})['__init__']['arguments']`. + [#63](https://gitlab.com/datadrivendiscovery/d3m/issues/63) +* Information about running primitive's Docker container has changed + from just its address to a `DockerContainer` tuple containing both + the address and a map of all exposed ports. + At the same time, support for Docker has been put on hold so you + do not really have to upgrade for this change anything and can simply + remove the `docker_containers` argument from primitive's constructor. + **Backwards incompatible.** + [#14](https://gitlab.com/datadrivendiscovery/d3m/issues/14) +* Multiple exception classes have been defined in `d3m.exceptions` + module and are now in use. This allows easier and more precise + handling of exceptions. + [#12](https://gitlab.com/datadrivendiscovery/d3m/issues/12) +* Fixed inheritance of `Hyperparams` class. + [#44](https://gitlab.com/datadrivendiscovery/d3m/issues/44) +* Each primitive's class now automatically gets an instance of + [Python's logging](https://docs.python.org/3/library/logging.html) + logger stored into its ``logger`` class attribute. The instance is made + under the name of primitive's ``python_path`` metadata value. Primitives + can use this logger to log information at various levels (debug, warning, + error) and even associate extra data with log record using the ``extra`` + argument to the logger calls. + [#10](https://gitlab.com/datadrivendiscovery/d3m/issues/10) +* Made sure container data types can be serialized with Arrow/Plasma + while retaining their metadata. + [#29](https://gitlab.com/datadrivendiscovery/d3m/issues/29) +* `Scores` in `GradientCompositionalityMixin` replaced with `Gradients`. + `Scores` only makes sense in a probabilistic context. +* Renamed `TIMESERIES_CLASSIFICATION`, `TIMESERIES_FORECASTING`, and + `TIMESERIES_SEGMENTATION` primitives families to + `TIME_SERIES_CLASSIFICATION`, `TIME_SERIES_FORECASTING`, and + `TIME_SERIES_SEGMENTATION`, respectively, to match naming + pattern used elsewhere. + Similarly, renamed `UNIFORM_TIMESERIES_SEGMENTATION` algorithm type + to `UNIFORM_TIME_SERIES_SEGMENTATION`. + Compound words using hyphens are separated, but hyphens for prefixes + are not separated. So "Time-series" and "Root-mean-squared error" + become `TIME_SERIES` and `ROOT_MEAN_SQUARED_ERROR` + but "Non-overlapping" and "Multi-class" are `NONOVERLAPPING` and `MULTICLASS`. + **Backwards incompatible.** +* Updated performance metrics to include `PRECISION_AT_TOP_K` metric. +* Added to problem description parsing support for additional metric + parameters and updated performance metric functions to use them. + [#42](https://gitlab.com/datadrivendiscovery/d3m/issues/42) +* Merged `d3m_metadata`, `primitive_interfaces` and `d3m` repositories + into `d3m` repository. 
This requires the following changes of
+  imports in existing code:
+    * `d3m_metadata` to `d3m.metadata`
+    * `primitive_interfaces` to `d3m.primitive_interfaces`
+    * `d3m_metadata.container` to `d3m.container`
+    * `d3m_metadata.metadata` to `d3m.metadata.base`
+    * `d3m_metadata.metadata.utils` to `d3m.utils`
+    * `d3m_metadata.metadata.types` to `d3m.types`
+
+  **Backwards incompatible.**
+  [#11](https://gitlab.com/datadrivendiscovery/d3m/issues/11)
+
+* Fixed computation of sampled values for the `LogUniform` hyper-parameter class.
+  [#47](https://gitlab.com/datadrivendiscovery/d3m/issues/47)
+* When copying or slicing container values, metadata is now copied over
+  instead of cleared. This makes it easier to propagate metadata.
+  This also means one should make sure to update the metadata in the
+  new container value to reflect changes to the value itself.
+  **Could be backwards incompatible.**
+* `DataMetadata` now has a `set_for_value` method to make a copy of
+  metadata and set a new `for_value` value. You can use this when you
+  have made a new value and want to copy over metadata, but also
+  want this value to be associated with the metadata. This is done by
+  default for container values.
+* Metadata now includes a SHA256 digest for primitives and datasets.
+  It is computed automatically during loading. This should allow one to
+  track the exact versions of primitives and datasets used.
+  `d3m.container.dataset.get_d3m_dataset_digest` is a reference
+  implementation of computing the digest for D3M datasets.
+  You can set `compute_digest` to `False` to disable this.
+  You can set `strict_digest` to `True` to raise an exception instead
+  of a warning if the computed digest does not match the one in metadata.
+* Datasets can now be loaded in "lazy" mode: only metadata is loaded
+  when creating a `Dataset` object. You can use the `is_lazy` method to
+  check if a dataset is lazy and its data has not yet been loaded. You can use
+  `load_lazy` to load data for a lazy object, making it non-lazy.
+* There is now a utility metaclass `d3m.metadata.utils.AbstractMetaclass`
+  which makes classes which use it automatically inherit docstrings
+  for methods from the parent. The primitive base class and some other D3M
+  classes are now using it.
+* `d3m.metadata.base.CONTAINER_SCHEMA_VERSION` and
+  `d3m.metadata.base.DATA_SCHEMA_VERSION` were fixed to point to the
+  correct URI.
+* Many `data_metafeatures` properties in the metadata schema had type
+  `numeric` which does not exist in JSON schema. They were fixed to
+  `number`.
+* Added to the list of known semantic types:
+  `https://metadata.datadrivendiscovery.org/types/Target`,
+  `https://metadata.datadrivendiscovery.org/types/PredictedTarget`,
+  `https://metadata.datadrivendiscovery.org/types/TrueTarget`,
+  `https://metadata.datadrivendiscovery.org/types/Score`,
+  `https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint`,
+  `https://metadata.datadrivendiscovery.org/types/SuggestedPrivilegedData`,
+  `https://metadata.datadrivendiscovery.org/types/PrivilegedData`.
+* Added to `algorithm_types`: `ARRAY_CONCATENATION`, `ARRAY_SLICING`,
+  `ROBUST_PRINCIPAL_COMPONENT_ANALYSIS`, `SUBSPACE_CLUSTERING`,
+  `SPECTRAL_CLUSTERING`, `RELATIONAL_ALGEBRA`, `MULTICLASS_CLASSIFICATION`,
+  `MULTILABEL_CLASSIFICATION`, `OVERLAPPING_CLUSTERING`, `SOFT_CLUSTERING`,
+  `STRICT_PARTITIONING_CLUSTERING`, `STRICT_PARTITIONING_CLUSTERING_WITH_OUTLIERS`,
+  `UNIVARIATE_REGRESSION`, `NONOVERLAPPING_COMMUNITY_DETECTION`,
+  `OVERLAPPING_COMMUNITY_DETECTION`.
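+
+For illustration only (this sketch is not part of the original changelog), here is roughly how
+hyper-parameters are declared with the tuning/control semantic types mentioned in the entries
+above. It assumes the `d3m.metadata.hyperparams` module as used elsewhere in this repository;
+the class and hyper-parameter names are hypothetical, and `UniformInt` is assumed to take
+`lower`/`upper` bounds as described in the v2018.1.26 entry below:
+
+```python
+from d3m.metadata import hyperparams
+
+
+class ExampleHyperparams(hyperparams.Hyperparams):
+    # A true tuning parameter, meant to be optimized during hyper-parameter tuning.
+    iterations = hyperparams.UniformInt(
+        lower=1,
+        upper=100,
+        default=10,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+        description="Number of iterations to run.",
+    )
+    # A control parameter, decided during pipeline construction.
+    return_result = hyperparams.Enumeration(
+        values=['append', 'replace', 'new'],
+        default='append',
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="How produced columns should be returned.",
+    )
+
+
+defaults = ExampleHyperparams.defaults()
+# "replace" (described in the v2018.6.5 entry above) returns a modified copy.
+tuned = defaults.replace({'iterations': 42})
+```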
+ +## v2018.1.26 + +* Test primitives updated to have `location_uris` metadata. +* Test primitives updated to have `#egg=` package URI suffix in metadata. +* Primitives (instances of their classes) can now be directly pickled + and unpickled. Internally it uses `get_params` and `set_params` in + default implementation. If you need to preserve additional state consider + extending `__getstate__` and `__setstate__` methods. +* Added `RandomPrimitive` test primitive. +* Bumped `numpy` dependency to `1.14` and `pandas` to `0.22`. +* Added `https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter` as a known URI + for `semantic_types` to help convey which hyper-parameters control the use of resources by the + primitive. + [#41](https://gitlab.com/datadrivendiscovery/metadata/issues/41) +* Fixed use of `numpy` values in `Params` and `Hyperparams`. + [#39](https://gitlab.com/datadrivendiscovery/metadata/issues/39) +* Added `upper_inclusive` argument to `UniformInt`, `Uniform`, and `LogUniform` classes + to signal that the upper bound is inclusive (default is exclusive). + [#38](https://gitlab.com/datadrivendiscovery/metadata/issues/38) +* Made `semantic_types` and `description` keyword-only arguments in hyper-parameter description classes. +* Made all enumeration metadata classes have their instances be equal to their string names. +* Made sure `Hyperparams` subclasses can be pickled and unpickled. +* Improved error messages during metadata validation. +* Documented common metadata for primitives and data in the README. +* Added standard deviation to aggregate metadata values possible. +* Added `NO_JAGGED_VALUES` to `preconditions` and `effects`. +* Added to `algorithm_types`: `AGGREGATE_FUNCTION`, `AUDIO_STREAM_MANIPULATION`, `BACKWARD_DIFFERENCE_CODING`, + `BAYESIAN_LINEAR_REGRESSION`, `CATEGORY_ENCODER`, `CROSS_VALIDATION`, `DISCRETIZATION`, `ENCODE_BINARY`, + `ENCODE_ORDINAL`, `FEATURE_SCALING`, `FORWARD_DIFFERENCE_CODING`, `FREQUENCY_TRANSFORM`, `GAUSSIAN_PROCESS`, + `HASHING`, `HELMERT_CODING`, `HOLDOUT`, `K_FOLD`, `LEAVE_ONE_OUT`, `MERSENNE_TWISTER`, `ORTHOGONAL_POLYNOMIAL_CODING`, + `PASSIVE_AGGRESSIVE`, `PROBABILISTIC_DATA_CLEANING`, `QUADRATIC_DISCRIMINANT_ANALYSIS`, `RECEIVER_OPERATING_CHARACTERISTIC`, + `RELATIONAL_DATA_MINING`, `REVERSE_HELMERT_CODING`, `SEMIDEFINITE_EMBEDDING`, `SIGNAL_ENERGY`, `SOFTMAX_FUNCTION`, + `SPRUCE`, `STOCHASTIC_GRADIENT_DESCENT`, `SUM_CODING`, `TRUNCATED_NORMAL_DISTRIBUTION`, `UNIFORM_DISTRIBUTION`. +* Added to `primitive_family`: `DATA_GENERATION`, `DATA_VALIDATION`, `DATA_WRANGLING`, `VIDEO_PROCESSING`. +* Added `NoneType` to the list of data types allowed inside container types. +* For `PIP` dependencies specified by a `package_uri` git URI, an `#egg=package_name` URI suffix is + now required. + +## v2018.1.5 + +* Made use of the PyPI package official. Documented a requirement for + `--process-dependency-links` argument during installation. + [#27](https://gitlab.com/datadrivendiscovery/metadata/issues/27) +* Arguments `learning_rate` and `weight_decay` in `GradientCompositionalityMixin` renamed to + `fine_tune_learning_rate` and `fine_tune_weight_decay`, respectively. + `learning_rate` is a common hyper-parameter name. 
+ [#41](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/41) +* Added `https://metadata.datadrivendiscovery.org/types/TuningParameter` and + `https://metadata.datadrivendiscovery.org/types/ControlParameter` as two known URIs for + `semantic_types` to help convey which hyper-parameters are true tuning parameters (should be + tuned during hyper-parameter optimization phase) and which are control parameters (should be + determined during pipeline construction phase and are part of the logic of the pipeline). +* Made `installation` metadata optional. This allows local-only primitives. + You can still register them into D3M namespace using `d3m.index.register_primitive`. +* Fixed serialization to JSON of hyper-parameters with `q` argument. +* Clarified that primitive's `PIP` dependency `package` has to be installed with `--process-dependency-link` argument + enabled, and `package_uri` with both `--process-dependency-link` and `--editable`, so that primitives can have access + to their git history to generate metadata. +* Only `git+http` and `git+https` URI schemes are allowed for git repository URIs for `package_uri`. +* Added to `algorithm_types`: `AUDIO_MIXING`, `CANONICAL_CORRELATION_ANALYSIS`, `DATA_PROFILING`, `DEEP_FEATURE_SYNTHESIS`, + `INFORMATION_ENTROPY`, `MFCC_FEATURE_EXTRACTION`, `MULTINOMIAL_NAIVE_BAYES`, `MUTUAL_INFORMATION`, `PARAMETRIC_TRAJECTORY_MODELING`, + `SIGNAL_DITHERING`, `SIGNAL_TO_NOISE_RATIO`, `STATISTICAL_MOMENT_ANALYSIS`, `UNIFORM_TIMESERIES_SEGMENTATION`. +* Added to `primitive_family`: `SIMILARITY_MODELING`, `TIMESERIES_CLASSIFICATION`, `TIMESERIES_SEGMENTATION`. + +## v2017.12.27 + +* Documented `produce` method for `ClusteringPrimitiveBase` and added + `ClusteringDistanceMatrixMixin`. + [#18](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/18) +* Added `can_accept` class method to primitive base class and implemented its + default implementation. + [#20](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/20) +* "Distance" primitives now accept an extra argument instead of a tuple. +* `Params` should now be a subclass of `d3m.metadata.params.Params`, which is a + specialized dict instead of a named tuple. +* Removed `Graph` class. There is no need for it anymore because we can identify + them by having input type a NetworkX graph and through metadata discovery. +* Added `timeout` and `iterations` arguments to more methods. +* Added `forward` and `backward` backprop methods to `GradientCompositionalityMixin` + to allow end-to-end backpropagation across diverse primitives. + [#26](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/26) +* Added `log_likelihoods` method to `ProbabilisticCompositionalityMixin`. +* Constructor now accepts `docker_containers` argument with addresses of running + primitive's Docker containers. + [#25](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/25) +* Removed `CallMetadata` and `get_call_metadata` and changed so that some methods + directly return new but similar `CallResult`. + [#27](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/27) +* Documented how extra arguments to standard and extra methods can be defined. +* Documented that all arguments with the same name in all methods should have the + same type. Arguments are per primitive not per method. 
+ [#29](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/29) +* Specified how to define extra "produce" methods which have same semantics + as `produce` but different output types. + [#30](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/30) +* Added `SingletonOutputMixin` to signal that primitive's output contains + only one element. + [#15](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/15) +* Added `get_loss_primitive` to allow accessing to the loss primitive + being used. +* Moved `set_training_data` back to the base class. + This breaks Liskov substitution principle. + [#19](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/19) +* Renamed `__metadata__` to `metadata` attribute. + [#23](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/23) +* `set_random_seed` method has been removed and replaced with a + `random_seed` argument to the constructor, which is also exposed as an attribute. + [#16](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/16) +* Primitives have now `hyperparams` attribute which returns a + hyper-parameters object passed to the constructor. + [#14](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/14) +* `Params` and `Hyperparams` are now required to be pickable and copyable. + [#3](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/3) +* Primitives are now parametrized by `Hyperparams` type variable as well. + Constructor now receives hyper-parameters as an instance as one argument + instead of multiple keyword arguments. + [#13](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/13) +* `LossFunctionMixin`'s `get_loss_function` method now returns a value from + problem schema `Metric` enumeration. +* `LossFunctionMixin` has now a `loss` and `losses` methods which allows one + to ask a primitive to compute loss for a given set of inputs and outputs using + internal loss function the primitive is using. + [#17](https://gitlab.com/datadrivendiscovery/primitive-interfaces/issues/17) +* Added `Params` class. +* Removed `Graph` class in favor of NetworkX `Graph` class. +* Added `Metadata` class with subclasses and documented the use of selectors. +* Added `Hyperparams` class. +* Added `Dataset` class. +* "Sequences" have generally been renamed to "containers". Related code is also now under + `d3m.container` and not under `d3m.metadata.sequence` anymore. +* `__metadata__` attribute was renamed to `metadata`. +* Package renamed from `d3m_types` to `d3m_metadata`. +* Added schemas for metadata contexts. +* A problem schema parsing and Python enumerations added in + `d3m.metadata.problem` module. +* A standard set of container and base types have been defined. +* `d3m.index` command tool rewritten to support three commands: `search`, `discover`, + and `describe`. See details by running `python -m d3m.index -h`. +* Package now requires Python 3.6. +* Repository migrated to gitlab.com and made public. + +## v2017.10.10 + +* Made `d3m.index` module with API to register primitives into a `d3m.primitives` module + and searches over it. +* `d3m.index` is also a command-line tool to list available primitives and automatically + generate JSON annotations for primitives. +* Created `d3m.primitives` module which automatically populates itself with primitives + using Python entry points. 
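+
+For illustration only (this sketch is not part of the original changelog), here is a minimal
+example of how the `d3m.index` module is typically used after the changes described in the
+entries above. It assumes the lazy-loading functions listed under v2018.6.5; the primitive
+path below is only a placeholder for whatever `search` returns on your system:
+
+```python
+from d3m import index
+
+# List Python paths of all primitives registered through entry points.
+# Primitives are not actually loaded at this point.
+for python_path in index.search():
+    print(python_path)
+
+# Load one primitive by its Python path (placeholder path shown here).
+PrimitiveClass = index.get_primitive('d3m.primitives.classification.random_forest.SKlearn')
+print(PrimitiveClass.metadata.query()['python_path'])
+```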
diff --git a/d3m/HOW_TO_RELEASE.md b/d3m/HOW_TO_RELEASE.md new file mode 100644 index 0000000..e8bfcae --- /dev/null +++ b/d3m/HOW_TO_RELEASE.md @@ -0,0 +1,35 @@ +# How to release a new version + +*A cheat sheet.* + +* On `devel` branch: + * `git pull` to make sure everything is in sync with remote origin. + * Change a version in `d3m/__init__.py` to the new version, e.g., `2019.2.12`. + * Change `vNEXT` in `HISTORY.md` to the to-be-released version, with `v` prefix. + * Commit with message `Bumping version for release.` + * `git push` + * Wait for CI to run tests successfully. +* On `master` branch: + * `git pull` to make sure everything is in sync with remote origin. + * Merge `devel` into `master` branch: `git merge devel` + * `git push` + * Wait for CI to run tests successfully. + * Release a package to PyPi: + * `rm -rf dist/` + * `python setup.py sdist` + * `twine upload dist/*` + * Tag with version prefixed with `v`, e.g., for version `2017.9.20`: `git tag v2017.9.20` + * `git push` & `git push --tags` +* On `devel` branch: + * `git merge master` to make sure `devel` is always on top of `master`. + * Change a version in `d3m/__init__.py` to `devel`. + * Add a new empty `vNEXT` version on top of `HISTORY.md`. + * Commit with message `Version bump for development.` + * `git push` +* After a release: + * Create a new [`core` and `primitives` Docker images](https://gitlab.com/datadrivendiscovery/images) for the release. + * Add new release to the [primitives index repository](https://gitlab.com/datadrivendiscovery/primitives/blob/master/HOW_TO_MANAGE.md). + +If there is a need for a patch version to fix a released version on the same day, +use `.postX` prefix, like `2017.9.20.post0`. If more than a day has passed, just +use the new day's version. diff --git a/d3m/LICENSE.txt b/d3m/LICENSE.txt new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/d3m/LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/d3m/MANIFEST.in b/d3m/MANIFEST.in new file mode 100644 index 0000000..3e677d0 --- /dev/null +++ b/d3m/MANIFEST.in @@ -0,0 +1,2 @@ +include README.md +include LICENSE.txt diff --git a/d3m/README.md b/d3m/README.md new file mode 100644 index 0000000..6030203 --- /dev/null +++ b/d3m/README.md @@ -0,0 +1,56 @@ +# Common code for D3M project + +This package provides a core package for D3M project with common code available. +It contains standard interfaces, reference implementations, and utility implementations. + +## Installation + +This package works with Python 3.6 and pip 19+. 
You need to have the following packages installed on the system (for Debian/Ubuntu): + +* `libssl-dev` +* `libcurl4-openssl-dev` +* `libyaml-dev` + +You can install latest stable version from [PyPI](https://pypi.org/): + +``` +$ pip3 install d3m +``` + +To install latest development version: + +``` +$ pip3 install -e git+https://gitlab.com/datadrivendiscovery/d3m.git@devel#egg=d3m +``` + +When cloning a repository, clone it recursively to get also git submodules: + +``` +$ git clone --recursive https://gitlab.com/datadrivendiscovery/d3m.git +``` + +## Changelog + +See [HISTORY.md](./HISTORY.md) for summary of changes to this package. + +## Documentation + +Documentation for the package is available at [https://docs.datadrivendiscovery.org/](https://docs.datadrivendiscovery.org/). + +## Contributing + +See [CODE_STYLE.md](./CODE_STYLE.md) for our coding style and contribution guide. Please ensure any merge requests you open follow this guide. + +## Repository structure + +`master` branch contains latest stable release of the package. +`devel` branch is a staging branch for the next release. + +Releases are [tagged](https://gitlab.com/datadrivendiscovery/d3m/tags). + +## About Data Driven Discovery Program + +DARPA Data Driven Discovery (D3M) Program is researching ways to get machines to build +machine learning pipelines automatically. It is split into three layers: +TA1 (primitives), TA2 (systems which combine primitives automatically into pipelines +and executes them), and TA3 (end-users interfaces). diff --git a/d3m/d3m/__init__.py b/d3m/d3m/__init__.py new file mode 100644 index 0000000..23a8751 --- /dev/null +++ b/d3m/d3m/__init__.py @@ -0,0 +1,8 @@ +__version__ = '2020.5.18' +__description__ = 'Common code for D3M project' +__author__ = 'DARPA D3M Program' + + +from d3m import namespace + +namespace.setup() diff --git a/d3m/d3m/__main__.py b/d3m/d3m/__main__.py new file mode 100644 index 0000000..c1332ce --- /dev/null +++ b/d3m/d3m/__main__.py @@ -0,0 +1,6 @@ +import sys + +from d3m import cli + + +cli.main(sys.argv) diff --git a/d3m/d3m/base/__init__.py b/d3m/d3m/base/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/d3m/d3m/base/primitives.py b/d3m/d3m/base/primitives.py new file mode 100644 index 0000000..144eec8 --- /dev/null +++ b/d3m/d3m/base/primitives.py @@ -0,0 +1,451 @@ +import abc +import typing +import weakref + +import frozendict # type: ignore +import numpy # type: ignore +import pandas # type: ignore + +from d3m import container, exceptions, types +from d3m.base import utils as base_utils +from d3m.metadata import base as metadata_base, hyperparams, params +from d3m.primitive_interfaces import base, generator, transformer + +__all__ = ( + 'FileReaderPrimitiveBase', + 'DatasetSplitPrimitiveBase', + 'TabularSplitPrimitiveBase', +) + +FileReaderInputs = container.DataFrame +FileReaderOutputs = container.DataFrame + + +class FileReaderHyperparams(hyperparams.Hyperparams): + use_columns = hyperparams.Set( + elements=hyperparams.Hyperparameter[int](-1), + default=(), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="A set of column indices to force primitive to operate on. 
If any specified column does not contain filenames for supported media types, it is skipped.", + ) + exclude_columns = hyperparams.Set( + elements=hyperparams.Hyperparameter[int](-1), + default=(), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", + ) + return_result = hyperparams.Enumeration( + values=['append', 'replace', 'new'], + default='append', + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Should columns with read files be appended, should they replace original columns, or should only columns with read files be returned?", + ) + add_index_columns = hyperparams.UniformBool( + default=True, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", + ) + + +class FileReaderPrimitiveBase(transformer.TransformerPrimitiveBase[FileReaderInputs, FileReaderOutputs, FileReaderHyperparams]): + """ + A primitive base class for reading files referenced in columns. + + Primitives using this base class must implement: + + * ``_supported_media_types``: A sequence of supported media types such as ``audio/mpeg``, ``image/jpeg``, etc. + * ``_file_structural_type``: Structural type of the file contents after being read such as ``container.ndarray``, ``container.DataFrame``, etc. + * ``_file_semantic_types``: A sequence of semantic types to be applied to the produced column. + * ``metadata``: Primitive Metadata. + * ``_read_fileuri``: The function which describes how to load each file. This function must load one file at the time. + """ + + _supported_media_types: typing.Sequence[str] = () + _file_structural_type: type = None + # If any of these semantic types already exists on a column, then nothing is done. + # If all are missing, the first one is set. + _file_semantic_types: typing.Sequence[str] = () + + def __init__(self, *, hyperparams: FileReaderHyperparams) -> None: + super().__init__(hyperparams=hyperparams) + + # Because same file can be referenced multiple times in multiple rows, we maintain + # a cache of read files so that we do not have to read same files again and again. + self._cache: weakref.WeakValueDictionary[typing.Tuple[int, str], typing.Any] = weakref.WeakValueDictionary() + + def _can_use_column(self, inputs_metadata: metadata_base.DataMetadata, column_index: int) -> bool: + column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) + + if column_metadata['structural_type'] != str: + return False + + semantic_types = column_metadata.get('semantic_types', []) + media_types = set(column_metadata.get('media_types', [])) + + if 'https://metadata.datadrivendiscovery.org/types/FileName' in semantic_types and media_types <= set(self._supported_media_types): + return True + + return False + + def _get_columns(self, inputs_metadata: metadata_base.DataMetadata) -> typing.List[int]: + def can_use_column(column_index: int) -> bool: + return self._can_use_column(inputs_metadata, column_index) + + columns_to_use, columns_not_to_use = base_utils.get_columns_to_use(inputs_metadata, self.hyperparams['use_columns'], self.hyperparams['exclude_columns'], can_use_column) + + # We are OK if no columns ended up being read. + # "base_utils.combine_columns" will throw an error if it cannot work with this. 
+ + if self.hyperparams['use_columns'] and columns_not_to_use: + self.logger.warning("Not all specified columns contain filenames for supported media types. Skipping columns: %(columns)s", { + 'columns': columns_not_to_use, + }) + + return columns_to_use + + def produce(self, *, inputs: FileReaderInputs, timeout: float = None, iterations: int = None) -> base.CallResult[FileReaderOutputs]: + columns_to_use = self._get_columns(inputs.metadata) + + output_columns = [self._produce_column(inputs, column_index) for column_index in columns_to_use] + + outputs = base_utils.combine_columns(inputs, columns_to_use, output_columns, return_result=self.hyperparams['return_result'], add_index_columns=self.hyperparams['add_index_columns']) + + if self.hyperparams['return_result'] == 'append': + outputs.metadata = self._reassign_boundaries(outputs.metadata, columns_to_use) + + return base.CallResult(outputs) + + @abc.abstractmethod + def _read_fileuri(self, metadata: frozendict.FrozenOrderedDict, fileuri: str) -> typing.Any: + pass + + def _read_filename(self, column_index: int, metadata: frozendict.FrozenOrderedDict, filename: str) -> typing.Any: + # TODO: Support handling multiple "location_base_uris". + # "location_base_uris" should be made so that we can just concat with the filename + # ("location_base_uris" end with "/"). + fileuri = metadata['location_base_uris'][0] + filename + + # We do not use the structure where we check if the key exists in the cache and if not set it and then + # return from the cache outside if clause because we are not sure garbage collection might not remove it + # before we get to return. So we directly ask for a reference and return it, or we obtain the file + # and populate the cache. + file = self._cache.get((column_index, fileuri), None) + if file is not None: + return file + + file = self._read_fileuri(metadata, fileuri) + + # We cache the file based on column index as well, because it could be that file is read differently + # based on column metadata, or that resulting metadata is different for a different column. + # We cache only if we can make a weakref. Many Python built-in types like "str" do not support them. + if type(file).__weakrefoffset__: + self._cache[(column_index, fileuri)] = file + + return file + + def _produce_column(self, inputs: FileReaderInputs, column_index: int) -> FileReaderOutputs: + read_files = [self._read_filename(column_index, inputs.metadata.query((row_index, column_index)), value) for row_index, value in enumerate(inputs.iloc[:, column_index])] + + column = container.DataFrame({inputs.columns[column_index]: read_files}, generate_metadata=False) + + column.metadata = self._produce_column_metadata(inputs.metadata, column_index, read_files) + column.metadata = column.metadata.generate(column, compact=True) + + return column + + def _produce_column_metadata( + self, inputs_metadata: metadata_base.DataMetadata, column_index: int, read_files: typing.Sequence[typing.Any], + ) -> metadata_base.DataMetadata: + column_metadata = inputs_metadata.select_columns([column_index]) + column_metadata = column_metadata.update_column(0, { + 'structural_type': self._file_structural_type, + # Clear metadata useful for filename columns. + 'location_base_uris': metadata_base.NO_VALUE, + 'media_types': metadata_base.NO_VALUE, + }) + + # It is not a filename anymore. 
+ column_metadata = column_metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/FileName') + + # At least one semantic type from listed semantic types should be set. + semantic_types = column_metadata.query_column(0).get('semantic_types', []) + if not set(semantic_types) & set(self._file_semantic_types): + # Add the first one. + column_metadata = column_metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 0), self._file_semantic_types[0]) + + for row_index, file in enumerate(read_files): + # Copy metadata only if we have a container type. + if isinstance(file, types.Container): + column_metadata = file.metadata.copy_to(column_metadata, (), (row_index, 0)) + + column_metadata = column_metadata.compact(['name', 'structural_type', 'media_types', 'location_base_uris', 'semantic_types']) + + return column_metadata + + def _reassign_boundaries(self, inputs_metadata: metadata_base.DataMetadata, columns: typing.List[int]) -> metadata_base.DataMetadata: + """ + Moves metadata about boundaries from the filename column to image object column. + """ + + outputs_metadata = inputs_metadata + columns_length = inputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] + + for column_index in range(columns_length): + column_metadata = outputs_metadata.query_column(column_index) + + if 'boundary_for' not in column_metadata: + continue + + # TODO: Support also "column_name" boundary metadata. + if 'column_index' not in column_metadata['boundary_for']: + continue + + try: + i = columns.index(column_metadata['boundary_for']['column_index']) + except ValueError: + continue + + outputs_metadata = outputs_metadata.update_column(column_index, { + 'boundary_for': { + # We know that "columns" were appended at the end. + 'column_index': columns_length - len(columns) + i, + } + }) + + return outputs_metadata + + +DatasetSplitInputs = container.List +DatasetSplitOutputs = container.List + + +class DatasetSplitPrimitiveBase(generator.GeneratorPrimitiveBase[DatasetSplitOutputs, base.Params, base.Hyperparams]): + """ + A base class for primitives which fit on a ``Dataset`` object to produce splits of that + ``Dataset`` when producing. There are two produce methods: `produce` and `produce_score_data`. + They take as an input a list of non-negative integers which identify which ``Dataset`` + splits to return. + + This class is parameterized using only by two type variables, + ``Params`` and ``Hyperparams``. + """ + + @abc.abstractmethod + def produce(self, *, inputs: DatasetSplitInputs, timeout: float = None, iterations: int = None) -> base.CallResult[DatasetSplitOutputs]: + """ + For each input integer creates a ``Dataset`` split and produces the training ``Dataset`` object. + This ``Dataset`` object should then be used to fit (train) the pipeline. + """ + + @abc.abstractmethod + def produce_score_data(self, *, inputs: DatasetSplitInputs, timeout: float = None, iterations: int = None) -> base.CallResult[DatasetSplitOutputs]: + """ + For each input integer creates a ``Dataset`` split and produces the scoring ``Dataset`` object. + This ``Dataset`` object should then be used to test the pipeline and score the results. + + Output ``Dataset`` objects do not have targets redacted and are not directly suitable for testing. + """ + + @abc.abstractmethod + def set_training_data(self, *, dataset: container.Dataset) -> None: # type: ignore + """ + Sets training data of this primitive, the ``Dataset`` to split. 
+ + Parameters + ---------- + dataset: + The dataset to split. + """ + + +class TabularSplitPrimitiveParams(params.Params): + dataset: typing.Optional[container.Dataset] + main_resource_id: typing.Optional[str] + splits: typing.Optional[typing.List[typing.Tuple[numpy.ndarray, numpy.ndarray]]] + graph: typing.Optional[typing.Dict[str, typing.List[typing.Tuple[str, bool, int, int, typing.Dict]]]] + + +# TODO: Make clear the assumption that both output container type (List) and output Datasets should have metadata. +# Redaction primitive expects that, while there is officially no reason for Datasets +# to really have metadata: metadata is stored available on the input container type, not +# values inside it. +class TabularSplitPrimitiveBase(DatasetSplitPrimitiveBase[TabularSplitPrimitiveParams, base.Hyperparams]): + """ + A primitive base class for splitting tabular datasets. + + Primitives using this base class must implement: + + * ``_get_splits``: The function which describes how to split the tabular dataset. + """ + + def __init__(self, *, hyperparams: base.Hyperparams, random_seed: int = 0) -> None: + super().__init__(hyperparams=hyperparams, random_seed=random_seed) + + # We need random seed multiple times. So we create our own random state we use everywhere. + self._random_state = numpy.random.RandomState(self.random_seed) + self._fitted: bool = False + self._dataset: container.Dataset = None + self._main_resource_id: str = None + self._splits: typing.List[typing.Tuple[numpy.ndarray, numpy.ndarray]] = None + self._graph: typing.Dict[str, typing.List[typing.Tuple[str, bool, int, int, typing.Dict]]] = None + + def produce(self, *, inputs: DatasetSplitInputs, timeout: float = None, iterations: int = None) -> base.CallResult[DatasetSplitOutputs]: + return self._produce(inputs, True) + + def produce_score_data(self, *, inputs: DatasetSplitInputs, timeout: float = None, iterations: int = None) -> base.CallResult[DatasetSplitOutputs]: + return self._produce(inputs, False) + + def set_training_data(self, *, dataset: container.Dataset) -> None: # type: ignore + main_resource_id, main_resource = base_utils.get_tabular_resource(dataset, None, has_hyperparameter=False) + + self._main_resource_id = main_resource_id + self._dataset = dataset + self._fitted = False + + def fit(self, *, timeout: float = None, iterations: int = None) -> base.CallResult[None]: + """ + This function computes everything in advance, including generating the relation graph. + """ + + if self._dataset is None: + raise exceptions.InvalidStateError('Missing training data.') + + if self._fitted: + return base.CallResult(None) + + targets, target_columns = self._get_target_columns(self._dataset, self._main_resource_id) + attributes = self._get_attribute_columns(self._dataset, self._main_resource_id, target_columns) + + # Get splits' indices. + self._splits = self._get_splits(attributes, targets, self._dataset, self._main_resource_id) + + # Graph is the adjacency representation for the relations graph. Make it not be a "defaultdict". 
+ self._graph = dict(self._dataset.get_relations_graph()) + + self._fitted = True + + return base.CallResult(None) + + def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: DatasetSplitInputs, # type: ignore + dataset: container.Dataset, timeout: float = None, iterations: int = None) -> base.MultiCallResult: + return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs, dataset=dataset) # type: ignore + + @abc.abstractmethod + def _get_splits(self, attributes: pandas.DataFrame, targets: pandas.DataFrame, dataset: container.Dataset, main_resource_id: str) -> typing.List[typing.Tuple[numpy.ndarray, numpy.ndarray]]: + pass + + def _get_target_columns(self, dataset: container.Dataset, main_resource_id: str) -> typing.Tuple[pandas.DataFrame, typing.Sequence[int]]: + target_columns = dataset.metadata.list_columns_with_semantic_types(['https://metadata.datadrivendiscovery.org/types/TrueTarget'], at=(main_resource_id,)) + + # It is OK if there are no target columns. "_get_splits" should raise an exception + # if this is a problem for a given split logic. + + return dataset[main_resource_id].iloc[:, list(target_columns)], target_columns + + def _get_attribute_columns(self, dataset: container.Dataset, main_resource_id: str, target_columns: typing.Sequence[int]) -> pandas.DataFrame: + attribute_columns = dataset.metadata.list_columns_with_semantic_types(['https://metadata.datadrivendiscovery.org/types/Attribute'], at=(main_resource_id,)) + + if not attribute_columns: + # No attribute columns with semantic types, let's use all + # non-target columns as attributes then. + all_columns = list(range(dataset.metadata.query((main_resource_id, metadata_base.ALL_ELEMENTS,))['dimension']['length'])) + attribute_columns = [column_index for column_index in all_columns if column_index not in target_columns] + + if not attribute_columns: + raise ValueError("No attribute columns.") + + return dataset[main_resource_id].iloc[:, list(attribute_columns)] + + def _produce(self, inputs: DatasetSplitInputs, is_train: bool) -> base.CallResult[DatasetSplitOutputs]: + """ + This function splits the fitted Dataset. + + Parameters + ---------- + inputs: + A list of 0-based indices which specify which splits to be used as test split in output. + is_train: + Whether we are producing train or test data. + + Returns + ------- + Returns a list of Datasets. + """ + + if not self._fitted: + raise exceptions.PrimitiveNotFittedError("Primitive not fitted.") + + output_datasets = container.List(generate_metadata=True) + + for index in inputs: + train_indices, test_indices = self._splits[index] + + if is_train: + output_dataset = base_utils.sample_rows( + self._dataset, + self._main_resource_id, + set(train_indices), + self._graph, + delete_recursive=self.hyperparams.get('delete_recursive', False), + ) + else: + output_dataset = base_utils.sample_rows( + self._dataset, + self._main_resource_id, + set(test_indices), + self._graph, + delete_recursive=self.hyperparams.get('delete_recursive', False), + ) + + output_datasets.append(output_dataset) + + output_datasets.metadata = metadata_base.DataMetadata({ + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': container.List, + 'dimension': { + 'length': len(output_datasets), + }, + }) + + # We update metadata based on metadata of each dataset. + # TODO: In the future this might be done automatically by generate_metadata. 
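# --- Illustrative sketch (editorial example, not part of this patch) ------------------------
# A minimal concrete split primitive on top of "TabularSplitPrimitiveBase" defined above. Only
# "_get_splits" has to be provided: it returns a list of (train_indices, test_indices) pairs of
# row positions in the main resource; "fit" stores these pairs (together with the relations
# graph from "dataset.get_relations_graph()") and "_produce" later materializes each pair into
# a train or score Dataset via "base_utils.sample_rows". The "folds" hyper-parameter and the
# use of scikit-learn's KFold are assumptions of this sketch; a real primitive would also
# define primitive "metadata", which is omitted here for brevity.

import typing

import numpy
import pandas
from sklearn import model_selection

from d3m import container
from d3m.metadata import hyperparams as hyperparams_module


class KFoldHyperparams(hyperparams_module.Hyperparams):
    folds = hyperparams_module.UniformInt(
        lower=2, upper=1000, default=5,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Number of folds to split the dataset into.",
    )


class KFoldTabularSplitExample(TabularSplitPrimitiveBase):
    def _get_splits(
        self, attributes: pandas.DataFrame, targets: pandas.DataFrame,
        dataset: container.Dataset, main_resource_id: str,
    ) -> typing.List[typing.Tuple[numpy.ndarray, numpy.ndarray]]:
        k_fold = model_selection.KFold(
            n_splits=self.hyperparams['folds'], shuffle=True, random_state=self._random_state,
        )
        # KFold.split yields (train_indices, test_indices) pairs of numpy arrays.
        return list(k_fold.split(attributes))
# ---------------------------------------------------------------------------------------------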
+        # See: https://gitlab.com/datadrivendiscovery/d3m/issues/119
+        for index, dataset in enumerate(output_datasets):
+            output_datasets.metadata = dataset.metadata.copy_to(output_datasets.metadata, (), (index,))
+
+        return base.CallResult(output_datasets)
+
+    def get_params(self) -> TabularSplitPrimitiveParams:
+        if not self._fitted:
+            return TabularSplitPrimitiveParams(
+                dataset=None,
+                main_resource_id=None,
+                splits=None,
+                graph=None,
+            )
+
+        return TabularSplitPrimitiveParams(
+            dataset=self._dataset,
+            main_resource_id=self._main_resource_id,
+            splits=self._splits,
+            graph=self._graph,
+        )
+
+    def set_params(self, *, params: TabularSplitPrimitiveParams) -> None:
+        self._dataset = params['dataset']
+        self._main_resource_id = params['main_resource_id']
+        self._splits = params['splits']
+        self._graph = params['graph']
+        self._fitted = all(param is not None for param in params.values())
+
+    def __getstate__(self) -> dict:
+        state = super().__getstate__()
+
+        state['random_state'] = self._random_state
+
+        return state
+
+    def __setstate__(self, state: dict) -> None:
+        super().__setstate__(state)
+
+        self._random_state = state['random_state']
diff --git a/d3m/d3m/base/utils.py b/d3m/d3m/base/utils.py
new file mode 100644
index 0000000..f17b782
--- /dev/null
+++ b/d3m/d3m/base/utils.py
@@ -0,0 +1,342 @@
+import collections
+import copy
+import logging
+import typing
+
+from d3m import container, exceptions
+from d3m.metadata import base as metadata_base
+
+logger = logging.getLogger(__name__)
+
+
+def get_columns_to_use(
+    metadata: metadata_base.DataMetadata, use_columns: typing.Sequence[int], exclude_columns: typing.Sequence[int],
+    can_use_column: typing.Callable,
+) -> typing.Tuple[typing.List[int], typing.List[int]]:
+    """
+    A helper function which computes a list of columns to use and a list of columns to ignore
+    given ``use_columns``, ``exclude_columns``, and a ``can_use_column`` function which should
+    return ``True`` when a column can be used.
+    """
+
+    all_columns = list(use_columns)
+
+    # If "use_columns" is provided, this is our view of which columns exist.
+    if not all_columns:
+        # Otherwise, we start with all columns.
+        all_columns = list(range(metadata.query_field((metadata_base.ALL_ELEMENTS,), 'dimension')['length']))
+
+        # And remove those in "exclude_columns".
+        all_columns = [column_index for column_index in all_columns if column_index not in exclude_columns]
+
+    # Now we create a list of columns for which "can_use_column" returns "True",
+    # but also a list of columns for which it does not. The latter can be used
+    # to determine if there is an error or warning. For example, when using "use_columns",
+    # ideally, "columns_not_to_use" should be empty or a warning should be made.
+    # Or, some primitives might require operating on all columns, so if "columns_not_to_use"
+    # is not empty, an error should be raised.
+    columns_to_use = []
+    columns_not_to_use = []
+    for column_index in all_columns:
+        if can_use_column(column_index):
+            columns_to_use.append(column_index)
+        else:
+            columns_not_to_use.append(column_index)
+
+    return columns_to_use, columns_not_to_use
+
+
+def combine_columns(
+    inputs: container.DataFrame, column_indices: typing.Sequence[int], columns_list: typing.Sequence[container.DataFrame], *,
+    return_result: str, add_index_columns: bool,
+) -> container.DataFrame:
+    """
+    Method which appends existing columns, replaces them, or creates a new result from them, based on
+    the ``return_result`` argument, which can be ``append``, ``replace``, or ``new``.
+
+    ``add_index_columns`` controls whether, when creating a new result, primary index columns should be added
+    if they are not already among the columns.
+
+    ``inputs`` is a DataFrame for which we are appending or replacing columns, or, if we are creating a new result,
+    from where a primary index column can be taken.
+
+    ``column_indices`` controls which columns in ``inputs`` were used to create ``columns_list``,
+    and which columns should be replaced when replacing them.
+
+    ``columns_list`` is a list of DataFrames representing all together new columns. The reason it is a list is
+    to make it easier to operate per-column when preparing ``columns_list`` and not have to concat them all
+    together unnecessarily.
+
+    Top-level metadata in ``columns_list`` is ignored, except when creating a new result.
+    In that case top-level metadata from the first element in the list is used.
+
+    When ``column_indices`` columns are being replaced with ``columns_list``, existing metadata in ``column_indices``
+    columns is not preserved but replaced with metadata in ``columns_list``. Ideally, metadata for ``columns_list``
+    has been constructed by copying source metadata from ``column_indices`` columns and modifying it as
+    necessary to adapt it to new columns. But ``columns_list`` can also have completely new metadata, if this
+    is more reasonable; it should be understood that in this case, when replacing ``column_indices``
+    columns, any custom additional metadata on those columns will be lost.
+
+    ``column_indices`` and ``columns_list`` do not have to match in number of columns. Columns are first
+    replaced in order for matching indices and columns. If there are then more ``column_indices`` than
+    ``columns_list`` columns, the additional ``column_indices`` columns are removed. If there are more ``columns_list`` than
+    ``column_indices`` columns, the additional ``columns_list`` columns are inserted after the last replaced column.
+
+    If ``column_indices`` is empty, then the replacing behavior is equivalent to appending.
+    """
+
+    if return_result == 'append':
+        outputs = inputs
+        for columns in columns_list:
+            outputs = outputs.append_columns(columns)
+
+    elif return_result == 'replace':
+        if not column_indices:
+            return combine_columns(inputs, column_indices, columns_list, return_result='append', add_index_columns=add_index_columns)
+
+        # We copy here and disable copying inside "replace_columns" to copy only once.
+        # We have to copy because "replace_columns" is modifying data in-place.
+        outputs = copy.copy(inputs)
+
+        columns_replaced = 0
+        for columns in columns_list:
+            columns_length = columns.shape[1]
+            if columns_replaced < len(column_indices):
+                # It is OK if the slice of "column_indices" is shorter than "columns". Only those columns
+                # listed in the slice will be replaced and others appended after the last replaced column.
+                outputs = outputs.replace_columns(columns, column_indices[columns_replaced:columns_replaced + columns_length], copy=False)
+            else:
+                # We insert the rest of the columns after the last columns we replaced. We know that "column_indices"
+                # is non-empty and that the last item of "column_indices" points to the last column we replaced
+                # for those listed in "column_indices". We replaced more columns though, so we have to add the
+                # difference, and then add 1 to insert after the last column.
+                outputs = outputs.insert_columns(columns, column_indices[-1] + (columns_replaced - len(column_indices)) + 1)
+            columns_replaced += columns_length
+
+        if columns_replaced < len(column_indices):
+            outputs = outputs.remove_columns(column_indices[columns_replaced:len(column_indices)])
+
+    elif return_result == 'new':
+        if not any(columns.shape[1] for columns in columns_list):
+            raise ValueError("No columns produced.")
+
+        outputs = columns_list[0]
+        for columns in columns_list[1:]:
+            outputs = outputs.append_columns(columns)
+
+        if add_index_columns:
+            inputs_index_columns = inputs.metadata.get_index_columns()
+            outputs_index_columns = outputs.metadata.get_index_columns()
+
+            if inputs_index_columns and not outputs_index_columns:
+                # Add index columns at the beginning.
+                outputs = inputs.select_columns(inputs_index_columns).append_columns(outputs, use_right_metadata=True)
+
+    else:
+        raise exceptions.InvalidArgumentValueError("\"return_result\" has an invalid value: {return_result}".format(return_result=return_result))
+
+    return outputs
+
+
+def combine_columns_metadata(
+    inputs: metadata_base.DataMetadata, column_indices: typing.Sequence[int], columns_list: typing.Sequence[metadata_base.DataMetadata], *,
+    return_result: str, add_index_columns: bool,
+) -> metadata_base.DataMetadata:
+    """
+    Analogous to ``combine_columns`` but operates only on metadata.
+    """
+
+    if return_result == 'append':
+        outputs = inputs
+        for columns in columns_list:
+            outputs = outputs.append_columns(columns)
+
+    elif return_result == 'replace':
+        if not column_indices:
+            return combine_columns_metadata(inputs, column_indices, columns_list, return_result='append', add_index_columns=add_index_columns)
+
+        outputs = inputs
+
+        columns_replaced = 0
+        for columns in columns_list:
+            columns_length = columns.query_field((metadata_base.ALL_ELEMENTS,), 'dimension')['length']
+            if columns_replaced < len(column_indices):
+                # It is OK if the slice of "column_indices" is shorter than "columns". Only those columns
+                # listed in the slice will be replaced and others appended after the last replaced column.
+                outputs = outputs.replace_columns(columns, column_indices[columns_replaced:columns_replaced + columns_length])
+            else:
+                # We insert the rest of the columns after the last columns we replaced. We know that "column_indices"
+                # is non-empty and that the last item of "column_indices" points to the last column we replaced
+                # for those listed in "column_indices". We replaced more columns though, so we have to add the
+                # difference, and then add 1 to insert after the last column.
+                outputs = outputs.insert_columns(columns, column_indices[-1] + (columns_replaced - len(column_indices)) + 1)
+            columns_replaced += columns_length
+
+        if columns_replaced < len(column_indices):
+            outputs = outputs.remove_columns(column_indices[columns_replaced:len(column_indices)])
+
+    elif return_result == 'new':
+        if not any(columns_metadata.query_field((metadata_base.ALL_ELEMENTS,), 'dimension')['length'] for columns_metadata in columns_list):
+            raise ValueError("No columns produced.")
+
+        outputs = columns_list[0]
+        for columns in columns_list[1:]:
+            outputs = outputs.append_columns(columns)
+
+        if add_index_columns:
+            inputs_index_columns = inputs.get_index_columns()
+            outputs_index_columns = outputs.get_index_columns()
+
+            if inputs_index_columns and not outputs_index_columns:
+                # Add index columns at the beginning.
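# --- Illustrative sketch (editorial example, not part of this patch) ------------------------
# How a column-wise primitive would typically combine the two helpers above, following the
# documented semantics: "get_columns_to_use" selects which input columns may be operated on
# (honoring "use_columns"/"exclude_columns"), and "combine_columns" merges the per-column
# results back according to "return_result". The selection rule ("only str columns") and the
# string-stripping "transformation" are assumptions of this sketch.

from d3m import container
from d3m.base import utils as base_utils


def process_dataframe(inputs: container.DataFrame) -> container.DataFrame:
    def can_use_column(column_index: int) -> bool:
        # Operate only on columns whose structural type is str.
        return inputs.metadata.query_column(column_index).get('structural_type', None) == str

    columns_to_use, columns_not_to_use = base_utils.get_columns_to_use(
        inputs.metadata, use_columns=[], exclude_columns=[], can_use_column=can_use_column,
    )

    # One single-column DataFrame per processed column; "select_columns" carries over metadata.
    columns_list = []
    for column_index in columns_to_use:
        column = inputs.select_columns([column_index])
        column.iloc[:, 0] = column.iloc[:, 0].str.strip()  # The example "transformation".
        columns_list.append(column)

    # With "replace", processed columns take the place of the originals, in order; with
    # "append" they are added at the end; with "new" only the new columns (plus the primary
    # index, when "add_index_columns" is set) are returned.
    return base_utils.combine_columns(
        inputs, columns_to_use, columns_list,
        return_result='replace', add_index_columns=True,
    )
# ---------------------------------------------------------------------------------------------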
+ outputs = inputs.select_columns(inputs_index_columns).append_columns(outputs, use_right_metadata=True) + + else: + raise exceptions.InvalidArgumentValueError("\"return_result\" has an invalid value: {return_result}".format(return_result=return_result)) + + return outputs + + +def get_tabular_resource( + dataset: container.Dataset, resource_id: typing.Optional[str], *, + pick_entry_point: bool = True, pick_one: bool = True, has_hyperparameter: bool = True, +) -> typing.Tuple[str, container.DataFrame]: + if resource_id is None and pick_entry_point: + for dataset_resource_id in dataset.keys(): + if dataset.metadata.has_semantic_type((dataset_resource_id,), 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint'): + resource_id = dataset_resource_id + break + + if resource_id is None and pick_one: + tabular_resource_ids = [dataset_resource_id for dataset_resource_id, dataset_resource in dataset.items() if isinstance(dataset_resource, container.DataFrame)] + if len(tabular_resource_ids) == 1: + resource_id = tabular_resource_ids[0] + + if resource_id is None: + if has_hyperparameter: + if pick_entry_point and pick_one: + raise ValueError("A Dataset with multiple tabular resources without an entry point and no resource specified as a hyper-parameter.") + elif pick_entry_point: + raise ValueError("A Dataset without an entry point and no resource specified as a hyper-parameter.") + elif pick_one: + raise ValueError("A Dataset with multiple tabular resources and no resource specified as a hyper-parameter.") + else: + raise ValueError("No resource specified as a hyper-parameter.") + else: + if pick_entry_point and pick_one: + raise ValueError("A Dataset with multiple tabular resources without an entry point.") + elif pick_entry_point: + raise ValueError("A Dataset without an entry point.") + elif pick_one: + raise ValueError("A Dataset with multiple tabular resources.") + else: + raise ValueError("No resource specified.") + + else: + resource = dataset[resource_id] + + if not isinstance(resource, container.DataFrame): + raise TypeError("The Dataset resource '{resource_id}' is not a DataFrame, but '{type}'.".format( + resource_id=resource_id, + type=type(resource), + )) + + return resource_id, resource + + +def get_tabular_resource_metadata( + dataset: metadata_base.DataMetadata, resource_id: typing.Optional[metadata_base.SelectorSegment], *, + pick_entry_point: bool = True, pick_one: bool = True, +) -> metadata_base.SelectorSegment: + if resource_id is None and pick_entry_point: + # This can be also "ALL_ELEMENTS" and it will work out, but we prefer a direct resource ID, + # if available. So we reverse the list, because the first is "ALL_ELEMENTS" if it exists. + for dataset_resource_id in reversed(dataset.get_elements(())): + if dataset.has_semantic_type((dataset_resource_id,), 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint'): + resource_id = dataset_resource_id + break + + if resource_id is None and pick_one: + # This can be also "ALL_ELEMENTS" and it will work out, but we prefer a direct resource ID, + # if available. So we reverse the list, because the first is "ALL_ELEMENTS" if it exists. 
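# --- Illustrative sketch (editorial example, not part of this patch) ------------------------
# Typical use of "get_tabular_resource" defined above: given a Dataset, pick the main tabular
# resource (the entry point if one is marked, otherwise the single DataFrame resource), much
# like "TabularSplitPrimitiveBase.set_training_data" does. The dataset URI is a placeholder.

from d3m import container

dataset = container.Dataset.load('file:///path/to/datasetDoc.json')  # Placeholder URI.

main_resource_id, main_resource = get_tabular_resource(dataset, None)
print(main_resource_id, main_resource.shape)
# ---------------------------------------------------------------------------------------------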
+ tabular_resource_ids = [] + for dataset_resource_id in reversed(dataset.get_elements(())): + dataset_resource_type = dataset.query((dataset_resource_id,)).get('structural_type', None) + + if dataset_resource_type is None: + continue + + if issubclass(dataset_resource_type, container.DataFrame): + tabular_resource_ids.append(dataset_resource_id) + + if len(tabular_resource_ids) == 1: + resource_id = tabular_resource_ids[0] + + if resource_id is None: + if pick_entry_point and pick_one: + raise ValueError("A Dataset with multiple tabular resources without an entry point and no DataFrame resource specified as a hyper-parameter.") + elif pick_entry_point: + raise ValueError("A Dataset without an entry point and no DataFrame resource specified as a hyper-parameter.") + elif pick_one: + raise ValueError("A Dataset with multiple tabular resources and no DataFrame resource specified as a hyper-parameter.") + else: + raise ValueError("No DataFrame resource specified as a hyper-parameter.") + + else: + resource_type = dataset.query((resource_id,))['structural_type'] + + if not issubclass(resource_type, container.DataFrame): + raise TypeError("The Dataset resource '{resource_id}' is not a DataFrame, but '{type}'.".format( + resource_id=resource_id, + type=resource_type, + )) + + return resource_id + + +def sample_rows( + dataset: container.Dataset, main_resource_id: str, main_resource_indices_to_keep: typing.Set[int], + relations_graph: typing.Dict[str, typing.List[typing.Tuple[str, bool, int, int, typing.Dict]]], *, + delete_recursive: bool = False, +) -> container.Dataset: + # We store rows as sets, but later on we sort them when we select rows. + row_indices_to_keep_sets: typing.Dict[str, typing.Set[int]] = collections.defaultdict(set) + row_indices_to_keep_sets[main_resource_id] = main_resource_indices_to_keep + + # If "delete_recursive" is set to "False", we do not populate "row_indices_to_keep_sets" + # with other resources, making "select_rows" simply keep them. + if delete_recursive: + # We sort to be deterministic. + for main_resource_row_index in sorted(row_indices_to_keep_sets[main_resource_id]): + queue = [] + queue.append((main_resource_id, [main_resource_row_index])) + while queue: + current_resource_id, current_row_indices = queue.pop(0) + current_resource = dataset[current_resource_id] + + for edge_resource_id, edge_direction, edge_from_index, edge_to_index, custom_state in relations_graph[current_resource_id]: + # All rows from the main resource we want are already there. + # TODO: What to do if we get a reference to the row in the main resource which is not part of this sample? + # This means that probably the sample is invalid. We should not be generating such samples which do not + # preserve reference loops and their consistency. Otherwise it is not really possible to denormalize + # such Dataset properly: a reference is referencing a row in the main resource which does not exist. + if edge_resource_id == main_resource_id: + continue + + edge_resource = dataset[edge_resource_id] + + to_column_values = edge_resource.iloc[:, edge_to_index] + for from_column_value in current_resource.iloc[current_row_indices, edge_from_index]: + # We assume here that "index" corresponds to the default index with row indices. + rows_with_value = edge_resource.index[to_column_values == from_column_value] + # We sort to be deterministic. 
+                        new_rows_list = sorted(set(rows_with_value) - row_indices_to_keep_sets[edge_resource_id])
+                        row_indices_to_keep_sets[edge_resource_id].update(new_rows_list)
+                        queue.append((edge_resource_id, new_rows_list))
+
+    # We sort indices to get deterministic outputs from sets (which do not have deterministic order).
+    # We also do not want to change the row order but keep the original row order.
+    # Sorting by row index values assures that.
+    row_indices_to_keep = {resource_id: sorted(indices) for resource_id, indices in row_indices_to_keep_sets.items()}
+
+    return dataset.select_rows(row_indices_to_keep)
diff --git a/d3m/d3m/cli.py b/d3m/d3m/cli.py
new file mode 100644
index 0000000..a6d43c9
--- /dev/null
+++ b/d3m/d3m/cli.py
@@ -0,0 +1,1172 @@
+import argparse
+import logging
+import typing
+
+from d3m import exceptions, index, runtime, utils, __version__
+from d3m.container import dataset as dataset_module
+from d3m.metadata import base as metadata_base, pipeline as pipeline_module, pipeline_run, problem as problem_module
+
+logger = logging.getLogger(__name__)
+
+
+def pipeline_run_handler(
+    arguments: argparse.Namespace, parser: argparse.ArgumentParser,
+) -> None:
+    # Call a handler for the command.
+    arguments.pipeline_run_handler(
+        arguments,
+    )
+
+
+def pipeline_run_configure_parser(parser: argparse.ArgumentParser, *, skip_arguments: typing.Tuple = ()) -> None:
+    subparsers = parser.add_subparsers(dest='pipeline_run_command', title='commands')
+    subparsers.required = True  # type: ignore
+
+    validate_parser = subparsers.add_parser(
+        'validate', help="validate pipeline runs",
+        description="Validate pipeline runs for use in metalearning database.",
+    )
+
+    if 'list' not in skip_arguments:
+        validate_parser.add_argument(
+            '-l', '--list', default=False, action='store_true',
+            help="print path of pipeline run being validated",
+        )
+    if 'continue' not in skip_arguments:
+        validate_parser.add_argument(
+            '-c', '--continue', default=False, action='store_true',
+            help="continue after pipeline run validation error",
+        )
+    if 'pipeline_runs' not in skip_arguments:
+        validate_parser.add_argument(
+            'pipeline_runs', metavar='PIPELINE_RUN', nargs='+',
+            help="path to a pipeline run",
+        )
+    validate_parser.set_defaults(pipeline_run_handler=pipeline_run.pipeline_run_handler)
+
+
+def dataset_handler(
+    arguments: argparse.Namespace, parser: argparse.ArgumentParser, *,
+    dataset_resolver: typing.Callable = None,
+) -> None:
+    # Call a handler for the command.
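# --- Illustrative sketch (editorial example, not part of this patch) ------------------------
# The pattern used throughout this module: every command has a "*_configure_parser" function
# which registers sub-commands and calls "set_defaults(..._handler=...)", and a matching
# "*_handler" function which dispatches on the parsed namespace. A minimal stand-alone wiring
# of the "pipeline-run" command shown above; the program name and argv are assumptions.

def _example_main(argv: typing.Sequence[str]) -> None:
    parser = argparse.ArgumentParser(prog='d3m-example')
    subparsers = parser.add_subparsers(dest='command', title='commands')
    subparsers.required = True  # type: ignore

    pipeline_run_parser = subparsers.add_parser('pipeline-run', help="pipeline run commands")
    pipeline_run_configure_parser(pipeline_run_parser)

    arguments = parser.parse_args(argv)

    if arguments.command == 'pipeline-run':
        # "pipeline_run_handler" in turn calls "arguments.pipeline_run_handler", which
        # "set_defaults" above pointed at "pipeline_run.pipeline_run_handler" for "validate".
        pipeline_run_handler(arguments, pipeline_run_parser)

# _example_main(['pipeline-run', 'validate', 'pipeline_run.yml'])  # Placeholder file name.
# ---------------------------------------------------------------------------------------------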
+ arguments.dataset_handler( + arguments, + dataset_resolver=dataset_resolver, + ) + + +def dataset_configure_parser(parser: argparse.ArgumentParser, *, skip_arguments: typing.Tuple = ()) -> None: + subparsers = parser.add_subparsers(dest='dataset_command', title='commands') + subparsers.required = True # type: ignore + + describe_parser = subparsers.add_parser( + 'describe', help="generate JSON description of datasets", + description="Generates JSON descriptions of datasets.", + ) + convert_parser = subparsers.add_parser( + 'convert', help="convert datasets", + description="Converts one dataset to another.", + ) + validate_parser = subparsers.add_parser( + 'validate', help="validate datasets", + description="Validate dataset descriptions for use in metalearning database.", + ) + + if 'list' not in skip_arguments: + describe_parser.add_argument( + '-l', '--list', default=False, action='store_true', + help="print path or URI of dataset being described", + ) + if 'indent' not in skip_arguments: + describe_parser.add_argument( + '-i', '--indent', type=int, default=2, action='store', + help="indent JSON by this much, 0 disables indentation, default 2", + ) + if 'sort_keys' not in skip_arguments: + describe_parser.add_argument( + '-s', '--sort-keys', default=False, action='store_true', + help="sort keys in JSON" + ) + if 'print' not in skip_arguments: + describe_parser.add_argument( + '-p', '--print', default=False, action='store_true', + help="pretty print dataset contents instead of printing JSON description", + ) + if 'metadata' not in skip_arguments: + describe_parser.add_argument( + '-m', '--metadata', default=False, action='store_true', + help="pretty print dataset metadata instead of printing JSON description", + ) + if 'lazy' not in skip_arguments: + describe_parser.add_argument( + '-L', '--lazy', default=False, action='store_true', + help="load dataset lazily", + ) + if 'time' not in skip_arguments: + describe_parser.add_argument( + '-t', '--time', default=False, action='store_true', + help="time dataset loading instead of printing JSON description", + ) + if 'continue' not in skip_arguments: + describe_parser.add_argument( + '-c', '--continue', default=False, action='store_true', + help="continue after dataset loading error", + ) + if 'output' not in skip_arguments: + describe_parser.add_argument( + '-o', '--output', type=utils.FileType('w', encoding='utf8'), default='-', action='store', + help="save output to a file, default stdout", + ) + if 'datasets' not in skip_arguments: + describe_parser.add_argument( + 'datasets', metavar='DATASET', nargs='*', + help="path or URI of a dataset", + ) + describe_parser.set_defaults(dataset_handler=dataset_module.describe_handler) + + if 'input_uri' not in skip_arguments: + convert_parser.add_argument( + '-i', '--input', dest='input_uri', + help="input path or URI of a dataset", + ) + if 'output_uri' not in skip_arguments: + convert_parser.add_argument( + '-o', '--output', dest='output_uri', + help="output path or URI of a dataset", + ) + if 'preserve_metadata' not in skip_arguments: + convert_parser.add_argument( + '--no-metadata', default=True, action='store_false', dest='preserve_metadata', + help="do not preserve metadata", + ) + convert_parser.set_defaults(dataset_handler=dataset_module.convert_handler) + + if 'list' not in skip_arguments: + validate_parser.add_argument( + '-l', '--list', default=False, action='store_true', + help="print path or URI of dataset being validated", + ) + if 'continue' not in skip_arguments: + 
validate_parser.add_argument( + '-c', '--continue', default=False, action='store_true', + help="continue after dataset validation error", + ) + if 'datasets' not in skip_arguments: + validate_parser.add_argument( + 'datasets', metavar='DATASET', nargs='+', + help="path to a dataset description", + ) + validate_parser.set_defaults(dataset_handler=pipeline_run.dataset_handler) + + +def problem_handler( + arguments: argparse.Namespace, parser: argparse.ArgumentParser, *, + problem_resolver: typing.Callable = None, +) -> None: + # Call a handler for the command. + arguments.problem_handler( + arguments, + problem_resolver=problem_resolver, + ) + + +def problem_configure_parser(parser: argparse.ArgumentParser, *, skip_arguments: typing.Tuple = ()) -> None: + subparsers = parser.add_subparsers(dest='problem_command', title='commands') + subparsers.required = True # type: ignore + + describe_parser = subparsers.add_parser( + 'describe', help="generate JSON description of problems", + description="Generates JSON descriptions of problems.", + ) + validate_parser = subparsers.add_parser( + 'validate', help="validate problems", + description="Validate problem descriptions for use in metalearning database.", + ) + + if 'list' not in skip_arguments: + describe_parser.add_argument( + '-l', '--list', default=False, action='store_true', + help="print path or URI of problem being described", + ) + if 'indent' not in skip_arguments: + describe_parser.add_argument( + '-i', '--indent', type=int, default=2, action='store', + help="indent JSON by this much, 0 disables indentation, default 2", + ) + if 'sort_keys' not in skip_arguments: + describe_parser.add_argument( + '-s', '--sort-keys', default=False, action='store_true', + help="sort keys in JSON" + ) + if 'print' not in skip_arguments: + describe_parser.add_argument( + '-p', '--print', default=False, action='store_true', + help="pretty print problem description instead of printing JSON", + ) + if 'continue' not in skip_arguments: + describe_parser.add_argument( + '-c', '--continue', default=False, action='store_true', + help="continue after problem parsing error", + ) + if 'output' not in skip_arguments: + describe_parser.add_argument( + '-o', '--output', type=utils.FileType('w', encoding='utf8'), default='-', action='store', + help="save output to a file, default stdout", + ) + if 'no_print' not in skip_arguments: + describe_parser.add_argument( + '--no-print', default=False, action='store_true', + help="do not print JSON", + ) + if 'problems' not in skip_arguments: + describe_parser.add_argument( + 'problems', metavar='PROBLEM', nargs='+', + help="path or URI to a problem description", + ) + describe_parser.set_defaults(problem_handler=problem_module.describe_handler) + + if 'list' not in skip_arguments: + validate_parser.add_argument( + '-l', '--list', default=False, action='store_true', + help="print path or URI of problem being validated", + ) + if 'continue' not in skip_arguments: + validate_parser.add_argument( + '-c', '--continue', default=False, action='store_true', + help="continue after problem validation error", + ) + if 'problems' not in skip_arguments: + validate_parser.add_argument( + 'problems', metavar='PROBLEM', nargs='+', + help="path to a problem description", + ) + validate_parser.set_defaults(problem_handler=pipeline_run.problem_handler) + + +def primitive_handler(arguments: argparse.Namespace, parser: argparse.ArgumentParser) -> None: + # Call a handler for the command. 
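# --- Illustrative sketch (editorial example, not part of this patch) ------------------------
# "skip_arguments" lets an embedding system (for example, a TA2 wrapping this CLI) reuse a
# "*_configure_parser" while omitting flags it supplies through other means. Here the problem
# sub-commands are configured without the "-o/--output" flag and without the positional
# problem paths; the embedder is then expected to set those values on the namespace itself.
# The parser name and the chosen skipped arguments are assumptions of this sketch.

def _example_embedder_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(prog='embedder-example')
    problem_configure_parser(parser, skip_arguments=('output', 'problems'))
    return parser
# ---------------------------------------------------------------------------------------------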
+    arguments.primitive_handler(arguments)
+
+
+def primitive_configure_parser(parser: argparse.ArgumentParser, *, skip_arguments: typing.Tuple = ()) -> None:
+    subparsers = parser.add_subparsers(dest='primitive_command', title='commands')
+    subparsers.required = True  # type: ignore
+
+    search_parser = subparsers.add_parser(
+        'search', help="search locally available primitives",
+        description="Searches locally available primitives. Lists registered Python paths for primitives installed on the system.",
+    )
+    discover_parser = subparsers.add_parser(
+        'discover', help="discover primitives available on PyPi",
+        description="Discovers primitives available on PyPi. Lists package names containing D3M primitives on PyPi.",
+    )
+    describe_parser = subparsers.add_parser(
+        'describe', help="generate JSON description of primitives",
+        description="Generates JSON descriptions of primitives.",
+    )
+    download_parser = subparsers.add_parser(
+        'download', help="download files for primitives' volumes",
+        description="Downloads static files needed by primitives.",
+    )
+    validate_parser = subparsers.add_parser(
+        'validate', help="validate primitive descriptions",
+        description="Validate primitive descriptions for use in metalearning database.",
+    )
+
+    if 'prefix' not in skip_arguments:
+        search_parser.add_argument(
+            '-p', '--prefix', action='store',
+            help="primitive path prefix to limit search results to",
+        )
+    search_parser.set_defaults(primitive_handler=index.search_handler)
+
+    if 'index' not in skip_arguments:
+        discover_parser.add_argument(
+            '-i', '--index', default=index.DEFAULT_INDEX, action='store',
+            help=f"base URL of Python Package Index to use, default {index.DEFAULT_INDEX}",
+        )
+    discover_parser.set_defaults(primitive_handler=index.discover_handler)
+
+    if 'list' not in skip_arguments:
+        describe_parser.add_argument(
+            '-l', '--list', default=False, action='store_true',
+            help="print path or ID of primitive being described",
+        )
+    if 'indent' not in skip_arguments:
+        describe_parser.add_argument(
+            '-i', '--indent', type=int, default=2, action='store',
+            help="indent JSON by this much, 0 disables indentation, default 2",
+        )
+    if 'sort_keys' not in skip_arguments:
+        describe_parser.add_argument(
+            '-s', '--sort-keys', default=False, action='store_true',
+            help="sort keys in JSON"
+        )
+    if 'print' not in skip_arguments:
+        describe_parser.add_argument(
+            '-p', '--print', default=False, action='store_true',
+            help="pretty print primitive description instead of printing JSON",
+        )
+    if 'continue' not in skip_arguments:
+        describe_parser.add_argument(
+            '-c', '--continue', default=False, action='store_true',
+            help="continue after primitive loading error",
+        )
+    if 'output' not in skip_arguments:
+        describe_parser.add_argument(
+            '-o', '--output', type=utils.FileType('w', encoding='utf8'), default='-', action='store',
+            help="save output to a file, default stdout",
+        )
+    if 'primitives' not in skip_arguments:
+        describe_parser.add_argument(
+            'primitives', metavar='PRIMITIVE', nargs='+',
+            help="primitive path or primitive ID",
+        )
+    describe_parser.set_defaults(primitive_handler=index.describe_handler)
+
+    if 'output' not in skip_arguments:
+        download_parser.add_argument(
+            '-o', '--output', default=index.DEFAULT_OUTPUT, action='store',
+            help="path of a directory to download to, default current directory",
+        )
+    if 'redownload' not in skip_arguments:
+        download_parser.add_argument(
+            '-r', '--redownload', default=False, action='store_true',
+            help="redownload files again, even if they
already exist", + ) + if 'prefix' not in skip_arguments: + download_parser.add_argument( + '-p', '--prefix', action='store', + help="primitive path prefix to limit download to", + ) + download_parser.set_defaults(primitive_handler=index.download_handler) + + if 'list' not in skip_arguments: + validate_parser.add_argument( + '-l', '--list', default=False, action='store_true', + help="print path of primitive description being validated", + ) + if 'continue' not in skip_arguments: + validate_parser.add_argument( + '-c', '--continue', default=False, action='store_true', + help="continue after primitive description validation error", + ) + if 'primitives' not in skip_arguments: + validate_parser.add_argument( + 'primitives', metavar='PRIMITIVE', nargs='+', + help="path to a primitive description", + ) + validate_parser.set_defaults(primitive_handler=pipeline_run.primitive_handler) + + +def pipeline_handler( + arguments: argparse.Namespace, parser: argparse.ArgumentParser, *, + resolver_class: typing.Type[pipeline_module.Resolver] = None, + no_resolver_class: typing.Type[pipeline_module.Resolver] = None, + pipeline_class: typing.Type[pipeline_module.Pipeline] = None, +) -> None: + # Call a handler for the command. + arguments.pipeline_handler( + arguments, + resolver_class=resolver_class, + no_resolver_class=no_resolver_class, + pipeline_class=pipeline_class, + ) + + +def pipeline_configure_parser(parser: argparse.ArgumentParser, *, skip_arguments: typing.Tuple = ()) -> None: + subparsers = parser.add_subparsers(dest='pipeline_command', title='commands') + subparsers.required = True # type: ignore + + describe_parser = subparsers.add_parser( + 'describe', help="generate JSON description of pipelines", + description="Generates JSON descriptions of pipelines.", + ) + validate_parser = subparsers.add_parser( + 'validate', help="validate pipelines", + description="Validate pipeline descriptions for use in metalearning database.", + ) + + if 'no_resolving' not in skip_arguments: + describe_parser.add_argument( + '-n', '--no-resolving', default=False, action='store_true', + help="do not resolve primitives and pipelines, this prevents checking to be fully done though", + ) + if 'check' not in skip_arguments: + describe_parser.add_argument( + '-C', '--no-check', default=True, action='store_false', dest='check', + help="do not check a pipeline, just parse it", + ) + if 'allow_placeholders' not in skip_arguments: + describe_parser.add_argument( + '-a', '--allow-placeholders', default=False, action='store_true', + help="allow placeholders in a pipeline", + ) + if 'standard_pipeline' not in skip_arguments: + describe_parser.add_argument( + '-t', '--not-standard-pipeline', default=True, action='store_false', dest='standard_pipeline', + help="allow a pipeline to not have standard inputs and outputs", + ) + if 'list' not in skip_arguments: + describe_parser.add_argument( + '-l', '--list', default=False, action='store_true', + help="print path of pipeline being described", + ) + if 'indent' not in skip_arguments: + describe_parser.add_argument( + '-i', '--indent', type=int, default=2, action='store', + help="indent JSON by this much, 0 disables indentation, default 2", + ) + if 'sort_keys' not in skip_arguments: + describe_parser.add_argument( + '-s', '--sort-keys', default=False, action='store_true', + help="sort keys in JSON" + ) + if 'print' not in skip_arguments: + describe_parser.add_argument( + '-p', '--print', default=False, action='store_true', + help="pretty print pipeline description instead of 
printing JSON", + ) + if 'continue' not in skip_arguments: + describe_parser.add_argument( + '-c', '--continue', default=False, action='store_true', + help="continue after pipeline parsing error", + ) + if 'set_source_name' not in skip_arguments: + describe_parser.add_argument( + '--set-source-name', action='store', + help="set pipeline's source name", + ) + if 'output' not in skip_arguments: + describe_parser.add_argument( + '-o', '--output', type=utils.FileType('w', encoding='utf8'), default='-', action='store', + help="save output to a file, default stdout", + ) + if 'pipelines' not in skip_arguments: + describe_parser.add_argument( + 'pipelines', metavar='PIPELINE', nargs='+', + help="path to a pipeline (.json, .yml, or .yaml)", + ) + describe_parser.set_defaults(pipeline_handler=pipeline_module.describe_handler) + + if 'list' not in skip_arguments: + validate_parser.add_argument( + '-l', '--list', default=False, action='store_true', + help="print path of pipeline being validated", + ) + if 'continue' not in skip_arguments: + validate_parser.add_argument( + '-c', '--continue', default=False, action='store_true', + help="continue after pipeline validation error", + ) + if 'pipelines' not in skip_arguments: + validate_parser.add_argument( + 'pipelines', metavar='PIPELINE', nargs='*', + help="path to a pipeline (.json, .yml, or .yaml)", + ) + validate_parser.set_defaults(pipeline_handler=pipeline_run.pipeline_handler) + + +def runtime_handler( + arguments: argparse.Namespace, parser: argparse.ArgumentParser, *, + pipeline_resolver: typing.Callable = None, pipeline_run_parser: typing.Callable = None, + dataset_resolver: typing.Callable = None, problem_resolver: typing.Callable = None, +) -> None: + # Dynamically fetch which subparser was used. + subparser = parser._subparsers._group_actions[0].choices[arguments.runtime_command] # type: ignore + + # TODO: These arguments are required, but this is not visible from the usage line. These arguments are marked as optional there. 
+ if getattr(arguments, 'input_run', None) is None: + manual_config = { + 'fit': [ + ('-i/--input', 'inputs'), ('-p/--pipeline', 'pipeline'), + ], + 'produce': [ + ('-t/--test-input', 'test_inputs'), + ], + 'score': [ + ('-t/--test-input', 'test_inputs'), ('-a/--score-input', 'score_inputs'), + ], + 'fit-produce': [ + ('-i/--input', 'inputs'), ('-t/--test-input', 'test_inputs'), ('-p/--pipeline', 'pipeline'), + ], + 'fit-score': [ + ('-i/--input', 'inputs'), ('-t/--test-input', 'test_inputs'), ('-a/--score-input', 'score_inputs'), + ('-p/--pipeline', 'pipeline'), + ], + 'evaluate': [ + ('-i/--input', 'inputs'), ('-p/--pipeline', 'pipeline'), ('-d/--data-pipeline', 'data_pipeline'), + ], + }.get(arguments.runtime_command, []) + + if any(getattr(arguments, dest, None) is None for (name, dest) in manual_config): + subparser.error( + '{command} requires either -u/--input-run or the following arguments: {manual_arguments}'.format( + command=arguments.runtime_command, + manual_arguments=', '.join( + name for (name, dest) in manual_config + ), + ) + ) + else: + manual_config_with_defaults = [ + ('-i/--input', 'inputs', None), ('-t/--test-input', 'test_inputs', None), ('-a/--score-input', 'score_inputs', None), + ('-r/--problem', 'problem', None), ('-p/--pipeline', 'pipeline', None), ('-d/--data-pipeline', 'data_pipeline', None), + ('-n/--random-seed', 'random_seed', 0), ('-e/--metric', 'metrics', None), ('-Y/--scoring-param', 'scoring_params', None), + ('--scoring-random-seed', 'scoring_random_seed', 0), ('-n/--scoring-pipeline', 'scoring_pipeline', runtime.DEFAULT_SCORING_PIPELINE_PATH), + ('-y/--data-param', 'data_params', None), ('--data-split-file', 'data_split_file', None), ('--data-random-seed', 'data_random_seed', 0), + ('--not-standard-pipeline', 'standard_pipeline', True), + ] + if any(getattr(arguments, dest, None) not in [default, None] for (name, dest, default) in manual_config_with_defaults): + subparser.error( + '-u/--input-run cannot be used with the following arguments: {manual_arguments}'.format( + manual_arguments=', '.join( + name for (name, dest, default) in manual_config_with_defaults if getattr(arguments, dest, None) not in [default, None] + ), + ) + ) + + if not getattr(arguments, 'standard_pipeline', True) and getattr(arguments, 'output', None) is not None: + subparser.error("you cannot save predictions for a non-standard pipeline") + + # Call a handler for the command. 
+ arguments.runtime_handler( + arguments, + pipeline_resolver=pipeline_resolver, + pipeline_run_parser=pipeline_run_parser, + dataset_resolver=dataset_resolver, + problem_resolver=problem_resolver, + ) + + +def runtime_configure_parser(parser: argparse.ArgumentParser, *, skip_arguments: typing.Tuple = ()) -> None: + if 'random_seed' not in skip_arguments: + parser.add_argument( + '-n', '--random-seed', type=int, default=0, action='store', metavar='SEED', + help="random seed to use", + ) + if 'context' not in skip_arguments: + parser.add_argument( + '-x', '--context', choices=[context.name for context in metadata_base.Context], default=metadata_base.Context.TESTING.name, action='store', + help="in which context to run pipelines, default is TESTING", + ) + if 'volumes_dir' not in skip_arguments: + parser.add_argument( + '-v', '--volumes', action='store', dest='volumes_dir', + help="path to a directory with static files required by primitives, in the standard directory structure (as obtained running \"python3 -m d3m index download\")", + ) + if 'datasets_dir' not in skip_arguments: + parser.add_argument( + '-d', '--datasets', action='store', dest='datasets_dir', + help="path to a directory with datasets (and problem descriptions) to resolve IDs in pipeline run files", + ) + if 'scratch_dir' not in skip_arguments: + parser.add_argument( + '-s', '--scratch', action='store', dest='scratch_dir', + help="path to a directory to store any temporary files needed during execution", + ) + if 'worker_id' not in skip_arguments: + parser.add_argument( + '--worker-id', action='store', + help="globally unique identifier for the machine on which the runtime is running", + ) + + subparsers = parser.add_subparsers(dest='runtime_command', title='commands') + subparsers.required = True # type: ignore + + fit_parser = subparsers.add_parser( + 'fit', help="fit a pipeline", + description="Fits a pipeline on train data, resulting in a fitted pipeline. 
Outputs also produced predictions during fitting on train data.", + ) + produce_parser = subparsers.add_parser( + 'produce', help="produce using a fitted pipeline", + description="Produce predictions on test data given a fitted pipeline.", + ) + score_parser = subparsers.add_parser( + 'score', help="produce using a fitted pipeline and score results", + description="Produce predictions on test data given a fitted pipeline and compute scores.", + ) + fit_produce_parser = subparsers.add_parser( + 'fit-produce', help="fit a pipeline and then produce using it", + description="Fit a pipeline on train data and produce predictions on test data.", + ) + fit_score_parser = subparsers.add_parser( + 'fit-score', help="fit a pipeline, produce using it and score results", + description="Fit a pipeline on train data, then produce predictions on test data and compute scores.", + ) + score_predictions_parser = subparsers.add_parser( + 'score-predictions', help="score a predictions file", + description="Compute scores given a file with predictions.", + ) + evaluate_parser = subparsers.add_parser( + 'evaluate', help="evaluate a pipeline", + description="Run pipeline multiple times using an evaluation approach and compute scores for each run.", + ) + + if 'pipeline' not in skip_arguments: + fit_parser.add_argument( + '-p', '--pipeline', action='store', + help="path to a pipeline file (.json, .yml, or .yaml) or pipeline ID", + ) + if 'problem' not in skip_arguments: + fit_parser.add_argument( + '-r', '--problem', action='store', + help="path or URI to a problem description", + ) + if 'inputs' not in skip_arguments: + fit_parser.add_argument( + '-i', '--input', action='append', metavar='INPUT', dest='inputs', + help="path or URI of an input train dataset", + ) + if 'input_run' not in skip_arguments: + fit_parser.add_argument( + '-u', '--input-run', type=utils.FileType('r', encoding='utf8'), action='store', + help="path to a pipeline run file with configuration, use \"-\" for stdin", + ) + if 'save' not in skip_arguments: + fit_parser.add_argument( + '-s', '--save', type=utils.FileType('wb'), action='store', + help="save fitted pipeline to a file, use \"-\" for stdout", + ) + if 'output' not in skip_arguments: + fit_parser.add_argument( + '-o', '--output', type=utils.FileType('w', encoding='utf8'), action='store', + help="save produced predictions during fitting to a file, use \"-\" for stdout", + ) + if 'output_run' not in skip_arguments: + fit_parser.add_argument( + '-O', '--output-run', type=utils.FileType('w', encoding='utf8'), action='store', + help="save pipeline run document to a YAML file, use \"-\" for stdout", + ) + if 'standard_pipeline' not in skip_arguments: + fit_parser.add_argument( + '--not-standard-pipeline', default=True, action='store_false', dest='standard_pipeline', + help="allow a pipeline to not have standard inputs and outputs", + ) + if 'expose_produced_outputs_dir' not in skip_arguments: + fit_parser.add_argument( + '-E', '--expose-produced-outputs', action='store', dest='expose_produced_outputs_dir', + help="save to a directory produced outputs of all primitives from pipeline's fit run", + ) + fit_parser.set_defaults(runtime_handler=runtime.fit_handler) + + if 'fitted_pipeline' not in skip_arguments: + produce_parser.add_argument( + '-f', '--fitted-pipeline', type=utils.FileType('rb'), action='store', required=True, + help="path to a saved fitted pipeline, use \"-\" for stdin", + ) + if 'test_inputs' not in skip_arguments: + produce_parser.add_argument( + '-t', '--test-input', 
action='append', metavar='INPUT', dest='test_inputs', + help="path or URI of an input test dataset", + ) + if 'input_run' not in skip_arguments: + produce_parser.add_argument( + '-u', '--input-run', type=utils.FileType('r', encoding='utf8'), action='store', + help="path to a pipeline run file with configuration, use \"-\" for stdin", + ) + if 'output' not in skip_arguments: + produce_parser.add_argument( + '-o', '--output', type=utils.FileType('w', encoding='utf8'), action='store', + help="save produced predictions to a file, use \"-\" for stdout", + ) + if 'output_run' not in skip_arguments: + produce_parser.add_argument( + '-O', '--output-run', type=utils.FileType('w', encoding='utf8'), action='store', + help="save pipeline run document to a YAML file, use \"-\" for stdout", + ) + if 'expose_produced_outputs_dir' not in skip_arguments: + produce_parser.add_argument( + '-E', '--expose-produced-outputs', action='store', dest='expose_produced_outputs_dir', + help="save to a directory produced outputs of all primitives from pipeline's produce run", + ) + produce_parser.set_defaults(runtime_handler=runtime.produce_handler) + + if 'fitted_pipeline' not in skip_arguments: + score_parser.add_argument( + '-f', '--fitted-pipeline', type=utils.FileType('rb'), action='store', required=True, + help="path to a saved fitted pipeline, use \"-\" for stdin", + ) + if 'scoring_pipeline' not in skip_arguments: + score_parser.add_argument( + '-n', '--scoring-pipeline', default=runtime.DEFAULT_SCORING_PIPELINE_PATH, action='store', + help="path to a scoring pipeline file (.json, .yml, or .yaml) or pipeline ID, default is standard scoring pipeline", + ) + if 'test_inputs' not in skip_arguments: + score_parser.add_argument( + '-t', '--test-input', action='append', metavar='INPUT', dest='test_inputs', + help="path or URI of an input test dataset", + ) + if 'score_inputs' not in skip_arguments: + score_parser.add_argument( + '-a', '--score-input', action='append', metavar='INPUT', dest='score_inputs', + help="path or URI of an input score dataset", + ) + if 'input_run' not in skip_arguments: + score_parser.add_argument( + '-u', '--input-run', type=utils.FileType('r', encoding='utf8'), action='store', + help="path to a pipeline run file with configuration, use \"-\" for stdin", + ) + if 'metrics' not in skip_arguments: + score_parser.add_argument( + '-e', '--metric', choices=[metric.name for metric in problem_module.PerformanceMetric], + action='append', metavar='METRIC', dest='metrics', + help="metric to use, can be specified multiple times, default from problem description", + ) + if 'scoring_params' not in skip_arguments: + score_parser.add_argument( + '-Y', '--scoring-param', nargs=2, action='append', metavar=('NAME', 'VALUE'), dest='scoring_params', + help="hyper-parameter name and its value for scoring pipeline, can be specified multiple times, value should be JSON-serialized", + ) + if 'output' not in skip_arguments: + score_parser.add_argument( + '-o', '--output', type=utils.FileType('w', encoding='utf8'), action='store', + help="save produced predictions to a file, use \"-\" for stdout", + ) + if 'scores' not in skip_arguments: + score_parser.add_argument( + '-c', '--scores', type=utils.FileType('w', encoding='utf8'), default='-', action='store', + help="save scores to a file, default stdout", + ) + if 'output_run' not in skip_arguments: + score_parser.add_argument( + '-O', '--output-run', type=utils.FileType('w', encoding='utf8'), action='store', + help="save pipeline run document to a YAML file, use 
\"-\" for stdout", + ) + if 'expose_produced_outputs_dir' not in skip_arguments: + score_parser.add_argument( + '-E', '--expose-produced-outputs', action='store', dest='expose_produced_outputs_dir', + help="save to a directory produced outputs of all primitives from pipeline's produce run", + ) + score_parser.set_defaults(runtime_handler=runtime.score_handler) + + if 'pipeline' not in skip_arguments: + fit_produce_parser.add_argument( + '-p', '--pipeline', action='store', + help="path to a pipeline file (.json, .yml, or .yaml) or pipeline ID", + ) + if 'problem' not in skip_arguments: + fit_produce_parser.add_argument( + '-r', '--problem', action='store', + help="path or URI to a problem description", + ) + if 'inputs' not in skip_arguments: + fit_produce_parser.add_argument( + '-i', '--input', action='append', metavar='INPUT', dest='inputs', + help="path or URI of an input train dataset", + ) + if 'test_inputs' not in skip_arguments: + fit_produce_parser.add_argument( + '-t', '--test-input', action='append', metavar='INPUT', dest='test_inputs', + help="path or URI of an input test dataset", + ) + if 'input_run' not in skip_arguments: + fit_produce_parser.add_argument( + '-u', '--input-run', type=utils.FileType('r', encoding='utf8'), action='store', + help="path to a pipeline run file with configuration, use \"-\" for stdin", + ) + if 'save' not in skip_arguments: + fit_produce_parser.add_argument( + '-s', '--save', type=utils.FileType('wb'), action='store', + help="save fitted pipeline to a file, use \"-\" for stdout", + ) + if 'output' not in skip_arguments: + fit_produce_parser.add_argument( + '-o', '--output', type=utils.FileType('w', encoding='utf8'), action='store', + help="save produced predictions to a file, use \"-\" for stdout", + ) + if 'output_run' not in skip_arguments: + fit_produce_parser.add_argument( + '-O', '--output-run', type=utils.FileType('w', encoding='utf8'), action='store', + help="save pipeline run documents to a YAML file, use \"-\" for stdout", + ) + if 'standard_pipeline' not in skip_arguments: + fit_produce_parser.add_argument( + '--not-standard-pipeline', default=True, action='store_false', dest='standard_pipeline', + help="allow a pipeline to not have standard inputs and outputs", + ) + if 'expose_produced_outputs_dir' not in skip_arguments: + fit_produce_parser.add_argument( + '-E', '--expose-produced-outputs', action='store', dest='expose_produced_outputs_dir', + help="save to a directory produced outputs of all primitives from pipeline's produce run", + ) + fit_produce_parser.set_defaults(runtime_handler=runtime.fit_produce_handler) + + if 'pipeline' not in skip_arguments: + fit_score_parser.add_argument( + '-p', '--pipeline', action='store', + help="path to a pipeline file (.json, .yml, or .yaml) or pipeline ID", + ) + if 'scoring_pipeline' not in skip_arguments: + fit_score_parser.add_argument( + '-n', '--scoring-pipeline', default=runtime.DEFAULT_SCORING_PIPELINE_PATH, action='store', + help="path to a scoring pipeline file (.json, .yml, or .yaml) or pipeline ID, default is standard scoring pipeline", + ) + if 'problem' not in skip_arguments: + fit_score_parser.add_argument( + '-r', '--problem', action='store', + help="path or URI to a problem description", + ) + if 'inputs' not in skip_arguments: + fit_score_parser.add_argument( + '-i', '--input', action='append', metavar='INPUT', dest='inputs', + help="path or URI of an input train dataset", + ) + if 'test_inputs' not in skip_arguments: + fit_score_parser.add_argument( + '-t', '--test-input', 
action='append', metavar='INPUT', dest='test_inputs', + help="path or URI of an input test dataset", + ) + if 'score_inputs' not in skip_arguments: + fit_score_parser.add_argument( + '-a', '--score-input', action='append', metavar='INPUT', dest='score_inputs', + help="path or URI of an input score dataset", + ) + if 'input_run' not in skip_arguments: + fit_score_parser.add_argument( + '-u', '--input-run', type=utils.FileType('r', encoding='utf8'), action='store', + help="path to a pipeline run file with configuration, use \"-\" for stdin", + ) + if 'metrics' not in skip_arguments: + fit_score_parser.add_argument( + '-e', '--metric', choices=[metric.name for metric in problem_module.PerformanceMetric], + action='append', metavar='METRIC', dest='metrics', + help="metric to use, can be specified multiple times, default from problem description", + ) + if 'scoring_params' not in skip_arguments: + fit_score_parser.add_argument( + '-Y', '--scoring-param', nargs=2, action='append', metavar=('NAME', 'VALUE'), dest='scoring_params', + help="hyper-parameter name and its value for scoring pipeline, can be specified multiple times, value should be JSON-serialized", + ) + if 'save' not in skip_arguments: + fit_score_parser.add_argument( + '-s', '--save', type=utils.FileType('wb'), action='store', + help="save fitted pipeline to a file, use \"-\" for stdout", + ) + if 'output' not in skip_arguments: + fit_score_parser.add_argument( + '-o', '--output', type=utils.FileType('w', encoding='utf8'), action='store', + help="save produced predictions to a file, use \"-\" for stdout", + ) + if 'scores' not in skip_arguments: + fit_score_parser.add_argument( + '-c', '--scores', type=utils.FileType('w', encoding='utf8'), default='-', action='store', + help="save scores to a file, default stdout", + ) + if 'output_run' not in skip_arguments: + fit_score_parser.add_argument( + '-O', '--output-run', type=utils.FileType('w', encoding='utf8'), action='store', + help="save pipeline run documents to a YAML file, use \"-\" for stdout", + ) + if 'scoring_random_seed' not in skip_arguments: + fit_score_parser.add_argument( + '--scoring-random-seed', type=int, action='store', default=0, + help="random seed to use for scoring", + ) + if 'expose_produced_outputs_dir' not in skip_arguments: + fit_score_parser.add_argument( + '-E', '--expose-produced-outputs', action='store', dest='expose_produced_outputs_dir', + help="save to a directory produced outputs of all primitives from pipeline's produce run", + ) + fit_score_parser.set_defaults(runtime_handler=runtime.fit_score_handler) + + if 'scoring_pipeline' not in skip_arguments: + score_predictions_parser.add_argument( + '-n', '--scoring-pipeline', default=runtime.DEFAULT_SCORING_PIPELINE_PATH, action='store', + help="path to a scoring pipeline file (.json, .yml, or .yaml) or pipeline ID, default is standard scoring pipeline", + ) + if 'problem' not in skip_arguments: + score_predictions_parser.add_argument( + '-r', '--problem', action='store', + help="path or URI to a problem description", + ) + if 'predictions' not in skip_arguments: + score_predictions_parser.add_argument( + '-p', '--predictions', type=utils.FileType('r', encoding='utf8'), action='store', required=True, + help="path to a predictions file, use \"-\" for stdin", + ) + if 'score_inputs' not in skip_arguments: + score_predictions_parser.add_argument( + '-a', '--score-input', action='append', metavar='INPUT', dest='score_inputs', required=True, + help="path or URI of an input score dataset", + ) + if 'metrics' not 
in skip_arguments: + score_predictions_parser.add_argument( + '-e', '--metric', choices=[metric.name for metric in problem_module.PerformanceMetric], + action='append', metavar='METRIC', dest='metrics', + help="metric to use, can be specified multiple times, default from problem description", + ) + if 'scoring_params' not in skip_arguments: + score_predictions_parser.add_argument( + '-Y', '--scoring-param', nargs=2, action='append', metavar=('NAME', 'VALUE'), dest='scoring_params', + help="hyper-parameter name and its value for scoring pipeline, can be specified multiple times, value should be JSON-serialized", + ) + if 'scores' not in skip_arguments: + score_predictions_parser.add_argument( + '-c', '--scores', type=utils.FileType('w', encoding='utf8'), default='-', action='store', + help="save scores to a file, default stdout", + ) + if 'scoring_random_seed' not in skip_arguments: + score_predictions_parser.add_argument( + '--scoring-random-seed', type=int, action='store', default=0, + help="random seed to use for scoring", + ) + if 'predictions_random_seed' not in skip_arguments: + score_predictions_parser.add_argument( + '--predictions-random-seed', type=int, action='store', default=None, + help="random seed used for predictions", + ) + score_predictions_parser.set_defaults(runtime_handler=runtime.score_predictions_handler) + + if 'pipeline' not in skip_arguments: + evaluate_parser.add_argument( + '-p', '--pipeline', action='store', + help="path to a pipeline file (.json, .yml, or .yaml) or pipeline ID" + ) + if 'data_pipeline' not in skip_arguments: + evaluate_parser.add_argument( + '-d', '--data-pipeline', action='store', + help="path to a data preparation pipeline file (.json, .yml, or .yaml) or pipeline ID", + ) + if 'scoring_pipeline' not in skip_arguments: + evaluate_parser.add_argument( + '-n', '--scoring-pipeline', default=runtime.DEFAULT_SCORING_PIPELINE_PATH, action='store', + help="path to a scoring pipeline file (.json, .yml, or .yaml) or pipeline ID, default is standard scoring pipeline", + ) + if 'problem' not in skip_arguments: + evaluate_parser.add_argument( + '-r', '--problem', action='store', + help="path or URI to a problem description", + ) + if 'inputs' not in skip_arguments: + evaluate_parser.add_argument( + '-i', '--input', action='append', metavar='INPUT', dest='inputs', + help="path or URI of an input full dataset", + ) + if 'input_run' not in skip_arguments: + evaluate_parser.add_argument( + '-u', '--input-run', type=utils.FileType('r', encoding='utf8'), action='store', + help="path to a pipeline run file with configuration, use \"-\" for stdin", + ) + if 'data_params' not in skip_arguments: + evaluate_parser.add_argument( + '-y', '--data-param', nargs=2, action='append', metavar=('NAME', 'VALUE'), dest='data_params', + help="hyper-parameter name and its value for data preparation pipeline, can be specified multiple times, value should be JSON-serialized", + ) + if 'data_split_file' not in skip_arguments: + evaluate_parser.add_argument( + '--data-split-file', type=utils.FileType('r', encoding='utf8'), action='store', + help="reads the split file and populates \"primary_index_values\" hyper-parameter for data preparation pipeline with " + "values from the \"d3mIndex\" column corresponding to the test data, use \"-\" for stdin", + ) + if 'metrics' not in skip_arguments: + evaluate_parser.add_argument( + '-e', '--metric', choices=[metric.name for metric in problem_module.PerformanceMetric], action='append', metavar='METRIC', dest='metrics', + help="metric to use, 
can be specified multiple times, default from problem description", + ) + if 'scoring_params' not in skip_arguments: + evaluate_parser.add_argument( + '-Y', '--scoring-param', nargs=2, action='append', metavar=('NAME', 'VALUE'), dest='scoring_params', + help="hyper-parameter name and its value for scoring pipeline, can be specified multiple times, value should be JSON-serialized", + ) + if 'scores' not in skip_arguments: + evaluate_parser.add_argument( + '-c', '--scores', type=utils.FileType('w', encoding='utf8'), default='-', action='store', + help="save scores to a file, default stdout", + ) + if 'output_run' not in skip_arguments: + evaluate_parser.add_argument( + '-O', '--output-run', type=utils.FileType('w', encoding='utf8'), action='store', + help="save pipeline run documents to a YAML file, use \"-\" for stdout", + ) + if 'data_random_seed' not in skip_arguments: + evaluate_parser.add_argument( + '--data-random-seed', type=int, action='store', default=0, + help="random seed to use for data preparation", + ) + if 'scoring_random_seed' not in skip_arguments: + evaluate_parser.add_argument( + '--scoring-random-seed', type=int, action='store', default=0, + help="random seed to use for scoring", + ) + evaluate_parser.set_defaults(runtime_handler=runtime.evaluate_handler) + + +def handler( + arguments: argparse.Namespace, parser: argparse.ArgumentParser, *, + pipeline_resolver: typing.Callable = None, pipeline_run_parser: typing.Callable = None, + dataset_resolver: typing.Callable = None, problem_resolver: typing.Callable = None, + resolver_class: typing.Type[pipeline_module.Resolver] = None, + no_resolver_class: typing.Type[pipeline_module.Resolver] = None, + pipeline_class: typing.Type[pipeline_module.Pipeline] = None, +) -> None: + # Dynamically fetch which subparser was used. + subparser = parser._subparsers._group_actions[0].choices[arguments.d3m_command] # type: ignore + + if arguments.d3m_command == 'primitive': + primitive_handler( + arguments, + subparser, + ) + + elif arguments.d3m_command == 'index': + logger.warning("\"index\" CLI command is deprecated. Use \"primitive\" CLI command instead.") + + primitive_handler( + arguments, + subparser, + ) + + elif arguments.d3m_command == 'pipeline': + pipeline_handler( + arguments, + subparser, + resolver_class=resolver_class, + no_resolver_class=no_resolver_class, + pipeline_class=pipeline_class, + ) + + elif arguments.d3m_command == 'problem': + problem_handler( + arguments, + subparser, + problem_resolver=problem_resolver, + ) + + elif arguments.d3m_command == 'dataset': + dataset_handler( + arguments, + subparser, + dataset_resolver=dataset_resolver, + ) + + elif arguments.d3m_command == 'pipeline-run': + pipeline_run_handler( + arguments, + subparser, + ) + + elif arguments.d3m_command == 'runtime': + runtime_handler( + arguments, + subparser, + pipeline_resolver=pipeline_resolver, + pipeline_run_parser=pipeline_run_parser, + dataset_resolver=dataset_resolver, + problem_resolver=problem_resolver, + ) + + else: + raise exceptions.InvalidStateError("Cannot find a suitable command handler.") + + +# A fixed parser which correctly shows the error message for unknown arguments to the sub-command. +# See: https://gitlab.com/datadrivendiscovery/d3m/-/issues/409 +class _ArgumentParser(argparse.ArgumentParser): + # "parse_known_args" is made to behave exactly like "parse_args".
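As a usage sketch only: the flags registered above can be exercised through the main() entry point defined at the end of this module. The "runtime" command name is confirmed by the handler above, but the "evaluate" sub-command name, the metric choice, and all file paths are illustrative assumptions and are not part of this hunk.

# Hypothetical in-process invocation of the CLI configured above.
main([
    'd3m', 'runtime', 'evaluate',
    '--pipeline', 'pipeline.json',          # -p
    '--problem', 'problemDoc.json',         # -r
    '--input', 'datasetDoc.json',           # -i
    '--data-pipeline', 'k_fold_split.yml',  # -d
    '--metric', 'ACCURACY',                 # -e
    '--scores', 'scores.csv',               # -c
    '--output-run', 'pipeline_run.yml',     # -O
])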
+ def parse_known_args(self, args: typing.Sequence[str] = None, namespace: argparse.Namespace = None) -> typing.Tuple[argparse.Namespace, typing.List[str]]: + namespace, argv = super().parse_known_args(args, namespace) + if argv: + msg = argparse._('unrecognized arguments: %s') # type: ignore + self.error(msg % ' '.join(argv)) + return namespace, argv + + +def configure_parser(parser: argparse.ArgumentParser, *, skip_arguments: typing.Tuple = ()) -> None: + if 'pipeline_search_paths' not in skip_arguments: + parser.add_argument( + '-p', '--pipelines-path', action='append', metavar='PATH', dest='pipeline_search_paths', + help="path to a directory with pipelines to resolve from (.json, .yml, or .yaml), " + "can be specified multiple times, has priority over PIPELINES_PATH environment variable", + ) + if 'logging_level' not in skip_arguments: + parser.add_argument( + '-l', '--logging-level', default='info', action='store', + choices=['debug', 'info', 'warning', 'error', 'critical'], + help="logging level to use for the console", + ) + if 'compute_digest' not in skip_arguments: + parser.add_argument( + '--compute-digest', choices=[compute_digest.name for compute_digest in dataset_module.ComputeDigest], + default=dataset_module.ComputeDigest.ONLY_IF_MISSING.name, action='store', + help="when loading datasets, when to compute their digests, default is ONLY_IF_MISSING", + ) + if 'strict_resolving' not in skip_arguments: + parser.add_argument( + '--strict-resolving', default=False, action='store_true', + help="fail resolving if a resolved pipeline, primitive, or dataset, does not fully match specified reference", + ) + if 'strict_digest' not in skip_arguments: + parser.add_argument( + '--strict-digest', default=False, action='store_true', + help="when loading datasets, pipelines, primitives, or problem descriptions, if computed digest does not match the one provided in metadata, raise an exception?" + ) + if 'version' not in skip_arguments: + parser.add_argument( + '-V', '--version', action='version', version=str(__version__), + help="print d3m package version and exit", + ) + + subparsers = parser.add_subparsers(dest='d3m_command', title='commands', parser_class=_ArgumentParser) + subparsers.required = True # type: ignore + + primitive_parser = subparsers.add_parser( + 'primitive', help="describe, validate, explore, and manage primitives", + description="Describe, explore, and manage primitives.", + ) + # Legacy command name. Deprecated. We do not use "aliases" argument to "add_parser" + # because we want this command to be hidden. 
+ subparsers._name_parser_map['index'] = primitive_parser # type: ignore + + primitive_configure_parser(primitive_parser, skip_arguments=skip_arguments) + + pipeline_parser = subparsers.add_parser( + 'pipeline', help="describe and validate pipelines", + description="Describe and validate pipelines.", + ) + + pipeline_configure_parser(pipeline_parser, skip_arguments=skip_arguments) + + problem_parser = subparsers.add_parser( + 'problem', help="describe and validate problems", + description="Describe and validate problems.", + ) + + problem_configure_parser(problem_parser, skip_arguments=skip_arguments) + + dataset_parser = subparsers.add_parser( + 'dataset', help="describe and validate datasets", + description="Describe and validate datasets.", + ) + + dataset_configure_parser(dataset_parser, skip_arguments=skip_arguments) + + pipeline_run_parser = subparsers.add_parser( + 'pipeline-run', help="validate pipeline runs", + description="Validate pipeline runs.", + ) + + pipeline_run_configure_parser(pipeline_run_parser, skip_arguments=skip_arguments) + + runtime_parser = subparsers.add_parser( + 'runtime', help="run D3M pipelines", + description="Run D3M pipelines.", + ) + + runtime_configure_parser(runtime_parser, skip_arguments=skip_arguments) + + # We set metavar at the end, when we know all subparsers. We want + # "index" command to be hidden because it is deprecated. + subparsers.metavar = '{' + ','.join(name for name in subparsers._name_parser_map.keys() if name != 'index') + '}' # type: ignore + + +def main(argv: typing.Sequence) -> None: + parser = argparse.ArgumentParser(prog='d3m', description="Run a D3M core package command.") + configure_parser(parser) + + arguments = parser.parse_args(argv[1:]) + + logging.basicConfig(level=arguments.logging_level.upper()) + + handler(arguments, parser) diff --git a/d3m/d3m/container/__init__.py b/d3m/d3m/container/__init__.py new file mode 100644 index 0000000..38b49ac --- /dev/null +++ b/d3m/d3m/container/__init__.py @@ -0,0 +1,8 @@ +""" +This module provides various container types one can use to pass values between primitives. +""" + +from .dataset import * +from .pandas import * +from .numpy import * +from .list import * diff --git a/d3m/d3m/container/dataset.py b/d3m/d3m/container/dataset.py new file mode 100644 index 0000000..7cdd22c --- /dev/null +++ b/d3m/d3m/container/dataset.py @@ -0,0 +1,3297 @@ +import abc +import argparse +import collections +import datetime +import errno +import filecmp +import hashlib +import io +import itertools +import json +import logging +import math +import os +import os.path +import pprint +import re +import shutil +import sys +import time +import traceback +import typing +from urllib import error as urllib_error, parse as url_parse + +import dateutil.parser # type: ignore +import frozendict # type: ignore +import numpy # type: ignore +import openml # type: ignore +import pandas # type: ignore +from pandas.io import common as pandas_io_common # type: ignore +from sklearn import datasets # type: ignore + +from . 
import pandas as container_pandas +from d3m import deprecate, exceptions, utils +from d3m.metadata import base as metadata_base + +# See: https://gitlab.com/datadrivendiscovery/d3m/issues/66 +try: + from pyarrow import lib as pyarrow_lib # type: ignore +except ModuleNotFoundError: + pyarrow_lib = None + +__all__ = ('Dataset', 'ComputeDigest') + +logger = logging.getLogger(__name__) + +UNITS = { + 'B': 1, 'KB': 10**3, 'MB': 10**6, 'GB': 10**9, 'TB': 10**12, 'PB': 10**15, + 'KiB': 2**10, 'MiB': 2**20, 'GiB': 2**30, 'TiB': 2**40, 'PiB': 2**50, +} +SIZE_TO_UNITS = { + 1: 'B', 3: 'KB', 6: 'MB', + 9: 'GB', 12: 'TB', 15: 'PB', +} + +D3M_ROLE_CONSTANTS_TO_SEMANTIC_TYPES = { + 'index': 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', + 'multiIndex': 'https://metadata.datadrivendiscovery.org/types/PrimaryMultiKey', + 'key': 'https://metadata.datadrivendiscovery.org/types/UniqueKey', + 'attribute': 'https://metadata.datadrivendiscovery.org/types/Attribute', + 'suggestedTarget': 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', + 'timeIndicator': 'https://metadata.datadrivendiscovery.org/types/Time', + 'locationIndicator': 'https://metadata.datadrivendiscovery.org/types/Location', + 'boundaryIndicator': 'https://metadata.datadrivendiscovery.org/types/Boundary', + 'interval': 'https://metadata.datadrivendiscovery.org/types/Interval', + 'instanceWeight': 'https://metadata.datadrivendiscovery.org/types/InstanceWeight', + 'boundingPolygon': 'https://metadata.datadrivendiscovery.org/types/BoundingPolygon', + 'suggestedPrivilegedData': 'https://metadata.datadrivendiscovery.org/types/SuggestedPrivilegedData', + 'suggestedGroupingKey': 'https://metadata.datadrivendiscovery.org/types/SuggestedGroupingKey', + 'edgeSource': 'https://metadata.datadrivendiscovery.org/types/EdgeSource', + 'directedEdgeSource': 'https://metadata.datadrivendiscovery.org/types/DirectedEdgeSource', + 'undirectedEdgeSource': 'https://metadata.datadrivendiscovery.org/types/UndirectedEdgeSource', + 'simpleEdgeSource': 'https://metadata.datadrivendiscovery.org/types/SimpleEdgeSource', + 'multiEdgeSource': 'https://metadata.datadrivendiscovery.org/types/MultiEdgeSource', + 'edgeTarget': 'https://metadata.datadrivendiscovery.org/types/EdgeTarget', + 'directedEdgeTarget': 'https://metadata.datadrivendiscovery.org/types/DirectedEdgeTarget', + 'undirectedEdgeTarget': 'https://metadata.datadrivendiscovery.org/types/UndirectedEdgeTarget', + 'simpleEdgeTarget': 'https://metadata.datadrivendiscovery.org/types/SimpleEdgeTarget', + 'multiEdgeTarget': 'https://metadata.datadrivendiscovery.org/types/MultiEdgeTarget', +} + +D3M_RESOURCE_TYPE_CONSTANTS_TO_SEMANTIC_TYPES = { + # File collections. + 'image': 'http://schema.org/ImageObject', + 'video': 'http://schema.org/VideoObject', + 'audio': 'http://schema.org/AudioObject', + 'text': 'http://schema.org/Text', + 'speech': 'https://metadata.datadrivendiscovery.org/types/Speech', + 'timeseries': 'https://metadata.datadrivendiscovery.org/types/Timeseries', + 'raw': 'https://metadata.datadrivendiscovery.org/types/UnspecifiedStructure', + # Other.
+ 'graph': 'https://metadata.datadrivendiscovery.org/types/Graph', + 'edgeList': 'https://metadata.datadrivendiscovery.org/types/EdgeList', + 'table': 'https://metadata.datadrivendiscovery.org/types/Table', +} + +D3M_COLUMN_TYPE_CONSTANTS_TO_SEMANTIC_TYPES = { + 'boolean': 'http://schema.org/Boolean', + 'integer': 'http://schema.org/Integer', + 'real': 'http://schema.org/Float', + 'string': 'http://schema.org/Text', + 'categorical': 'https://metadata.datadrivendiscovery.org/types/CategoricalData', + 'dateTime': 'http://schema.org/DateTime', + 'realVector': 'https://metadata.datadrivendiscovery.org/types/FloatVector', + 'json': 'https://metadata.datadrivendiscovery.org/types/JSON', + 'geojson': 'https://metadata.datadrivendiscovery.org/types/GeoJSON', + 'unknown': 'https://metadata.datadrivendiscovery.org/types/UnknownType', +} + +SEMANTIC_TYPES_TO_D3M_RESOURCE_TYPES = {v: k for k, v in D3M_RESOURCE_TYPE_CONSTANTS_TO_SEMANTIC_TYPES.items()} +SEMANTIC_TYPES_TO_D3M_ROLES = {v: k for k, v in D3M_ROLE_CONSTANTS_TO_SEMANTIC_TYPES.items()} +SEMANTIC_TYPES_TO_D3M_COLUMN_TYPES = {v: k for k, v in D3M_COLUMN_TYPE_CONSTANTS_TO_SEMANTIC_TYPES.items()} + +D3M_TO_DATASET_FIELDS: typing.Dict[typing.Sequence[str], typing.Tuple[typing.Sequence[str], bool]] = { + ('about', 'datasetID'): (('id',), True), + ('about', 'datasetName'): (('name',), True), + ('about', 'description'): (('description',), False), + ('about', 'datasetVersion'): (('version',), False), + ('about', 'digest'): (('digest',), False), + ('about', 'approximateSize'): (('approximate_stored_size',), False), + ('about', 'citation'): (('source', 'citation'), False), + ('about', 'license'): (('source', 'license'), False), + ('about', 'redacted'): (('source', 'redacted'), False), + ('about', 'source'): (('source', 'name'), False), + ('about', 'citation'): (('source', 'citation'), False), + ('about', 'humanSubjectsResearch'): (('source', 'human_subjects_research'), False), +} + +INTERVAL_SEMANTIC_TYPES = ( + 'https://metadata.datadrivendiscovery.org/types/IntervalStart', + 'https://metadata.datadrivendiscovery.org/types/IntervalEnd', +) + +BOUNDARY_SEMANTIC_TYPES = ( + 'https://metadata.datadrivendiscovery.org/types/Interval', + 'https://metadata.datadrivendiscovery.org/types/BoundingPolygon', +) + INTERVAL_SEMANTIC_TYPES + +# A map between legacy (before v4.0.0) D3M resource formats and media types. +# Now all resource formats are media types. +MEDIA_TYPES = { + 'audio/aiff': 'audio/aiff', + 'audio/flac': 'audio/flac', + 'audio/ogg': 'audio/ogg', + 'audio/wav': 'audio/wav', + 'audio/mpeg': 'audio/mpeg', + 'image/jpeg': 'image/jpeg', + 'image/png': 'image/png', + 'video/mp4': 'video/mp4', + 'video/avi': 'video/avi', + 'text/csv': 'text/csv', + 'text/csv+gzip': 'text/csv+gzip', + 'text/plain': 'text/plain', + # Legacy (before v4.0.0) resource type for GML files. + # In "MEDIA_TYPES_REVERSE" it is not present on purpose. + 'text/gml': 'text/vnd.gml', + 'text/vnd.gml': 'text/vnd.gml', +} +MEDIA_TYPES_REVERSE = {v: k for k, v in MEDIA_TYPES.items()} + +# A legacy (before v4.0.0) map between D3M file extensions and media types. +# Now all datasets include a mapping between resource formats and file extensions. 
+# Based on: https://gitlab.com/datadrivendiscovery/data-supply/blob/shared/documentation/supportedResourceTypesFormats.json +FILE_EXTENSIONS = { + '.aif': 'audio/aiff', + '.aiff': 'audio/aiff', + '.flac': 'audio/flac', + '.ogg': 'audio/ogg', + '.wav': 'audio/wav', + '.mp3': 'audio/mpeg', + '.jpeg': 'image/jpeg', + '.jpg': 'image/jpeg', + '.png': 'image/png', + '.csv': 'text/csv', + '.csv.gz': 'text/csv+gzip', + '.gml': 'text/vnd.gml', + '.txt': 'text/plain', + '.mp4': 'video/mp4', + '.avi': 'video/avi', +} +FILE_EXTENSIONS_REVERSE: typing.Dict[str, typing.List[str]] = collections.defaultdict(list) +for k, v in FILE_EXTENSIONS.items(): + FILE_EXTENSIONS_REVERSE[v].append(k) + +TIME_GRANULARITIES = { + 'seconds': 'SECONDS', + 'minutes': 'MINUTES', + 'days': 'DAYS', + 'weeks': 'WEEKS', + 'months': 'MONTHS', + 'years': 'YEARS', + 'unspecified': 'UNSPECIFIED', +} +TIME_GRANULARITIES_REVERSE = {v: k for k, v in TIME_GRANULARITIES.items()} + +ALL_D3M_SEMANTIC_TYPES = \ + set(D3M_ROLE_CONSTANTS_TO_SEMANTIC_TYPES.values()) | \ + set(D3M_RESOURCE_TYPE_CONSTANTS_TO_SEMANTIC_TYPES.values()) | \ + set(D3M_COLUMN_TYPE_CONSTANTS_TO_SEMANTIC_TYPES.values()) | \ + set(BOUNDARY_SEMANTIC_TYPES) + +# A map between OpenML qualities and D3M metafeatures. +OPENML_QUALITY_MAP: typing.Dict[str, typing.Tuple[str, typing.Callable]] = { + 'Dimensionality': ('dimensionality', float), + 'NumberOfFeatures': ('number_of_attributes', int), + 'NumberOfInstances': ('number_of_instances', int), + 'NumberOfInstancesWithMissingValues': ('number_of_instances_with_missing_values', int), + 'PercentageOfInstancesWithMissingValues': ('ratio_of_instances_with_missing_values', float), + 'NumberOfMissingValues': ('number_of_missing_values', int), + 'PercentageOfMissingValues': ('ratio_of_missing_values', float), + 'NumberOfNumericFeatures': ('number_of_numeric_attributes', int), + 'PercentageOfNumericFeatures': ('ratio_of_numeric_attributes', float), + 'NumberOfBinaryFeatures': ('number_of_binary_attributes', int), + 'PercentageOfBinaryFeatures': ('ratio_of_binary_attributes', float), + 'NumberOfSymbolicFeatures': ('number_of_categorical_attributes', int), + 'PercentageOfSymbolicFeatures': ('ratio_of_categorical_attributes', float), + 'MeanNoiseToSignalRatio': ('noise_to_signal_ratio', float), + 'EquivalentNumberOfAtts': ('equivalent_number_of_attributes', int), +} + +OPENML_IGNORED_QUALITIES = { + # We use "number_distinct_values" on a target column instead. + 'NumberOfClasses', + # We use "value_counts_aggregate.max" on a target column instead. + 'MajorityClassSize', + # We use "value_probabilities_aggregate.max" on a target column instead. + 'MajorityClassPercentage', + # We use "value_counts_aggregate.min" on a target column instead. + 'MinorityClassSize', + # We use "value_probabilities_aggregate.min" on a target column instead. + 'MinorityClassPercentage', + # We use "entropy_of_values" on a target column instead. + 'ClassEntropy', + # It depends on the order of instances in the dataset, so it is a strange metafeature. + # See: https://github.com/openml/EvaluationEngine/issues/34 + 'AutoCorrelation', + # The following are not computed by code available through primitives, and we require that.
+ 'CfsSubsetEval_DecisionStumpAUC', + 'CfsSubsetEval_DecisionStumpErrRate', + 'CfsSubsetEval_DecisionStumpKappa', + 'CfsSubsetEval_NaiveBayesAUC', + 'CfsSubsetEval_NaiveBayesErrRate', + 'CfsSubsetEval_NaiveBayesKappa', + 'CfsSubsetEval_kNN1NAUC', + 'CfsSubsetEval_kNN1NErrRate', + 'CfsSubsetEval_kNN1NKappa', + 'DecisionStumpAUC', + 'DecisionStumpErrRate', + 'DecisionStumpKappa', + 'J48.00001.AUC', + 'J48.00001.ErrRate', + 'J48.00001.Kappa', + 'J48.0001.AUC', + 'J48.0001.ErrRate', + 'J48.0001.Kappa', + 'J48.001.AUC', + 'J48.001.ErrRate', + 'J48.001.Kappa', + 'REPTreeDepth1AUC', + 'REPTreeDepth1ErrRate', + 'REPTreeDepth1Kappa', + 'REPTreeDepth2AUC', + 'REPTreeDepth2ErrRate', + 'REPTreeDepth2Kappa', + 'REPTreeDepth3AUC', + 'REPTreeDepth3ErrRate', + 'REPTreeDepth3Kappa', + 'RandomTreeDepth1AUC', + 'RandomTreeDepth1ErrRate', + 'RandomTreeDepth1Kappa', + 'RandomTreeDepth2AUC', + 'RandomTreeDepth2ErrRate', + 'RandomTreeDepth2Kappa', + 'RandomTreeDepth3AUC', + 'RandomTreeDepth3ErrRate', + 'RandomTreeDepth3Kappa', + 'kNN1NAUC', + 'kNN1NErrRate', + 'kNN1NKappa', + 'NaiveBayesAUC', + 'NaiveBayesErrRate', + 'NaiveBayesKappa', +} + +# A map between OpenML qualities and aggregated D3M metafeatures. +OPENML_QUALITY_AGGREGATE_MAP: typing.Dict[str, typing.Tuple[str, str, typing.Callable]] = { + 'MinAttributeEntropy': ('entropy_of_attributes', 'min', float), + 'MeanAttributeEntropy': ('entropy_of_attributes', 'mean', float), + 'MaxAttributeEntropy': ('entropy_of_attributes', 'max', float), + 'Quartile1AttributeEntropy': ('entropy_of_attributes', 'quartile_1', float), + 'Quartile2AttributeEntropy': ('entropy_of_attributes', 'median', float), + 'Quartile3AttributeEntropy': ('entropy_of_attributes', 'quartile_3', float), + 'MinSkewnessOfNumericAtts': ('skew_of_attributes', 'min', float), + 'MeanSkewnessOfNumericAtts': ('skew_of_attributes', 'mean', float), + 'MaxSkewnessOfNumericAtts': ('skew_of_attributes', 'max', float), + 'Quartile1SkewnessOfNumericAtts': ('skew_of_attributes', 'quartile_1', float), + 'Quartile2SkewnessOfNumericAtts': ('skew_of_attributes', 'median', float), + 'Quartile3SkewnessOfNumericAtts': ('skew_of_attributes', 'quartile_3', float), + 'MinMutualInformation': ('mutual_information_of_attributes', 'min', float), + 'MeanMutualInformation': ('mutual_information_of_attributes', 'mean', float), + 'MaxMutualInformation': ('mutual_information_of_attributes', 'max', float), + 'Quartile1MutualInformation': ('mutual_information_of_attributes', 'quartile_1', float), + 'Quartile2MutualInformation': ('mutual_information_of_attributes', 'median', float), + 'Quartile3MutualInformation': ('mutual_information_of_attributes', 'quartile_3', float), + 'MinMeansOfNumericAtts': ('mean_of_attributes', 'min', float), + 'MaxMeansOfNumericAtts': ('mean_of_attributes', 'max', float), + 'MeanMeansOfNumericAtts': ('mean_of_attributes', 'mean', float), + 'Quartile1MeansOfNumericAtts': ('mean_of_attributes', 'quartile_1', float), + 'Quartile2MeansOfNumericAtts': ('mean_of_attributes', 'median', float), + 'Quartile3MeansOfNumericAtts': ('mean_of_attributes', 'quartile_3', float), + 'MaxStdDevOfNumericAtts': ('standard_deviation_of_attributes', 'max', float), + 'MinStdDevOfNumericAtts': ('standard_deviation_of_attributes', 'min', float), + 'MeanStdDevOfNumericAtts': ('standard_deviation_of_attributes', 'mean', float), + 'Quartile1StdDevOfNumericAtts': ('standard_deviation_of_attributes', 'quartile_1', float), + 'Quartile2StdDevOfNumericAtts': ('standard_deviation_of_attributes', 'median', float), + 
'Quartile3StdDevOfNumericAtts': ('standard_deviation_of_attributes', 'quartile_3', float), + 'MinNominalAttDistinctValues': ('number_distinct_values_of_categorical_attributes', 'min', float), + 'MaxNominalAttDistinctValues': ('number_distinct_values_of_categorical_attributes', 'max', float), + 'MeanNominalAttDistinctValues': ('number_distinct_values_of_categorical_attributes', 'mean', float), + 'StdvNominalAttDistinctValues': ('number_distinct_values_of_categorical_attributes', 'std', float), + 'MinKurtosisOfNumericAtts': ('kurtosis_of_attributes', 'min', float), + 'MaxKurtosisOfNumericAtts': ('kurtosis_of_attributes', 'max', float), + 'MeanKurtosisOfNumericAtts': ('kurtosis_of_attributes', 'mean', float), + 'Quartile1KurtosisOfNumericAtts': ('kurtosis_of_attributes', 'quartile_1', float), + 'Quartile2KurtosisOfNumericAtts': ('kurtosis_of_attributes', 'median', float), + 'Quartile3KurtosisOfNumericAtts': ('kurtosis_of_attributes', 'quartile_3', float), +} + +OPENML_ID_REGEX = re.compile(r'^/d/(\d+)$') + +DEFAULT_DATETIME = datetime.datetime.fromtimestamp(0, tz=datetime.timezone.utc) + +if not ALL_D3M_SEMANTIC_TYPES <= metadata_base.ALL_SEMANTIC_TYPES: + raise ValueError("Not all D3M semantic types are defined in metadata.") + + +class ComputeDigest(utils.Enum): + """ + Enumeration of possible approaches to computing dataset digest. + """ + + NEVER = 'NEVER' + ONLY_IF_MISSING = 'ONLY_IF_MISSING' + ALWAYS = 'ALWAYS' + + +def _add_extension_dot(extension: str) -> str: + if not extension.startswith('.'): + return '.' + extension + return extension + + +def _remove_extension_dot(extension: str) -> str: + if extension.startswith('.'): + return extension[1:] + return extension + + +def parse_size(size_string: str) -> int: + number, unit = [string.strip() for string in size_string.split()] + return int(float(number) * UNITS[unit]) + + +def is_simple_boundary(semantic_types: typing.Tuple[str]) -> bool: + """ + A simple boundary is a column with only "https://metadata.datadrivendiscovery.org/types/Boundary" + semantic type and no other. + """ + + return 'https://metadata.datadrivendiscovery.org/types/Boundary' in semantic_types and not any(boundary_semantic_type in semantic_types for boundary_semantic_type in BOUNDARY_SEMANTIC_TYPES) + + +def update_digest(hash: typing.Any, file_path: str) -> None: + with open(file_path, 'rb') as file: + while True: + # Reading is buffered, so we can read smaller chunks. + chunk = file.read(hash.block_size) + if not chunk: + break + hash.update(chunk) + + +# This exists as a reference implementation for computing a digest of D3M dataset. +# Loader below does an equivalent computation as part of dataset loading process. +def get_d3m_dataset_digest(dataset_doc_path: str) -> str: + hash = hashlib.sha256() + + with open(dataset_doc_path, 'r', encoding='utf8') as dataset_doc_file: + dataset_doc = json.load(dataset_doc_file) + + dataset_path = os.path.dirname(dataset_doc_path) + + for data_resource in dataset_doc['dataResources']: + if data_resource.get('isCollection', False): + collection_path = os.path.join(dataset_path, data_resource['resPath']) + + # We assume that we can just concat "collection_path" with a value in the column. + assert collection_path[-1] == '/' + + for filename in utils.list_files(collection_path): + file_path = os.path.join(collection_path, filename) + + # We include both the filename and the content. 
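As a quick illustration of the size parsing defined above: parse_size() splits a "<number> <unit>" string and multiplies the number by the matching UNITS entry. The calls below are only a sketch; the expected values follow directly from the UNITS map.

# Sketch: behaviour implied by UNITS and parse_size() above.
assert parse_size('500 B') == 500
assert parse_size('1.5 GB') == 1_500_000_000  # decimal unit: 10**9
assert parse_size('2 KiB') == 2048            # binary unit: 2**10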
+ hash.update(os.path.join(data_resource['resPath'], filename).encode('utf8')) + update_digest(hash, file_path) + + else: + resource_path = os.path.join(dataset_path, data_resource['resPath']) + + # We include both the filename and the content. + hash.update(data_resource['resPath'].encode('utf8')) + update_digest(hash, resource_path) + + # We remove digest, if it exists in dataset description, before computing the digest over the rest. + dataset_doc['about'].pop('digest', None) + + # We add to hash also the dataset description, with sorted keys. + hash.update(json.dumps(dataset_doc, sort_keys=True).encode('utf8')) + + return hash.hexdigest() + + +class Loader(metaclass=utils.AbstractMetaclass): + """ + A base class for dataset loaders. + """ + + @abc.abstractmethod + def can_load(self, dataset_uri: str) -> bool: + """ + Return ``True`` if this loader can load a dataset from a given URI ``dataset_uri``. + + Parameters + ---------- + dataset_uri: + A URI to load a dataset from. + + Returns + ------- + ``True`` if this loader can load a dataset from ``dataset_uri``. + """ + + @abc.abstractmethod + def load(self, dataset_uri: str, *, dataset_id: str = None, dataset_version: str = None, dataset_name: str = None, lazy: bool = False, + compute_digest: ComputeDigest = ComputeDigest.ONLY_IF_MISSING, strict_digest: bool = False, handle_score_split: bool = True) -> 'Dataset': + """ + Loads the dataset at ``dataset_uri``. + + Parameters + ---------- + dataset_uri: + A URI to load. + dataset_id: + Override dataset ID determined by the loader. + dataset_version: + Override dataset version determined by the loader. + dataset_name: + Override dataset name determined by the loader. + lazy: + If ``True``, load only top-level metadata and not whole dataset. + compute_digest: + Compute a digest over the data? + strict_digest: + If computed digest does not match the one provided in metadata, raise an exception? + handle_score_split: + If a scoring dataset has target values in a separate file, merge them in? + + Returns + ------- + A loaded dataset. + """ + + +class Saver(metaclass=utils.AbstractMetaclass): + """ + A base class for dataset savers. + """ + + @abc.abstractmethod + def can_save(self, dataset_uri: str) -> bool: + """ + Return ``True`` if this saver can save a dataset to a given URI ``dataset_uri``. + + Parameters + ---------- + dataset_uri: + A URI to save a dataset to. + + Returns + ------- + ``True`` if this saver can save a dataset to ``dataset_uri``. + """ + + @abc.abstractmethod + def save(self, dataset: 'Dataset', dataset_uri: str, *, compute_digest: ComputeDigest = ComputeDigest.ALWAYS, preserve_metadata: bool = True) -> None: + """ + Saves the dataset ``dataset`` to ``dataset_uri``. + + Parameters + ---------- + dataset: + A dataset to save. + dataset_uri: + A URI to save to. + compute_digest: + Compute digest over the data when saving? + preserve_metadata: + When saving a dataset, store its metadata as well? + """ + + +class OpenMLDatasetLoader(Loader): + """ + A class for loading OpenML datasets. 
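The Loader and Saver interfaces above are what concrete implementations plug into. A minimal sketch of a custom loader follows, assuming a made-up "mydata://" URI scheme and hard-coded data; it uses only APIs that appear elsewhere in this file (container_pandas.DataFrame, DataMetadata, Dataset, metadata.generate()).

class MyDatasetLoader(Loader):
    """A toy loader: one hard-coded table resource, per-column metadata filled in by generate()."""

    def can_load(self, dataset_uri: str) -> bool:
        return dataset_uri.startswith('mydata://')

    def load(self, dataset_uri: str, *, dataset_id: str = None, dataset_version: str = None, dataset_name: str = None, lazy: bool = False,
             compute_digest: ComputeDigest = ComputeDigest.ONLY_IF_MISSING, strict_digest: bool = False, handle_score_split: bool = True) -> 'Dataset':
        resources = {'learningData': container_pandas.DataFrame({'d3mIndex': [0, 1], 'value': ['a', 'b']})}
        metadata = metadata_base.DataMetadata({
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': Dataset,
            'id': dataset_id or 'mydata_dataset_1',
            'name': dataset_name or 'My dataset',
            'dimension': {'length': len(resources)},
        })
        dataset = Dataset(resources, metadata)
        # Let automatic metadata generation describe resources and columns.
        dataset.metadata = dataset.metadata.generate(dataset)
        return dataset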
+ """ + + def can_load(self, dataset_uri: str) -> bool: + try: + parsed_uri = url_parse.urlparse(dataset_uri) + except Exception: + return False + + if parsed_uri.scheme != 'https': + return False + + if 'www.openml.org' != parsed_uri.netloc: + return False + + if OPENML_ID_REGEX.search(parsed_uri.path) is None: + return False + + return True + + def _load_data(self, openml_dataset: openml.OpenMLDataset, resources: typing.Dict, metadata: metadata_base.DataMetadata) -> metadata_base.DataMetadata: + # OpenML package always computes digests when downloading data and checks them, failing if they do not match. + # See: https://github.com/openml/OpenML/issues/1027 + data, _, categorical_indicator, column_names = openml_dataset.get_data(include_row_id=True, include_ignore_attribute=True, dataset_format='dataframe') + + assert data.shape[1] == len(categorical_indicator) + assert data.shape[1] == len(column_names) + assert data.shape[1] == len(openml_dataset.features) + assert set(data.columns) == set(column_names) + + if openml_dataset.ignore_attribute: + if isinstance(openml_dataset.ignore_attribute, str): + ignore_columns = set(openml_dataset.ignore_attribute.split(',')) + else: + ignore_columns = set(openml_dataset.ignore_attribute) + else: + ignore_columns = set() + + assert ignore_columns <= set(column_names) + + if openml_dataset.default_target_attribute: + if isinstance(openml_dataset.default_target_attribute, str): + target_columns = set(openml_dataset.default_target_attribute.split(',')) + else: + target_columns = set(openml_dataset.default_target_attribute) + else: + target_columns = set() + + assert target_columns <= set(column_names) + + openml_column_data_types = {} + for i, column_name in enumerate(column_names): + openml_column_data_types[column_name] = openml_dataset.features[i].data_type + + assert (openml_column_data_types[column_name] == 'nominal' and categorical_indicator[i]) or (openml_column_data_types[column_name] != 'nominal' and not categorical_indicator[i]) + + # For nominal data types we store a list of possible values. + if openml_column_data_types[column_name] == 'nominal': + openml_column_data_types[column_name] = openml_dataset.features[i].nominal_values + + data = self._convert_categorical_columns(data, categorical_indicator) + + if openml_dataset.row_id_attribute: + assert openml_dataset.row_id_attribute in column_names + + row_id_column = openml_dataset.row_id_attribute + else: + assert 'd3mIndex' not in column_names + + # We do not update digest with new data generated here. This is OK because this data is determined by + # original data so original digest still applies. When saving a new digest has to be computed anyway + # because this data will have to be converted to string. 
+ data.insert(0, 'd3mIndex', range(len(data))) + + column_names.insert(0, 'd3mIndex') + categorical_indicator = [False] + list(categorical_indicator) + openml_column_data_types['d3mIndex'] = 'integer' + row_id_column = 'd3mIndex' + + data = container_pandas.DataFrame(data) + + resources['learningData'] = data + metadata = metadata.update((), { + 'dimension': {'length': len(resources)}, + }) + + metadata = metadata.update(('learningData',), { + 'structural_type': type(data), + 'dimension': { + 'length': len(data) + }, + }) + metadata = metadata.update(('learningData', metadata_base.ALL_ELEMENTS), { + 'dimension': { + 'length': len(column_names) + }, + }) + + for column_index, column_name in enumerate(column_names): + column_metadata = { + 'semantic_types': [ + self._semantic_type(openml_column_data_types[column_name]), + ], + 'name': column_name, + } + + if column_name in target_columns: + column_metadata['semantic_types'].append('https://metadata.datadrivendiscovery.org/types/SuggestedTarget') + + if column_name == row_id_column: + column_metadata['semantic_types'].append('https://metadata.datadrivendiscovery.org/types/PrimaryKey') + elif column_name not in ignore_columns: + column_metadata['semantic_types'].append('https://metadata.datadrivendiscovery.org/types/Attribute') + + if utils.is_sequence(openml_column_data_types[column_name]): + # We convert all categorical columns into string columns. + column_metadata['structural_type'] = str + elif openml_column_data_types[column_name] == 'nominal': + raise exceptions.InvalidStateError("Nominal column data type which has not been converted to a list of values.") + elif openml_column_data_types[column_name] in ['string', 'date']: + column_metadata['structural_type'] = str + elif openml_column_data_types[column_name] == 'integer': + column_metadata['structural_type'] = int + else: + column_metadata['structural_type'] = float + + metadata = metadata.update(('learningData', metadata_base.ALL_ELEMENTS, column_index), column_metadata) + + metadata = metadata.set_table_metadata(at=('learningData',)) + + # Adding it here so that the order of semantic types is consistent between saving and loading of datasets. 
+ metadata = metadata.add_semantic_type(('learningData',), 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint') + + return metadata + + def _get_dataset_metafeatures(self, openml_dataset: openml.OpenMLDataset) -> typing.Dict: + openml_qualities = openml_dataset.qualities or {} + metafeatures: typing.Dict = {} + + unknown_qualities = set(openml_qualities.keys()) - set(OPENML_QUALITY_MAP.keys()) - set(OPENML_QUALITY_AGGREGATE_MAP.keys()) - OPENML_IGNORED_QUALITIES + if unknown_qualities: + logger.warning("Unknown OpenML qualities in dataset %(dataset_id)s: %(unknown_qualities)s", { + 'dataset_id': openml_dataset.dataset_id, + 'unknown_qualities': sorted(unknown_qualities), + }) + + for quality_key, quality_value in openml_qualities.items(): + if numpy.isnan(quality_value): + continue + + if quality_key in OPENML_IGNORED_QUALITIES: + continue + + if quality_key in OPENML_QUALITY_MAP: + mapped_quality, quality_type = OPENML_QUALITY_MAP[quality_key] + + metafeatures[mapped_quality] = quality_type(quality_value) + + elif quality_key in OPENML_QUALITY_AGGREGATE_MAP: + mapped_quality, aggregate_key, quality_type = OPENML_QUALITY_AGGREGATE_MAP[quality_key] + + if mapped_quality not in metafeatures: + metafeatures[mapped_quality] = {} + + metafeatures[mapped_quality][aggregate_key] = quality_type(quality_value) + + # We warn about unknown qualities above. + + return metafeatures + + def _semantic_type(self, data_type: str) -> str: + if utils.is_sequence(data_type): + if len(data_type) == 2: + return 'http://schema.org/Boolean' + else: + return 'https://metadata.datadrivendiscovery.org/types/CategoricalData' + elif data_type == 'integer': + return 'http://schema.org/Integer' + elif data_type == 'real': + return 'http://schema.org/Float' + elif data_type == 'numeric': + return 'http://schema.org/Float' + elif data_type == 'string': + return 'http://schema.org/Text' + elif data_type == 'date': + return 'http://schema.org/DateTime' + else: + raise exceptions.UnexpectedValueError("Data type '{data_type}' is not supported.".format(data_type=data_type)) + + def _get_dataset_metadata(self, openml_dataset: openml.OpenMLDataset) -> typing.Dict: + """ + Returns OpenML only metadata converted to D3M metadata. It also computes digest using this metadata and expected data digest. 
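To make the quality-to-metafeature mapping above concrete, a sketch with made-up values; the quality names and resulting metafeature keys come from OPENML_QUALITY_MAP, OPENML_QUALITY_AGGREGATE_MAP, and OPENML_IGNORED_QUALITIES defined earlier in this file.

# Hypothetical OpenML qualities and the metafeatures they would map to.
openml_qualities = {
    'NumberOfInstances': 150.0,    # direct mapping -> ('number_of_instances', int)
    'MeanAttributeEntropy': 1.23,  # aggregate mapping -> ('entropy_of_attributes', 'mean', float)
    'NumberOfClasses': 3.0,        # listed in OPENML_IGNORED_QUALITIES, so skipped
}
# For such qualities, _get_dataset_metafeatures would produce:
# {'number_of_instances': 150, 'entropy_of_attributes': {'mean': 1.23}}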
+ """ + + dataset_metadata: typing.Dict[str, typing.Any] = { + 'id': str(openml_dataset.dataset_id), + } + + if openml_dataset.name: + dataset_metadata['name'] = openml_dataset.name + if openml_dataset.description: + dataset_metadata['description'] = openml_dataset.description + if openml_dataset.version_label: + dataset_metadata['version'] = openml_dataset.version_label + if openml_dataset.tag: + dataset_metadata['keywords'] = openml_dataset.tag + + dataset_source: typing.Dict[str, typing.Any] = { + 'uris': [] + } + + if openml_dataset.creator: + dataset_source['name'] = openml_dataset.creator + if openml_dataset.licence: + dataset_source['license'] = openml_dataset.licence + if openml_dataset.citation: + dataset_source['citation'] = openml_dataset.citation + if openml_dataset.collection_date: + dataset_source['published'] = utils.datetime_for_json(dateutil.parser.parse(openml_dataset.collection_date, default=DEFAULT_DATETIME, fuzzy=True)) + if openml_dataset.openml_url or openml_dataset.url: + dataset_source['uris'].append(openml_dataset.openml_url or openml_dataset.url) + if openml_dataset.original_data_url: + dataset_source['uris'].append(openml_dataset.original_data_url) + if openml_dataset.paper_url: + dataset_source['uris'].append(openml_dataset.paper_url) + + if not dataset_source['uris']: + del dataset_source['uris'] + if dataset_source: + dataset_metadata['source'] = dataset_source + + if not openml_dataset.md5_checksum: + raise exceptions.UnexpectedValueError("OpenML dataset {id} does not have MD5 checksum.".format(id=openml_dataset.dataset_id)) + + dataset_metadata['digest'] = utils.compute_digest(dataset_metadata, openml_dataset.md5_checksum.encode('utf8')) + + return dataset_metadata + + def _convert_categorical_columns(self, data: pandas.DataFrame, categorical_indicator: typing.List[bool]) -> pandas.DataFrame: + """ + Converts categorical DataFrame columns to str columns. In D3M pipelines generally expect categorical + columns to be encoded as strings and only later the pipeline encodes them in some way. + """ + + for column_index, is_categorical in enumerate(categorical_indicator): + if not is_categorical: + continue + + column_name = data.columns[column_index] + + data[column_name] = data[column_name].astype(str) + + return data + + # "strict_digest" and "compute_digest" are ignored because OpenML package always computes digests when downloading data + # and checks them, failing if they do not match. See: https://github.com/openml/OpenML/issues/1027 + # "handle_score_split" is ignored. + def load(self, dataset_uri: str, *, dataset_id: str = None, dataset_version: str = None, dataset_name: str = None, lazy: bool = False, + compute_digest: ComputeDigest = ComputeDigest.ONLY_IF_MISSING, strict_digest: bool = False, handle_score_split: bool = True) -> 'Dataset': + assert self.can_load(dataset_uri) + + parsed_uri = url_parse.urlparse(dataset_uri, allow_fragments=False) + dataset_path_id = OPENML_ID_REGEX.search(parsed_uri.path)[1] + + try: + # We download just metadata first. + openml_dataset = openml.datasets.get_dataset(dataset_path_id, download_data=False) + except openml.exceptions.OpenMLServerException as error: + raise exceptions.DatasetNotFoundError( + "OpenML dataset '{dataset_uri}' cannot be found.".format(dataset_uri=dataset_uri), + ) from error + + # This converts OpenML dataset metadata to D3M dataset metadata. + dataset_metadata = self._get_dataset_metadata(openml_dataset) + + assert dataset_metadata['id'] == dataset_path_id + + # Use overrides if provided. 
Digest is not computed over those changes on purpose. + if dataset_id is not None: + dataset_metadata['id'] = dataset_id + if dataset_version is not None: + dataset_metadata['version'] = dataset_version + if dataset_name is not None: + dataset_metadata['name'] = dataset_name + + # Other standard metadata. + dataset_metadata.update({ + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': Dataset, + 'location_uris': [ + dataset_uri, + ], + 'dimension': { + 'name': 'resources', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'], + 'length': 0, + }, + }) + + dataset_metafeatures = self._get_dataset_metafeatures(openml_dataset) + if dataset_metafeatures: + # We set metafeatures on the top level even if otherwise in D3M we set metafeatures at the resource level or + # even target column level, but setting them here allows one to access them in the lazy mode (when there are + # no resources yet in the dataset). We also do not include them into a digest because for D3M datasets + # the digest is just about the stored files of the dataset and not any additional metadata added by the loader. + dataset_metadata['data_metafeatures'] = dataset_metafeatures + + resources: typing.Dict = {} + metadata = metadata_base.DataMetadata(dataset_metadata) + + if not lazy: + load_lazy = None + + metadata = self._load_data( + openml_dataset, resources, metadata, + ) + + else: + def load_lazy(dataset: Dataset) -> None: + # "dataset" can be used as "resources", it is a dict of values. + dataset.metadata = self._load_data( + openml_dataset, dataset, dataset.metadata, + ) + + dataset._load_lazy = None + + return Dataset(resources, metadata, load_lazy=load_lazy) + + +class D3MDatasetLoader(Loader): + """ + A class for loading of D3M datasets. + + Loader support only loading from a local file system. + URI should point to the ``datasetDoc.json`` file in the D3M dataset directory. + """ + + SUPPORTED_VERSIONS = {'3.0', '3.1', '3.1.1', '3.1.2', '3.2.0', '3.2.1', '3.3.0', '3.3.1', '4.0.0', '4.1.0'} + + def can_load(self, dataset_uri: str) -> bool: + try: + parsed_uri = url_parse.urlparse(dataset_uri, allow_fragments=False) + except Exception: + return False + + if parsed_uri.scheme != 'file': + return False + + if parsed_uri.netloc not in ['', 'localhost']: + return False + + if not parsed_uri.path.startswith('/'): + return False + + if os.path.basename(parsed_uri.path) != 'datasetDoc.json': + return False + + return True + + def _load_data(self, resources: typing.Dict, metadata: metadata_base.DataMetadata, *, dataset_path: str, dataset_doc: typing.Dict, + dataset_id: typing.Optional[str], dataset_digest: typing.Optional[str], + compute_digest: ComputeDigest, strict_digest: bool, handle_score_split: bool) -> typing.Tuple[metadata_base.DataMetadata, typing.Optional[str]]: + # Allowing "True" for backwards compatibility. 
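A small sketch of which URIs the two loaders accept, based only on the can_load() checks above; the dataset paths and the OpenML dataset ID are made up for illustration.

assert OpenMLDatasetLoader().can_load('https://www.openml.org/d/61')
assert not OpenMLDatasetLoader().can_load('https://www.openml.org/t/61')  # path must match /d/<number>
assert D3MDatasetLoader().can_load('file:///datasets/example/datasetDoc.json')
assert not D3MDatasetLoader().can_load('file:///datasets/example/tables/learningData.csv')  # must point at datasetDoc.json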
+ if compute_digest is True or compute_digest == ComputeDigest.ALWAYS or (compute_digest == ComputeDigest.ONLY_IF_MISSING and dataset_digest is None): + hash = hashlib.sha256() + else: + hash = None + + for data_resource in dataset_doc['dataResources']: + if data_resource.get('isCollection', False): + resources[data_resource['resID']], metadata = self._load_collection(dataset_path, data_resource, metadata, hash) + else: + loader = getattr(self, '_load_resource_type_{resource_type}'.format(resource_type=data_resource['resType']), None) + if loader is None: + raise exceptions.NotSupportedError("Resource type '{resource_type}' is not supported.".format(resource_type=data_resource['resType'])) + + resources[data_resource['resID']], metadata = loader(dataset_path, data_resource, metadata, hash) + + # Backwards compatibility. If there is no resource marked as a dataset entry point, + # check if there is any resource with a suitable filename. + for data_resource in dataset_doc['dataResources']: + if metadata.has_semantic_type((data_resource['resID'],), 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint'): + break + else: + for data_resource in dataset_doc['dataResources']: + if os.path.splitext(os.path.basename(data_resource['resPath']))[0] == 'learningData': + metadata = metadata.add_semantic_type((data_resource['resID'],), 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint') + + # Handle a special case for SCORE dataset splits (those which have "targets.csv" file). + # They are the same as TEST dataset splits, but we present them differently, so that + # SCORE dataset splits have targets as part of data. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/176 + if handle_score_split and os.path.exists(os.path.join(dataset_path, '..', 'targets.csv')): + self._merge_score_targets(resources, metadata, dataset_path, hash) + + if hash is not None: + # We remove digest, if it exists in dataset description, before computing the digest over the rest. + # We modify "dataset_doc" here, but this is OK, we do not need it there anymore at this point. + dataset_doc['about'].pop('digest', None) + + # We add to hash also the dataset description, with sorted keys. + hash.update(json.dumps(dataset_doc, sort_keys=True).encode('utf8')) + + new_dataset_digest = hash.hexdigest() + + if dataset_digest is not None and dataset_digest != new_dataset_digest: + if strict_digest: + raise exceptions.DigestMismatchError( + "Digest for dataset '{dataset_id}' does not match one from dataset description. Dataset description digest: {dataset_digest}. Computed digest: {new_dataset_digest}.".format( + dataset_id=dataset_id or dataset_doc['about']['datasetID'], + dataset_digest=dataset_digest, + new_dataset_digest=new_dataset_digest, + ) + ) + else: + logger.warning( + "Digest for dataset '%(dataset_id)s' does not match one from dataset description. Dataset description digest: %(dataset_digest)s. 
Computed digest: %(new_dataset_digest)s.", + { + 'dataset_id': dataset_id or dataset_doc['about']['datasetID'], + 'dataset_digest': dataset_digest, + 'new_dataset_digest': new_dataset_digest, + }, + ) + else: + new_dataset_digest = dataset_doc['about'].get('digest', None) + + return metadata, new_dataset_digest + + def load(self, dataset_uri: str, *, dataset_id: str = None, dataset_version: str = None, dataset_name: str = None, lazy: bool = False, + compute_digest: ComputeDigest = ComputeDigest.ONLY_IF_MISSING, strict_digest: bool = False, handle_score_split: bool = True) -> 'Dataset': + assert self.can_load(dataset_uri) + + parsed_uri = url_parse.urlparse(dataset_uri, allow_fragments=False) + + dataset_doc_path = parsed_uri.path + dataset_path = os.path.dirname(dataset_doc_path) + + try: + with open(dataset_doc_path, 'r', encoding='utf8') as dataset_doc_file: + dataset_doc = json.load(dataset_doc_file) + except FileNotFoundError as error: + raise exceptions.DatasetNotFoundError( + "D3M dataset '{dataset_uri}' cannot be found.".format(dataset_uri=dataset_uri), + ) from error + + dataset_schema_version = dataset_doc.get('about', {}).get('datasetSchemaVersion', '3.3.0') + if dataset_schema_version not in self.SUPPORTED_VERSIONS: + logger.warning("Loading a dataset with unsupported schema version '%(version)s'. Supported versions: %(supported_versions)s", { + 'version': dataset_schema_version, + 'supported_versions': self.SUPPORTED_VERSIONS, + }) + + # We do not compute digest here, but we use one from dataset description if it exist. + # This is different from other loaders which compute digest when lazy loading and check + # it after data is finally loaded to make sure data has not changed in meantime. + dataset_digest = dataset_doc['about'].get('digest', None) + + resources: typing.Dict = {} + metadata = metadata_base.DataMetadata() + + metadata = self._load_top_qualities(dataset_doc, metadata) + + if not lazy: + load_lazy = None + + metadata = self._load_data_qualities(dataset_doc, metadata) + + metadata, dataset_digest = self._load_data( + resources, metadata, dataset_path=dataset_path, dataset_doc=dataset_doc, dataset_id=dataset_id, + dataset_digest=dataset_digest, compute_digest=compute_digest, strict_digest=strict_digest, + handle_score_split=handle_score_split, + ) + + else: + def load_lazy(dataset: Dataset) -> None: + nonlocal dataset_digest + + dataset.metadata = self._load_data_qualities(dataset_doc, dataset.metadata) + + # "dataset" can be used as "resources", it is a dict of values. + dataset.metadata, dataset_digest = self._load_data( + dataset, dataset.metadata, dataset_path=dataset_path, dataset_doc=dataset_doc, dataset_id=dataset_id, + dataset_digest=dataset_digest, compute_digest=compute_digest, strict_digest=strict_digest, + handle_score_split=handle_score_split, + ) + + new_metadata = { + 'dimension': {'length': len(dataset)}, + } + + if dataset_digest is not None: + new_metadata['digest'] = dataset_digest + + dataset.metadata = dataset.metadata.update((), new_metadata) + dataset.metadata = dataset.metadata.generate(dataset) + + dataset._load_lazy = None + + document_dataset_id = dataset_doc['about']['datasetID'] + # Handle a special case for SCORE dataset splits (those which have "targets.csv" file). + # They are the same as TEST dataset splits, but we present them differently, so that + # SCORE dataset splits have targets as part of data. Because of this we also update + # corresponding dataset ID. 
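As a usage sketch of the digest handling above: the URI is made up, while the keyword arguments come from the load() signature in this file. With strict_digest=True a mismatch raises exceptions.DigestMismatchError instead of only logging a warning.

loader = D3MDatasetLoader()
dataset = loader.load(
    'file:///datasets/example/datasetDoc.json',
    compute_digest=ComputeDigest.ALWAYS,  # recompute even if datasetDoc.json already provides a digest
    strict_digest=True,                   # fail on mismatch rather than warn
)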
+ # See: https://gitlab.com/datadrivendiscovery/d3m/issues/176 + if handle_score_split and os.path.exists(os.path.join(dataset_path, '..', 'targets.csv')) and document_dataset_id.endswith('_TEST'): + document_dataset_id = document_dataset_id[:-5] + '_SCORE' + + dataset_metadata = { + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': Dataset, + 'id': dataset_id or document_dataset_id, + 'name': dataset_name or dataset_doc['about']['datasetName'], + 'dimension': { + 'name': 'resources', + 'length': len(resources), + }, + } + + if dataset_version or dataset_doc['about'].get('datasetVersion', None): + dataset_metadata['version'] = dataset_version or dataset_doc['about']['datasetVersion'] + + if dataset_digest is not None: + dataset_metadata['digest'] = dataset_digest + + if dataset_doc['about'].get('description', None): + dataset_metadata['description'] = dataset_doc['about']['description'] + + if dataset_doc['about'].get('approximateSize', None): + try: + dataset_metadata['approximate_stored_size'] = parse_size(dataset_doc['about']['approximateSize']) + except Exception as error: + raise ValueError("Unable to parse 'approximateSize': {approximate_size}".format(approximate_size=dataset_doc['about']['approximateSize'])) from error + + dataset_source = {} + + if 'redacted' in dataset_doc['about']: + dataset_source['redacted'] = dataset_doc['about']['redacted'] + + # "license" is often an empty string and in that case we do not want + # really to set the field in dataset metadata. + if dataset_doc['about'].get('license', None): + dataset_source['license'] = dataset_doc['about']['license'] + + if 'humanSubjectsResearch' in dataset_doc['about']: + dataset_source['human_subjects_research'] = dataset_doc['about']['humanSubjectsResearch'] + + if dataset_doc['about'].get('source', None): + dataset_source['name'] = dataset_doc['about']['source'] + + if dataset_doc['about'].get('citation', None): + dataset_source['citation'] = dataset_doc['about']['citation'] + + if dataset_doc['about'].get('publicationDate', None): + try: + dataset_source['published'] = utils.datetime_for_json(dateutil.parser.parse(dataset_doc['about']['publicationDate'], default=DEFAULT_DATETIME, fuzzy=True)) + except Exception as error: + raise ValueError("Unable to parse 'publicationDate': {publication_date}".format(publication_date=dataset_doc['about']['publicationDate'])) from error + + if dataset_source: + dataset_metadata['source'] = dataset_source + + metadata = metadata.update((), dataset_metadata) + + # We reconstruct the URI to normalize it. 
+ location_uri = utils.fix_uri(dataset_doc_path) + location_uris = list(metadata.query(()).get('location_uris', [])) + if location_uri not in location_uris: + location_uris.insert(0, location_uri) + metadata = metadata.update((), {'location_uris': location_uris}) + + if dataset_doc['about'].get('datasetURI', None) and dataset_doc['about']['datasetURI'] not in location_uris: + location_uris.append(dataset_doc['about']['datasetURI']) + metadata = metadata.update((), {'location_uris': location_uris}) + + semantic_types = list(metadata.query(()).get('dimension', {}).get('semantic_types', [])) + if 'https://metadata.datadrivendiscovery.org/types/DatasetResource' not in semantic_types: + semantic_types.append('https://metadata.datadrivendiscovery.org/types/DatasetResource') + metadata = metadata.update((), {'dimension': {'semantic_types': semantic_types}}) + + source_uris = list(metadata.query(()).get('source', {}).get('uris', [])) + if dataset_doc['about'].get('sourceURI', None) and dataset_doc['about']['sourceURI'] not in source_uris: + source_uris.insert(0, dataset_doc['about']['sourceURI']) + metadata = metadata.update((), {'source': {'uris': source_uris}}) + + keywords = list(metadata.query(()).get('keywords', [])) + if dataset_doc['about'].get('applicationDomain', None) and dataset_doc['about']['applicationDomain'] not in keywords: + # Application domain has no vocabulary specified so we map it to keywords. + keywords.append(dataset_doc['about']['applicationDomain']) + metadata = metadata.update((), {'keywords': keywords}) + + return Dataset(resources, metadata, load_lazy=load_lazy) + + def _load_top_qualities(self, dataset_doc: typing.Dict, metadata: metadata_base.DataMetadata) -> metadata_base.DataMetadata: + ALL_ELEMENTS_REPR = repr(metadata_base.ALL_ELEMENTS) + + for quality in dataset_doc.get('qualities', []): + restricted_to = quality.get('restrictedTo', {}) + + # D3M metadata stored as D3M qualities. + if quality['qualName'] == 'metadata': + if restricted_to['resID'] == '': + selector: metadata_base.TupleSelector = () + else: + # Here we load only top-level metadata. + continue + + # TODO: Optimize, see: https://gitlab.com/datadrivendiscovery/d3m/issues/408 + metadata = metadata.update(selector, utils.from_reversible_json_structure(quality['qualValue'])) + + return metadata + + def _load_data_qualities(self, dataset_doc: typing.Dict, metadata: metadata_base.DataMetadata) -> metadata_base.DataMetadata: + ALL_ELEMENTS_REPR = repr(metadata_base.ALL_ELEMENTS) + + for quality in dataset_doc.get('qualities', []): + restricted_to = quality.get('restrictedTo', {}) + + # D3M metadata stored as D3M qualities. + if quality['qualName'] == 'metadata': + if restricted_to['resID'] == '': + # Here we load only non top-level metadata. + continue + else: + resource_selector = [metadata_base.ALL_ELEMENTS if segment == ALL_ELEMENTS_REPR else segment for segment in restricted_to['resComponent']['selector']] + selector: metadata_base.TupleSelector = (restricted_to['resID'], *resource_selector) + + # TODO: Optimize, see: https://gitlab.com/datadrivendiscovery/d3m/issues/408 + metadata = metadata.update(selector, utils.from_reversible_json_structure(quality['qualValue'])) + + # An alternative way to describe LUPI datasets using D3M qualities.
+ # See: https://gitlab.com/datadrivendiscovery/d3m/issues/61 + # https://gitlab.com/datadrivendiscovery/d3m/issues/225 + elif quality['qualName'] == 'privilegedFeature': + if quality['qualValue'] != 'True': + continue + + column_index = restricted_to.get('resComponent', {}).get('columnIndex', None) + if column_index is not None: + metadata = self._add_semantic_type_for_column_index(metadata, restricted_to['resID'], column_index, 'https://metadata.datadrivendiscovery.org/types/SuggestedPrivilegedData') + continue + + column_name = restricted_to.get('resComponent', {}).get('columnName', None) + if column_name is not None: + metadata = self._add_semantic_type_for_column_name(metadata, restricted_to['resID'], column_name, 'https://metadata.datadrivendiscovery.org/types/SuggestedPrivilegedData') + continue + + return metadata + + def _add_semantic_type_for_column_index(self, metadata: metadata_base.DataMetadata, resource_id: str, column_index: int, semantic_type: str) -> metadata_base.DataMetadata: + return metadata.add_semantic_type((resource_id, metadata_base.ALL_ELEMENTS, column_index), semantic_type) + + def _add_semantic_type_for_column_name(self, metadata: metadata_base.DataMetadata, resource_id: str, column_name: str, semantic_type: str) -> metadata_base.DataMetadata: + column_index = metadata.get_column_index_from_column_name(column_name, at=(resource_id,)) + + return self._add_semantic_type_for_column_index(metadata, resource_id, column_index, semantic_type) + + def _load_collection(self, dataset_path: str, data_resource: typing.Dict, metadata: metadata_base.DataMetadata, + hash: typing.Any) -> typing.Tuple[container_pandas.DataFrame, metadata_base.DataMetadata]: + assert data_resource.get('isCollection', False) + + collection_path = os.path.join(dataset_path, data_resource['resPath']) + + media_types_with_extensions = {} + # Legacy (before v4.0.0). We obtain a list of file extensions from the global list of file extensions. + if utils.is_sequence(data_resource['resFormat']): + for format in data_resource['resFormat']: + format_media_type = MEDIA_TYPES[format] + media_types_with_extensions[format_media_type] = [_add_extension_dot(extension) for extension in FILE_EXTENSIONS_REVERSE[format_media_type]] + else: + for format, extensions in data_resource['resFormat'].items(): + # We allow unknown formats, hoping that they are proper media types already. + format_media_type = MEDIA_TYPES.get(format, format) + # We do not really care if file extensions are not on the global list of file extensions. 
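
Before the per-format assignment continues below, here is a sketch of the two 'resFormat' shapes handled by the branches above, using a hypothetical PNG entry:

# Current (v4.0.0+) form: a mapping from format to the file extensions used in
# this dataset; unknown formats are passed through as media types, so this
# entry would normalize to {'image/png': ['.png']}.
current_res_format = {'image/png': ['png']}

# Legacy (pre-v4.0.0) form: a plain list of formats; file extensions are then
# taken from the module's global extension tables rather than from the dataset
# description itself.
legacy_res_format = ['image/png']
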
+ media_types_with_extensions[format_media_type] = [_add_extension_dot(extension) for extension in extensions] + + all_media_types_set = set(media_types_with_extensions.keys()) + + reverse_media_types_with_extensions: typing.Dict[str, str] = {} + for media_type, extensions in media_types_with_extensions.items(): + for extension in extensions: + if extension in reverse_media_types_with_extensions: + raise exceptions.InvalidDatasetError("Conflicting file extension '{file_extension}': {media_type1} and {media_type2}".format( + file_extension=extension, + media_type1=reverse_media_types_with_extensions[extension], + media_type2=media_type, + )) + + reverse_media_types_with_extensions[extension] = media_type + + filenames = [] + media_types = [] + + for filename in utils.list_files(collection_path): + file_path = os.path.join(collection_path, filename) + + filename_extension = os.path.splitext(filename)[1] + + filenames.append(filename) + + try: + media_type = reverse_media_types_with_extensions[filename_extension] + except KeyError as error: + raise TypeError("Unable to determine a media type for the file extension of file '{filename}'.".format(filename=filename)) from error + + media_types.append(media_type) + + if hash is not None: + # We include both the filename and the content. + hash.update(os.path.join(data_resource['resPath'], filename).encode('utf8')) + update_digest(hash, file_path) + + data = container_pandas.DataFrame({'filename': filenames}, columns=['filename'], dtype=object) + + metadata = metadata.update((data_resource['resID'],), { + 'structural_type': type(data), + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/Table', + 'https://metadata.datadrivendiscovery.org/types/FilesCollection', + ], + 'dimension': { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': len(data), + }, + }) + + metadata = metadata.update((data_resource['resID'], metadata_base.ALL_ELEMENTS), { + 'dimension': { + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], + 'length': 1, + }, + }) + + location_base_uri = utils.fix_uri(collection_path) + # We want to make sure you can just concat with the filename. + if not location_base_uri.endswith('/'): + location_base_uri += '/' + + media_types_set = set(media_types) + + extra_media_types = all_media_types_set - media_types_set + if extra_media_types: + logger.warning("File collection '%(resource_id)s' claims more file formats than are used in files. Extraneous formats: %(formats)s", { + 'resource_id': data_resource['resID'], + 'formats': [MEDIA_TYPES_REVERSE.get(format, format) for format in sorted(extra_media_types)], + }) + + # Normalize the list based on real media types used. + all_media_types = sorted(media_types_set) + + column_metadata = { + 'name': 'filename', + 'structural_type': str, + 'location_base_uris': [ + location_base_uri, + ], + # A superset of all media types of files in this collection. 
+ 'media_types': all_media_types, + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', + 'https://metadata.datadrivendiscovery.org/types/FileName', + D3M_RESOURCE_TYPE_CONSTANTS_TO_SEMANTIC_TYPES[data_resource['resType']], + ], + } + + if data_resource.get('columns', None): + columns_metadata = [] + + for column in data_resource['columns']: + columns_metadata.append(self._get_column_metadata(column)) + columns_metadata[-1]['column_index'] = column['colIndex'] + columns_metadata[-1]['column_name'] = column['colName'] + + column_metadata['file_columns'] = columns_metadata + + if data_resource.get('columnsCount', None) is not None: + column_metadata['file_columns_count'] = data_resource['columnsCount'] + + metadata = metadata.update((data_resource['resID'], metadata_base.ALL_ELEMENTS, 0), column_metadata) + + # If there are different rows with different media types, we have to set + # on each row which media type it is being used. + if len(all_media_types) > 1: + # The following modifies metadata for rows directly instead of through metadata methods + # to achieve useful performance because some datasets contain many files which means many + # rows have their "media_types" set. Setting it one by one makes things to slow. + # Here we are taking advantage of quite few assumptions: we are modifying metadata in-place + # because we know it is only us having a reference to it, we directly set metadata for + # rows because we know no other metadata exists for rows, moreover, we also know no other + # metadata exists for rows through any higher ALL_ELEMENTS. + # TODO: Expose this as a general metadata method. + # TODO: Or just optimize, see: https://gitlab.com/datadrivendiscovery/d3m/issues/408 + + resource_metadata_entry = metadata._current_metadata.elements[data_resource['resID']] + resource_row_elements_evolver = resource_metadata_entry.elements.evolver() + resource_row_elements_evolver._reallocate(2 * len(media_types)) + for i, media_type in enumerate(media_types): + column_metadata_entry = metadata_base.MetadataEntry( + metadata=frozendict.FrozenOrderedDict({ + # A media type of this particular file. 
+ 'media_types': (media_type,), + }), + is_empty=False, + ) + + row_metadata_entry = metadata_base.MetadataEntry( + elements=utils.EMPTY_PMAP.set(0, column_metadata_entry), + is_empty=False, + is_elements_empty=False, + ) + + resource_row_elements_evolver.set(i, row_metadata_entry) + + resource_metadata_entry.elements = resource_row_elements_evolver.persistent() + resource_metadata_entry.is_elements_empty = not resource_metadata_entry.elements + resource_metadata_entry.update_is_empty() + + return data, metadata + + def _load_resource_type_table(self, dataset_path: str, data_resource: typing.Dict, metadata: metadata_base.DataMetadata, + hash: typing.Any) -> typing.Tuple[container_pandas.DataFrame, metadata_base.DataMetadata]: + assert not data_resource.get('isCollection', False) + + data = None + column_names = None + data_path = os.path.join(dataset_path, data_resource['resPath']) + + if utils.is_sequence(data_resource['resFormat']) and len(data_resource['resFormat']) == 1: + resource_format = data_resource['resFormat'][0] + elif isinstance(data_resource['resFormat'], typing.Mapping) and len(data_resource['resFormat']) == 1: + resource_format = list(data_resource['resFormat'].keys())[0] + else: + resource_format = None + + if resource_format in ['text/csv', 'text/csv+gzip']: + data = pandas.read_csv( + data_path, + # We do not want to do any conversion of values at this point. + # This should be done by primitives later on. + dtype=str, + # We always expect one row header. + header=0, + # We want empty strings and not NaNs. + na_filter=False, + compression='gzip' if resource_format == 'text/csv+gzip' else None, + encoding='utf8', + low_memory=False, + memory_map=True, + ) + + column_names = list(data.columns) + + if data_resource.get('columnsCount', None) is not None and len(column_names) != data_resource['columnsCount']: + raise ValueError("Mismatch between columns count in data {data_count} and expected count {expected_count}.".format( + data_count=len(column_names), + expected_count=data_resource['columnsCount'], + )) + + if hash is not None: + # We include both the filename and the content. + # TODO: Currently we read the file twice, once for reading and once to compute digest. Could we do it in one pass? Would it make it faster? 
+ hash.update(data_resource['resPath'].encode('utf8')) + update_digest(hash, data_path) + + else: + raise exceptions.NotSupportedError("Resource format '{resource_format}' for table '{resource_path}' is not supported.".format( + resource_format=data_resource['resFormat'], + resource_path=data_resource['resPath'], + )) + + if data is None: + raise FileNotFoundError("Data file for table '{resource_path}' cannot be found.".format( + resource_path=data_resource['resPath'], + )) + + data = container_pandas.DataFrame(data) + + assert column_names is not None + + semantic_types = [D3M_RESOURCE_TYPE_CONSTANTS_TO_SEMANTIC_TYPES[data_resource['resType']]] + + if data_resource['resID'] == 'learningData': + semantic_types.append('https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint') + + metadata = metadata.update((data_resource['resID'],), { + 'structural_type': type(data), + 'semantic_types': semantic_types, + 'dimension': { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': len(data), + }, + }) + + metadata = metadata.update((data_resource['resID'], metadata_base.ALL_ELEMENTS), { + 'dimension': { + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], + 'length': len(column_names), + }, + }) + + for i, column_name in enumerate(column_names): + metadata = metadata.update((data_resource['resID'], metadata_base.ALL_ELEMENTS, i), { + 'name': column_name, + 'structural_type': str, + }) + + metadata_columns = {} + for column in data_resource.get('columns', []): + metadata_columns[column['colIndex']] = column + + for i in range(len(column_names)): + if i in metadata_columns: + if column_names[i] != metadata_columns[i]['colName']: + raise ValueError("Mismatch between column name in data '{data_name}' and column name in metadata '{metadata_name}'.".format( + data_name=column_names[i], + metadata_name=metadata_columns[i]['colName'], + )) + + column_metadata = self._get_column_metadata(metadata_columns[i]) + else: + column_metadata = { + 'semantic_types': [ + D3M_COLUMN_TYPE_CONSTANTS_TO_SEMANTIC_TYPES['unknown'], + ], + } + + if 'https://metadata.datadrivendiscovery.org/types/Boundary' in column_metadata['semantic_types'] and 'boundary_for' not in column_metadata: + # Let's reconstruct for which column this is a boundary: currently + # this seems to be the first non-boundary column before this one. + for column_index in range(i - 1, 0, -1): + column_semantic_types = metadata.query((data_resource['resID'], metadata_base.ALL_ELEMENTS, column_index)).get('semantic_types', ()) + if 'https://metadata.datadrivendiscovery.org/types/Boundary' not in column_semantic_types: + column_metadata['boundary_for'] = { + 'resource_id': data_resource['resID'], + 'column_index': column_index, + } + break + + metadata = metadata.update((data_resource['resID'], metadata_base.ALL_ELEMENTS, i), column_metadata) + + current_boundary_start = None + current_boundary_list: typing.Tuple[str, ...] = None + column_index = 0 + while column_index < len(column_names): + column_semantic_types = metadata.query((data_resource['resID'], metadata_base.ALL_ELEMENTS, column_index)).get('semantic_types', ()) + if is_simple_boundary(column_semantic_types): + # Let's reconstruct which type of a boundary this is. Heuristic is simple. + # If there are two boundary columns next to each other, it is an interval. 
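
To make the two heuristics above concrete, consider a hypothetical resource 'learningData' whose columns at indices 2 and 3 carry the Boundary semantic type; the loop below then also classifies such an adjacent pair as an interval.

# Hypothetical column layout: 0 'd3mIndex', 1 'text', 2 'start', 3 'end',
# where columns 2 and 3 are marked as Boundary columns.
columns = ['d3mIndex', 'text', 'start', 'end']

# Reconstructed reference for both boundary columns: the nearest preceding
# non-boundary column, i.e. column 1 ('text').
boundary_for = {'resource_id': 'learningData', 'column_index': 1}

# Because exactly two boundary columns are adjacent, column 2 additionally
# receives INTERVAL_SEMANTIC_TYPES[0] and column 3 INTERVAL_SEMANTIC_TYPES[1].
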
+ if current_boundary_start is None: + assert current_boundary_list is None + + count = 1 + for next_column_index in range(column_index + 1, len(column_names)): + if is_simple_boundary(metadata.query((data_resource['resID'], metadata_base.ALL_ELEMENTS, next_column_index)).get('semantic_types', ())): + count += 1 + else: + break + + if count == 2: + current_boundary_start = column_index + current_boundary_list = INTERVAL_SEMANTIC_TYPES + else: + # Unsupported group of boundary columns, let's skip them all. + column_index += count + continue + + column_semantic_types = column_semantic_types + (current_boundary_list[column_index - current_boundary_start],) + metadata = metadata.update((data_resource['resID'], metadata_base.ALL_ELEMENTS, column_index), { + 'semantic_types': column_semantic_types, + }) + + if column_index - current_boundary_start + 1 == len(current_boundary_list): + current_boundary_start = None + current_boundary_list = None + + column_index += 1 + + return data, metadata + + def _load_resource_type_edgeList(self, dataset_path: str, data_resource: typing.Dict, metadata: metadata_base.DataMetadata, + hash: typing.Any) -> typing.Tuple[container_pandas.DataFrame, metadata_base.DataMetadata]: + assert not data_resource.get('isCollection', False) + + return self._load_resource_type_table(dataset_path, data_resource, metadata, hash) + + def _load_resource_type_graph( + self, dataset_path: str, data_resource: typing.Dict, metadata: metadata_base.DataMetadata, hash: typing.Any, + ) -> typing.Tuple[container_pandas.DataFrame, metadata_base.DataMetadata]: + assert not data_resource.get('isCollection', False) + + data_path = os.path.join(dataset_path, data_resource['resPath']) + collection_path = os.path.dirname(data_path) + filename = os.path.basename(data_path) + filename_extension = os.path.splitext(filename)[1] + + try: + media_type = FILE_EXTENSIONS[filename_extension] + except KeyError as error: + raise TypeError("Unsupported file extension for file '{filename}'.".format(filename=filename)) from error + + if hash is not None: + # We include both the filename and the content. + hash.update(data_resource['resPath'].encode('utf8')) + update_digest(hash, data_path) + + data = container_pandas.DataFrame({'filename': [filename]}, columns=['filename'], dtype=object) + + metadata = metadata.update((data_resource['resID'],), { + 'structural_type': type(data), + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/Table', + 'https://metadata.datadrivendiscovery.org/types/FilesCollection', + ], + 'dimension': { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': len(data), + }, + }) + + metadata = metadata.update((data_resource['resID'], metadata_base.ALL_ELEMENTS), { + 'dimension': { + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], + 'length': 1, + }, + }) + + location_base_uri = utils.fix_uri(collection_path) + # We want to make sure you can just concat with the filename. 
+ if not location_base_uri.endswith('/'): + location_base_uri += '/' + + column_metadata = { + 'name': 'filename', + 'structural_type': str, + 'location_base_uris': [ + location_base_uri, + ], + 'media_types': [media_type], + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', + 'https://metadata.datadrivendiscovery.org/types/FileName', + D3M_RESOURCE_TYPE_CONSTANTS_TO_SEMANTIC_TYPES[data_resource['resType']], + ], + } + + metadata = metadata.update((data_resource['resID'], metadata_base.ALL_ELEMENTS, 0), column_metadata) + + return data, metadata + + def _get_column_metadata(self, column: typing.Dict) -> typing.Dict: + semantic_types = [D3M_COLUMN_TYPE_CONSTANTS_TO_SEMANTIC_TYPES[column['colType']]] + + for role in column['role']: + semantic_types.append(D3M_ROLE_CONSTANTS_TO_SEMANTIC_TYPES[role]) + + # Suggested target is an attribute by default. + if 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget' in semantic_types and 'https://metadata.datadrivendiscovery.org/types/Attribute' not in semantic_types: + semantic_types.append('https://metadata.datadrivendiscovery.org/types/Attribute') + + # Suggested privileged data is an attribute by default. + if 'https://metadata.datadrivendiscovery.org/types/SuggestedPrivilegedData' in semantic_types and 'https://metadata.datadrivendiscovery.org/types/Attribute' not in semantic_types: + semantic_types.append('https://metadata.datadrivendiscovery.org/types/Attribute') + + column_metadata: typing.Dict[str, typing.Any] = { + 'semantic_types': semantic_types, + } + + if column.get('colDescription', None): + column_metadata['description'] = column['colDescription'] + + if column.get('refersTo', None): + if isinstance(column['refersTo']['resObject'], str): + if column['refersTo']['resObject'] == 'item': + # We represent collections as a table with one column of filenames. + column_metadata['foreign_key'] = { + 'type': 'COLUMN', + 'resource_id': column['refersTo']['resID'], + 'column_index': 0, + } + # Legacy (before v4.0.0) node reference. + elif column['refersTo']['resObject'] == 'node': + column_metadata['foreign_key'] = { + 'type': 'NODE_ATTRIBUTE', + 'resource_id': column['refersTo']['resID'], + 'node_attribute': 'nodeID', + } + # Legacy (before v4.0.0) edge reference. 
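
For illustration, a column description whose 'refersTo' is the string 'item' (a reference into a file collection, handled above) is turned into a COLUMN foreign key pointing at the single filename column of that resource; the legacy 'edge' branch flagged by the comment above continues below. Values here are hypothetical.

column = {
    'colIndex': 1,
    'colName': 'image_file',
    'colType': 'string',
    'role': ['attribute'],
    'refersTo': {'resID': '0', 'resObject': 'item'},
}

# Foreign key emitted by _get_column_metadata for such a column:
expected_foreign_key = {'type': 'COLUMN', 'resource_id': '0', 'column_index': 0}
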
+ elif column['refersTo']['resObject'] == 'edge': + column_metadata['foreign_key'] = { + 'type': 'EDGE_ATTRIBUTE', + 'resource_id': column['refersTo']['resID'], + 'edge_attribute': 'edgeID', + } + else: + raise exceptions.UnexpectedValueError("Unknown \"resObject\" value: {resource_object}".format(resource_object=column['refersTo']['resObject'])) + else: + if 'columnIndex' in column['refersTo']['resObject']: + if 'https://metadata.datadrivendiscovery.org/types/Boundary' in semantic_types: + column_metadata['boundary_for'] = { + 'resource_id': column['refersTo']['resID'], + 'column_index': column['refersTo']['resObject']['columnIndex'], + } + else: + column_metadata['foreign_key'] = { + 'type': 'COLUMN', + 'resource_id': column['refersTo']['resID'], + 'column_index': column['refersTo']['resObject']['columnIndex'], + } + elif 'columnName' in column['refersTo']['resObject']: + if 'https://metadata.datadrivendiscovery.org/types/Boundary' in semantic_types: + column_metadata['boundary_for'] = { + 'resource_id': column['refersTo']['resID'], + 'column_name': column['refersTo']['resObject']['columnName'], + } + else: + column_metadata['foreign_key'] = { + 'type': 'COLUMN', + 'resource_id': column['refersTo']['resID'], + 'column_name': column['refersTo']['resObject']['columnName'], + } + elif 'nodeAttribute' in column['refersTo']['resObject']: + column_metadata['foreign_key'] = { + 'type': 'NODE_ATTRIBUTE', + 'resource_id': column['refersTo']['resID'], + 'node_attribute': column['refersTo']['resObject']['nodeAttribute'], + } + elif 'edgeAttribute' in column['refersTo']['resObject']: + column_metadata['foreign_key'] = { + 'type': 'EDGE_ATTRIBUTE', + 'resource_id': column['refersTo']['resID'], + 'edge_attribute': column['refersTo']['resObject']['edgeAttribute'], + } + else: + raise exceptions.UnexpectedValueError("Unknown \"resObject\" value: {resource_object}".format(resource_object=column['refersTo']['resObject'])) + + if column.get('timeGranularity', None): + # "units" is backwards compatible field name. + # See: https://gitlab.com/datadrivendiscovery/data-supply/issues/215 + unit = column['timeGranularity'].get('unit', column['timeGranularity'].get('units', None)) + column_metadata['time_granularity'] = { + 'value': column['timeGranularity']['value'], + 'unit': TIME_GRANULARITIES[unit], + } + + return column_metadata + + def _merge_score_targets(self, resources: typing.Dict, metadata: metadata_base.DataMetadata, dataset_path: str, hash: typing.Any) -> None: + targets_path = os.path.join(dataset_path, '..', 'targets.csv') + + targets = pandas.read_csv( + targets_path, + # We do not want to do any conversion of values at this point. + # This should be done by primitives later on. + dtype=str, + # We always expect one row header. + header=0, + # We want empty strings and not NaNs. + na_filter=False, + encoding='utf8', + low_memory=False, + memory_map=True, + ) + + for resource_id, resource in resources.items(): + # We assume targets are only in the dataset entry point. + if metadata.has_semantic_type((resource_id,), 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint'): + contains_empty_values = {} + for column_name in targets.columns: + if column_name == 'd3mIndex': + continue + + contains_empty_values[column_name] = targets.loc[:, column_name].eq('').any() + + # We first make sure targets match resource in row order. At this stage all values + # are strings, so we can fill simply with empty strings if it happens that index + # values do not match (which in fact should never happen). 
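
A tiny pandas illustration of the reindexing step described in the comment above and performed by the statement below, with made-up values:

import pandas

targets = pandas.DataFrame({'d3mIndex': ['0', '1', '2'], 'label': ['a', 'b', 'c']})
resource = pandas.DataFrame({'d3mIndex': ['2', '0', '1']})

# Targets are re-ordered to match the resource's 'd3mIndex' order, filling ''
# where an index value is missing (which should not normally happen).
reindexed = targets.set_index('d3mIndex').reindex(resource.loc[:, 'd3mIndex'], fill_value='').reset_index()
assert list(reindexed.loc[:, 'label']) == ['c', 'a', 'b']
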
+ reindexed_targets = targets.set_index('d3mIndex').reindex(resource.loc[:, 'd3mIndex'], fill_value='').reset_index() + + for column_name in reindexed_targets.columns: + if column_name == 'd3mIndex': + continue + + # We match columns based on their names. + if column_name in resource.columns: + if not contains_empty_values[column_name] and reindexed_targets.loc[:, column_name].eq('').any(): + raise exceptions.InvalidDatasetError("'d3mIndex' in 'targets.csv' does not match 'd3mIndex' in the resource '{resource_id}'.".format(resource_id=resource_id)) + + resource.loc[:, column_name] = reindexed_targets.loc[:, column_name] + + resources[resource_id] = resource + + +class CSVLoader(Loader): + """ + A class for loading a dataset from a CSV file. + + Loader supports both loading a dataset from a local file system or remote locations. + URI should point to a file with ``.csv`` file extension. + """ + + def can_load(self, dataset_uri: str) -> bool: + try: + parsed_uri = url_parse.urlparse(dataset_uri, allow_fragments=False) + except Exception: + return False + + if parsed_uri.scheme not in pandas_io_common._VALID_URLS: + return False + + if parsed_uri.scheme == 'file': + if parsed_uri.netloc not in ['', 'localhost']: + return False + + if not parsed_uri.path.startswith('/'): + return False + + for extension in ('', '.gz', '.bz2', '.zip', 'xz'): + if parsed_uri.path.endswith('.csv' + extension): + return True + + return False + + def _load_data(self, resources: typing.Dict, metadata: metadata_base.DataMetadata, *, dataset_uri: str, + compute_digest: ComputeDigest) -> typing.Tuple[metadata_base.DataMetadata, int, typing.Optional[str]]: + try: + buffer, compression, should_close = self._get_buffer_and_compression(dataset_uri) + except FileNotFoundError as error: + raise exceptions.DatasetNotFoundError("CSV dataset '{dataset_uri}' cannot be found.".format(dataset_uri=dataset_uri)) from error + except urllib_error.HTTPError as error: + if error.code == 404: + raise exceptions.DatasetNotFoundError("CSV dataset '{dataset_uri}' cannot be found.".format(dataset_uri=dataset_uri)) from error + else: + raise error + except urllib_error.URLError as error: + if isinstance(error.reason, FileNotFoundError): + raise exceptions.DatasetNotFoundError("CSV dataset '{dataset_uri}' cannot be found.".format(dataset_uri=dataset_uri)) from error + else: + raise error + + # CSV files do not have digest, so "ALWAYS" and "ONLY_IF_MISSING" is the same. + # Allowing "True" for backwards compatibility. + if compute_digest is True or compute_digest == ComputeDigest.ALWAYS or compute_digest == ComputeDigest.ONLY_IF_MISSING: + buffer_digest = self._get_digest(buffer) + else: + buffer_digest = None + + buffer_size = len(buffer.getvalue()) + + data = pandas.read_csv( + buffer, + # We do not want to do any conversion of values at this point. + # This should be done by primitives later on. + dtype=str, + # We always expect one row header. + header=0, + # We want empty strings and not NaNs. + na_filter=False, + compression=compression, + encoding='utf8', + low_memory=False, + ) + + if should_close: + try: + buffer.close() + except Exception: + pass + + if 'd3mIndex' not in data.columns: + # We do not update digest with new data generated here. This is OK because this data is determined by + # original data so original digest still applies. When saving a new digest has to be computed anyway + # because this data will have to be converted to string. 
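
As a usage sketch of CSVLoader.can_load defined above (the URI scheme must be one pandas understands, 'file' URIs must be absolute with an empty or 'localhost' host, and the path must end in .csv, optionally compressed), assuming a contemporary pandas; the d3mIndex generation noted in the comment above continues below.

loader = CSVLoader()

assert loader.can_load('file:///data/tables/learningData.csv')
assert loader.can_load('file://localhost/data/learningData.csv.gz')
assert not loader.can_load('file:///data/learningData.parquet')
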
+ data.insert(0, 'd3mIndex', range(len(data))) + d3m_index_generated = True + else: + d3m_index_generated = False + + data = container_pandas.DataFrame(data) + + resources['learningData'] = data + + metadata = metadata.update(('learningData',), { + 'structural_type': type(data), + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/Table', + 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint', + ], + 'dimension': { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': len(data), + }, + }) + + metadata = metadata.update(('learningData', metadata_base.ALL_ELEMENTS), { + 'dimension': { + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], + 'length': len(data.columns), + }, + }) + + for i, column_name in enumerate(data.columns): + if i == 0 and d3m_index_generated: + metadata = metadata.update(('learningData', metadata_base.ALL_ELEMENTS, i), { + 'name': column_name, + 'structural_type': numpy.int64, + 'semantic_types': [ + 'http://schema.org/Integer', + 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', + ], + }) + else: + metadata = metadata.update(('learningData', metadata_base.ALL_ELEMENTS, i), { + 'name': column_name, + 'structural_type': str, + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/UnknownType', + ], + }) + + return metadata, buffer_size, buffer_digest + + def _get_buffer_and_compression(self, dataset_uri: str) -> typing.Tuple[io.BytesIO, str, bool]: + if hasattr(pandas_io_common, 'infer_compression'): + infer_compression = pandas_io_common.infer_compression + else: + # Backwards compatibility for Pandas before 1.0.0. + infer_compression = pandas_io_common._infer_compression + compression = infer_compression(dataset_uri, 'infer') + buffer, _, compression, should_close = pandas_io_common.get_filepath_or_buffer(dataset_uri, 'utf8', compression) + + return buffer, compression, should_close + + def _get_digest(self, buffer: io.BytesIO) -> str: + return hashlib.sha256(buffer.getvalue()).hexdigest() + + # "strict_digest" is ignored, there is no metadata to compare digest against. + # "handle_score_split" is ignored as well. + def load(self, dataset_uri: str, *, dataset_id: str = None, dataset_version: str = None, dataset_name: str = None, lazy: bool = False, + compute_digest: ComputeDigest = ComputeDigest.ONLY_IF_MISSING, strict_digest: bool = False, handle_score_split: bool = True) -> 'Dataset': + assert self.can_load(dataset_uri) + + parsed_uri = url_parse.urlparse(dataset_uri, allow_fragments=False) + + # Pandas requires a host for "file" URIs. + if parsed_uri.scheme == 'file' and parsed_uri.netloc == '': + parsed_uri = parsed_uri._replace(netloc='localhost') + dataset_uri = url_parse.urlunparse(parsed_uri) + + dataset_size = None + dataset_digest = None + + resources: typing.Dict = {} + metadata = metadata_base.DataMetadata() + + if not lazy: + load_lazy = None + + metadata, dataset_size, dataset_digest = self._load_data( + resources, metadata, dataset_uri=dataset_uri, compute_digest=compute_digest, + ) + + else: + def load_lazy(dataset: Dataset) -> None: + # "dataset" can be used as "resources", it is a dict of values. 
+ dataset.metadata, dataset_size, dataset_digest = self._load_data( + dataset, dataset.metadata, dataset_uri=dataset_uri, compute_digest=compute_digest, + ) + + new_metadata = { + 'dimension': {'length': len(dataset)}, + 'stored_size': dataset_size, + } + + if dataset_digest is not None: + new_metadata['digest'] = dataset_digest + + dataset.metadata = dataset.metadata.update((), new_metadata) + dataset.metadata = dataset.metadata.generate(dataset) + + dataset._load_lazy = None + + dataset_metadata = { + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': Dataset, + 'id': dataset_id or dataset_uri, + 'name': dataset_name or os.path.basename(parsed_uri.path), + 'location_uris': [ + dataset_uri, + ], + 'dimension': { + 'name': 'resources', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'], + 'length': len(resources), + }, + } + + if dataset_version is not None: + dataset_metadata['version'] = dataset_version + + if dataset_size is not None: + dataset_metadata['stored_size'] = dataset_size + + if dataset_digest is not None: + dataset_metadata['digest'] = dataset_digest + + metadata = metadata.update((), dataset_metadata) + + return Dataset(resources, metadata, load_lazy=load_lazy) + + +class SklearnExampleLoader(Loader): + """ + A class for loading example scikit-learn datasets. + + URI should be of the form ``sklearn://``, where names come from + ``sklearn.datasets.load_*`` function names. + """ + + def can_load(self, dataset_uri: str) -> bool: + if dataset_uri.startswith('sklearn://'): + return True + + return False + + def _load_data(self, resources: typing.Dict, metadata: metadata_base.DataMetadata, *, dataset_path: str, + compute_digest: ComputeDigest) -> typing.Tuple[metadata_base.DataMetadata, typing.Optional[str], typing.Optional[str]]: + bunch = self._get_bunch(dataset_path) + + # Sklearn datasets do not have digest, so "ALWAYS" and "ONLY_IF_MISSING" is the same. + # Allowing "True" for backwards compatibility. + if compute_digest is True or compute_digest == ComputeDigest.ALWAYS or compute_digest == ComputeDigest.ONLY_IF_MISSING: + bunch_digest = self._get_digest(bunch) + else: + bunch_digest = None + + bunch_description = bunch.get('DESCR', None) or None + + bunch_data = bunch['data'] + bunch_target = bunch['target'] + + if len(bunch_data.shape) == 1: + bunch_data = bunch_data.reshape((bunch_data.shape[0], 1)) + if len(bunch_target.shape) == 1: + bunch_target = bunch_target.reshape((bunch_target.shape[0], 1)) + + column_names = [] + target_values = None + + if 'feature_names' in bunch: + for feature_name in bunch['feature_names']: + column_names.append(str(feature_name)) + + if 'target_names' in bunch: + if len(bunch['target_names']) == bunch_target.shape[1]: + for target_name in bunch['target_names']: + column_names.append(str(target_name)) + else: + target_values = [str(target_value) for target_value in bunch['target_names']] + + if target_values is not None: + converted_target = numpy.empty(bunch_target.shape, dtype=object) + + for i, row in enumerate(bunch_target): + for j, column in enumerate(row): + converted_target[i, j] = target_values[column] + else: + converted_target = bunch_target + + # Add names for any extra columns. We do not really check for duplicates because Pandas allow columns with the same name. 
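
As a usage sketch for SklearnExampleLoader: the path after 'sklearn://' is appended to 'load_' and looked up in sklearn.datasets, so the URIs below map to load_iris and load_digits; the column-naming loop referenced in the comment above continues below.

loader = SklearnExampleLoader()

assert loader.can_load('sklearn://iris')
assert loader.can_load('sklearn://digits')
assert not loader.can_load('file:///tmp/iris.csv')
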
+ for i in range(len(column_names), bunch_data.shape[1] + converted_target.shape[1]): + column_names.append('column {i}'.format(i=i)) + + data = pandas.concat([pandas.DataFrame(bunch_data), pandas.DataFrame(converted_target)], axis=1) + data.columns = column_names + data = container_pandas.DataFrame(data) + + # We do not update digest with new data generated here. This is OK because this data is determined by + # original data so original digest still applies. When saving a new digest has to be computed anyway + # because this data will have to be converted to string. + data.insert(0, 'd3mIndex', range(len(data))) + + resources['learningData'] = data + + metadata = metadata.update(('learningData',), { + 'structural_type': type(data), + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/Table', + 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint', + ], + 'dimension': { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': len(data), + }, + }) + + metadata = metadata.update(('learningData', metadata_base.ALL_ELEMENTS), { + 'dimension': { + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], + 'length': len(data.columns), + }, + }) + + metadata = metadata.update(('learningData', metadata_base.ALL_ELEMENTS, 0), { + 'name': 'd3mIndex', + 'structural_type': numpy.int64, + 'semantic_types': [ + 'http://schema.org/Integer', + 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', + ], + }) + + for column_index in range(1, bunch_data.shape[1] + 1): + column_metadata: typing.Dict[str, typing.Any] = { + 'structural_type': bunch_data.dtype.type, + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/UnknownType', + 'https://metadata.datadrivendiscovery.org/types/Attribute', + ], + 'name': data.columns[column_index], + } + + metadata = metadata.update(('learningData', metadata_base.ALL_ELEMENTS, column_index), column_metadata) + + for column_index in range(bunch_data.shape[1] + 1, bunch_data.shape[1] + bunch_target.shape[1] + 1): + if target_values is not None: + if len(target_values) == 2: + column_type = ['http://schema.org/Boolean'] + elif len(target_values) > 2: + column_type = ['https://metadata.datadrivendiscovery.org/types/CategoricalData'] + else: + raise exceptions.InvalidDatasetError("Too few target values in sklearn dataset.") + else: + column_type = ['https://metadata.datadrivendiscovery.org/types/UnknownType'] + + column_metadata = { + 'structural_type': str if target_values is not None else bunch_target.dtype.type, + 'semantic_types': column_type + [ + 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', + 'https://metadata.datadrivendiscovery.org/types/Attribute', + ], + 'name': data.columns[column_index], + } + + metadata = metadata.update(('learningData', metadata_base.ALL_ELEMENTS, column_index), column_metadata) + + return metadata, bunch_description, bunch_digest + + def _get_digest(self, bunch: typing.Dict) -> str: + hash = hashlib.sha256() + + hash.update(bunch['data'].tobytes()) + hash.update(bunch['target'].tobytes()) + + if 'feature_names' in bunch: + if isinstance(bunch['feature_names'], list): + for feature_name in bunch['feature_names']: + hash.update(feature_name.encode('utf8')) + else: + hash.update(bunch['feature_names'].tobytes()) + + if 'target_names' in bunch: + if isinstance(bunch['target_names'], list): + for target_name in bunch['target_names']: + hash.update(target_name.encode('utf8')) 
+ else: + hash.update(bunch['target_names'].tobytes()) + + if 'DESCR' in bunch: + hash.update(bunch['DESCR'].encode('utf8')) + + return hash.hexdigest() + + def _get_bunch(self, dataset_path: str) -> typing.Dict: + return getattr(datasets, 'load_{dataset_path}'.format(dataset_path=dataset_path))() + + # "strict_digest" is ignored, there is no metadata to compare digest against. + # "handle_score_split is ignored as well. + def load(self, dataset_uri: str, *, dataset_id: str = None, dataset_version: str = None, dataset_name: str = None, lazy: bool = False, + compute_digest: ComputeDigest = ComputeDigest.ONLY_IF_MISSING, strict_digest: bool = False, handle_score_split: bool = True) -> 'Dataset': + assert self.can_load(dataset_uri) + + dataset_path = dataset_uri[len('sklearn://'):] + + if not hasattr(datasets, 'load_{dataset_path}'.format(dataset_path=dataset_path)): + raise exceptions.DatasetNotFoundError("Sklearn dataset '{dataset_uri}' cannot be found.".format(dataset_uri=dataset_uri)) + + dataset_description = None + dataset_digest = None + + resources: typing.Dict = {} + metadata = metadata_base.DataMetadata() + + if not lazy: + load_lazy = None + + metadata, dataset_description, dataset_digest = self._load_data( + resources, metadata, dataset_path=dataset_path, compute_digest=compute_digest, + ) + + else: + def load_lazy(dataset: Dataset) -> None: + # "dataset" can be used as "resources", it is a dict of values. + dataset.metadata, dataset_description, dataset_digest = self._load_data( + dataset, dataset.metadata, dataset_path=dataset_path, compute_digest=compute_digest, + ) + + new_metadata: typing.Dict = { + 'dimension': {'length': len(dataset)}, + } + + if dataset_description is not None: + new_metadata['description'] = dataset_description + + if dataset_digest is not None: + new_metadata['digest'] = dataset_digest + + dataset.metadata = dataset.metadata.update((), new_metadata) + dataset.metadata = dataset.metadata.generate(dataset) + + dataset._load_lazy = None + + dataset_metadata = { + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': Dataset, + 'id': dataset_id or dataset_uri, + 'name': dataset_name or dataset_path, + 'location_uris': [ + dataset_uri, + ], + 'dimension': { + 'name': 'resources', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'], + 'length': len(resources), + }, + } + + if dataset_version is not None: + dataset_metadata['version'] = dataset_version + + if dataset_description is not None: + dataset_metadata['description'] = dataset_description + + if dataset_digest is not None: + dataset_metadata['digest'] = dataset_digest + + metadata = metadata.update((), dataset_metadata) + + return Dataset(resources, metadata, load_lazy=load_lazy) + + +class D3MDatasetSaver(Saver): + """ + A class for saving of D3M datasets. + + This saver supports only saving to local file system. + URI should point to the ``datasetDoc.json`` file in the D3M dataset directory. 
+ """ + + VERSION = '4.1.0' + + def can_save(self, dataset_uri: str) -> bool: + if not self._is_dataset(dataset_uri): + return False + + if not self._is_local_file(dataset_uri): + return False + + return True + + def _is_dataset(self, uri: str) -> bool: + try: + parsed_uri = url_parse.urlparse(uri, allow_fragments=False) + except Exception: + return False + + if os.path.basename(parsed_uri.path) != 'datasetDoc.json': + return False + + return True + + def _is_local_file(self, uri: str) -> bool: + try: + parsed_uri = url_parse.urlparse(uri, allow_fragments=False) + except Exception: + return False + + if parsed_uri.scheme != 'file': + return False + + if parsed_uri.netloc not in ['', 'localhost']: + return False + + if not parsed_uri.path.startswith('/'): + return False + + return True + + def _get_column_description(self, column_index: int, column_name: str, column_metadata: typing.Dict) -> typing.Dict: + column = { + 'colIndex': column_index, + 'colName': column_name, + 'role': [SEMANTIC_TYPES_TO_D3M_ROLES[x] for x in column_metadata.get('semantic_types', []) if x in SEMANTIC_TYPES_TO_D3M_ROLES] + } + column_type = [SEMANTIC_TYPES_TO_D3M_COLUMN_TYPES[semantic_type] for semantic_type in column_metadata.get('semantic_types', []) if semantic_type in SEMANTIC_TYPES_TO_D3M_COLUMN_TYPES] + + # If column semantic_type is not specified we default to unknown type. + if not column_type: + if 'structural_type' in column_metadata: + if utils.is_int(column_metadata['structural_type']): + column['colType'] = SEMANTIC_TYPES_TO_D3M_COLUMN_TYPES['http://schema.org/Integer'] + elif utils.is_float(column_metadata['structural_type']): + column['colType'] = SEMANTIC_TYPES_TO_D3M_COLUMN_TYPES['http://schema.org/Float'] + elif issubclass(column_metadata['structural_type'], bool): + column['colType'] = SEMANTIC_TYPES_TO_D3M_COLUMN_TYPES['http://schema.org/Boolean'] + else: + column['colType'] = SEMANTIC_TYPES_TO_D3M_COLUMN_TYPES['https://metadata.datadrivendiscovery.org/types/UnknownType'] + else: + column['colType'] = SEMANTIC_TYPES_TO_D3M_COLUMN_TYPES['https://metadata.datadrivendiscovery.org/types/UnknownType'] + elif len(column_type) == 1: + column['colType'] = column_type[0] + else: + raise exceptions.InvalidMetadataError( + "More than one semantic type found for column type: {column_type}".format( + column_type=column_type, + ), + ) + + if column_metadata.get('description', None): + column['colDescription'] = column_metadata['description'] + + return column + + def _get_collection_resource_description(self, dataset: 'Dataset', resource_id: str, resource: typing.Any, dataset_location_base_path: typing.Optional[str]) -> typing.Dict: + if not isinstance(resource, container_pandas.DataFrame): + raise exceptions.InvalidArgumentTypeError("Saving a D3M dataset with a collection resource which is not a DataFrame, but '{structural_type}'.".format( + structural_type=type(resource), + )) + if len(resource.columns) != 1: + raise exceptions.InvalidArgumentTypeError("Saving a D3M dataset with a collection resource with an invalid number of columns: {columns}".format( + columns=len(resource.columns), + )) + if not dataset.metadata.has_semantic_type((resource_id, metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/FileName'): + raise exceptions.InvalidArgumentTypeError("Saving a D3M dataset with a collection resource with with a column which does not contain filenames.") + + selector = (resource_id, metadata_base.ALL_ELEMENTS, 0) + metadata, exceptions_with_selectors = 
dataset.metadata.query_with_exceptions(selector) + + # We check structural type for all rows in a column, but also if any row has a different structural type. + for structural_type in [metadata['structural_type']] + [metadata['structural_type'] for metadata in exceptions_with_selectors.values() if 'structural_type' in metadata]: + if not issubclass(structural_type, str): + raise exceptions.InvalidArgumentTypeError("Saving a D3M dataset with a collection resource with with a column which does not just string values, but also '{structural_type}'.".format( + structural_type=structural_type, + )) + + # We use "location_base_uris" from all rows. We only support "location_base_uris" + # being the same for all rows, so we have to verify that. + all_location_base_uris_nested = [ + list(metadata.get('location_base_uris', [])) + ] + [ + list(metadata['location_base_uris']) for metadata in exceptions_with_selectors.values() if 'location_base_uris' in metadata + ] + + # Flatten the list of lists, remove duplicates, sort for reproducibility. + all_location_base_uris = sorted({all_location_base_uri for all_location_base_uri in itertools.chain.from_iterable(all_location_base_uris_nested)}) + + local_location_base_uris = [location_base_uri for location_base_uri in all_location_base_uris if self._is_local_file(location_base_uri)] + + if not local_location_base_uris: + raise exceptions.NotSupportedError( + "Saving a D3M dataset with a collection resource without local files is not supported: {all_location_base_uris}".format( + all_location_base_uris=all_location_base_uris, + ), + ) + elif len(local_location_base_uris) > 1: + # When there are multiple base locations in D3M dataset format can lead to conflicts + # where same filename in a column points to different files, but we are storing them + # under the same resource path. We verify that there are no conflicts in "_save_collection". + # Because there is no clear way to determine the best common resource path we use a hard-coded one. + resource_path = 'files/' + elif dataset_location_base_path is None: + # We cannot determine the resource path so we use a hard-coded one. + resource_path = 'files/' + else: + location_base_path = url_parse.urlparse(local_location_base_uris[0], allow_fragments=False).path + + # This is a way to check that "dataset_location_base_path" is a prefix of "location_base_path". + if os.path.commonpath([location_base_path, dataset_location_base_path]) != dataset_location_base_path: + raise exceptions.NotSupportedError( + "Saving a D3M dataset with a collection resource with files location not under the dataset directory.", + ) + + resource_path = location_base_path[len(dataset_location_base_path) + 1:] + + # Just a matter of style. + if not resource_path.endswith('/'): + resource_path += '/' + + resource_formats_set = set() + # "media_types" for "ALL_ELEMENTS" is an union of all rows. + for media_type in metadata.get('media_types', []): + # We allow unknown media types. + resource_formats_set.add(MEDIA_TYPES_REVERSE.get(media_type, media_type)) + + resource_formats = {} + + # An empty collection? Or just a collection resource without metadata? + if not resource_formats_set: + if len(resource): + raise ValueError("A collection resource without media types metadata.") + + # An optimized case, all files in a collection belong to the same resource format. 
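
A small sketch of the resource path derivation above, with hypothetical local paths: the dataset directory must be a path prefix of the files' base location, and the remainder (plus a trailing slash) becomes 'resPath'. The per-format branches then continue below.

import os

dataset_location_base_path = '/datasets/example_dataset'
location_base_path = '/datasets/example_dataset/media'

# Same prefix check as above.
assert os.path.commonpath([location_base_path, dataset_location_base_path]) == dataset_location_base_path

resource_path = location_base_path[len(dataset_location_base_path) + 1:] + '/'
assert resource_path == 'media/'
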
+ elif len(resource_formats_set) == 1: + file_extensions_set = set() + for filename in resource.iloc[:, 0]: + root, ext = os.path.splitext(filename) + if not ext: + raise ValueError("A filename without a file extension in a collection resource: {filename}".format(filename=filename)) + ext = _remove_extension_dot(ext) + file_extensions_set.add(ext) + + # Sorting to have reproducibility. + resource_formats[resource_formats_set.pop()] = sorted(file_extensions_set) + + else: + resource_formats_of_sets: typing.Dict[str, typing.Set] = {} + + for row_index, filename in enumerate(resource.iloc[:, 0]): + root, ext = os.path.splitext(filename) + if not ext: + raise ValueError("A filename without a file extension in a collection resource: {filename}".format(filename=filename)) + ext = _remove_extension_dot(ext) + + try: + media_types = dataset.metadata.query((resource_id, row_index, 0))['media_types'] + except KeyError: + raise ValueError("A collection resource without media types metadata for row {row_index}.".format(row_index=row_index)) from None + + if len(media_types) != 1: + raise ValueError("Medata should have only one media type per row in a collection resource, at row {row_index}: {media_types}".format(row_index=row_index, media_types=media_types)) + + # We allow unknown media types. + resource_format = MEDIA_TYPES_REVERSE.get(media_types[0], media_types[0]) + + if resource_format not in resource_formats_of_sets: + resource_formats_of_sets[resource_format] = set() + + resource_formats_of_sets[resource_format].add(ext) + + for resource_format, file_extensions in resource_formats_of_sets.items(): + # Sorting to have reproducibility. + resource_formats[resource_format] = sorted(file_extensions) + + resource_type = [SEMANTIC_TYPES_TO_D3M_RESOURCE_TYPES[semantic_type] for semantic_type in metadata.get('semantic_types', []) if semantic_type in SEMANTIC_TYPES_TO_D3M_RESOURCE_TYPES] + + if len(resource_type) != 1: + raise exceptions.InvalidMetadataError( + "Not exactly one semantic type found for resource type: {resource_type}".format( + resource_type=resource_type, + ), + ) + + resource_description = { + 'resID': resource_id, + 'isCollection': True, + 'resFormat': resource_formats, + 'resType': resource_type[0], + 'resPath': resource_path, + } + + columns = self._get_columns_description(dataset, resource_id, resource) + + if columns: + resource_description['columns'] = columns + + if 'file_columns_count' in metadata: + resource_description['columnsCount'] = metadata['file_columns_count'] + + return resource_description + + # We do not use "dataset_location_base_path" but we keep it for all "_get_*_resource_description" methods to have the same signature. 
+ def _get_dataframe_resource_description(self, dataset: 'Dataset', resource_id: str, resource: typing.Any, dataset_location_base_path: typing.Optional[str]) -> typing.Dict: + if dataset.metadata.has_semantic_type((resource_id,), 'https://metadata.datadrivendiscovery.org/types/EdgeList'): + res_type = 'edgeList' + else: + res_type = 'table' + + resource_description = { + 'resID': resource_id, + 'isCollection': False, + 'resFormat': {'text/csv': ['csv']}, + 'resType': res_type, + 'columnsCount': len(resource.columns), + } + + if dataset.metadata.has_semantic_type((resource_id,), 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint'): + if resource_id != 'learningData': + logger.error("Saving a dataset with a dataset entry point with resource ID not equal to 'learningData', but '%(resource_id)s'.", {'resource_id': resource_id}) + resource_description['resPath'] = 'tables/learningData.csv' + else: + resource_description['resPath'] = 'tables/{resource_id}.csv'.format(resource_id=resource_id) + + columns = self._get_columns_description(dataset, resource_id, resource) + + if columns: + resource_description['columns'] = columns + + return resource_description + + # TODO: Make it easier to subclass to support other resource types. + def _get_resource_description(self, dataset: 'Dataset', resource_id: str, resource: typing.Any, dataset_location_base_path: typing.Optional[str]) -> typing.Dict: + if dataset.metadata.has_semantic_type((resource_id,), 'https://metadata.datadrivendiscovery.org/types/FilesCollection'): + return self._get_collection_resource_description(dataset, resource_id, resource, dataset_location_base_path) + + elif isinstance(resource, container_pandas.DataFrame): + return self._get_dataframe_resource_description(dataset, resource_id, resource, dataset_location_base_path) + + else: + raise exceptions.NotSupportedError("Saving a D3M dataset with a resource with structural type '{structural_type}' is not supported.".format(structural_type=type(resource))) + + def _get_columns_description(self, dataset: 'Dataset', resource_id: str, resource: typing.Any) -> typing.List[typing.Dict]: + columns = [] + + # Traverse file columns in collections. + if dataset.metadata.has_semantic_type((resource_id,), 'https://metadata.datadrivendiscovery.org/types/FilesCollection'): + # We know there is only one column here. This has been verified in "_get_collection_resource_description". + column_metadata = dataset.metadata.query((resource_id, metadata_base.ALL_ELEMENTS, 0)) + for file_column_metadata in column_metadata.get('file_columns', []): + columns.append(self._get_column_description(file_column_metadata['column_index'], file_column_metadata['column_name'], file_column_metadata)) + + # Traverse columns in a DataFrame. 
+ elif isinstance(resource, container_pandas.DataFrame): + number_of_columns = len(resource.columns) + for column_index in range(number_of_columns): + column_selector = (resource_id, metadata_base.ALL_ELEMENTS, column_index) + column_metadata = dataset.metadata.query(column_selector) + + column = self._get_column_description(column_index, column_metadata['name'], column_metadata) + + if 'boundary_for' in column_metadata and 'foreign_key' in column_metadata: + raise exceptions.NotSupportedError("Both boundary and foreign key are not supported.") + + elif 'foreign_key' in column_metadata: + if column_metadata['foreign_key']['type'] == 'COLUMN': + refers_to = { + 'resID': column_metadata['foreign_key']['resource_id'], + 'resObject': {}, + } + + if 'column_name' in column_metadata['foreign_key']: + refers_to['resObject'] = { + 'columnName': column_metadata['foreign_key']['column_name'], + } + referring_column_index = dataset.metadata.get_column_index_from_column_name( + column_metadata['foreign_key']['column_name'], + at=(column_metadata['foreign_key']['resource_id'],), + ) + elif 'column_index' in column_metadata['foreign_key']: + refers_to['resObject'] = { + 'columnIndex': column_metadata['foreign_key']['column_index'], + } + referring_column_index = column_metadata['foreign_key']['column_index'] + else: + raise exceptions.InvalidMetadataError(f"'foreign_key' is missing a column reference, in metadata of column {column_index} of resource '{resource_id}'.") + + # A special case to handle a reference to a file collection. + if dataset.metadata.has_semantic_type( + (column_metadata['foreign_key']['resource_id'],), + 'https://metadata.datadrivendiscovery.org/types/FilesCollection', + ) and dataset.metadata.has_semantic_type( + (column_metadata['foreign_key']['resource_id'], metadata_base.ALL_ELEMENTS, referring_column_index), + 'https://metadata.datadrivendiscovery.org/types/FileName', + ): + refers_to['resObject'] = 'item' + + column['refersTo'] = refers_to + + elif column_metadata['foreign_key']['type'] == 'NODE_ATTRIBUTE': + column['refersTo'] = { + 'resID': column_metadata['foreign_key']['resource_id'], + 'resObject': { + 'nodeAttribute': column_metadata['foreign_key']['node_attribute'], + }, + } + + elif column_metadata['foreign_key']['type'] == 'EDGE_ATTRIBUTE': + column['refersTo'] = { + 'resID': column_metadata['foreign_key']['resource_id'], + 'resObject': { + 'edgeAttribute': column_metadata['foreign_key']['edge_attribute'], + }, + } + + elif 'boundary_for' in column_metadata: + refers_to = { + # "resource_id" is optional in our metadata and it + # means the reference is local to the resource. 
+ 'resID': column_metadata['boundary_for'].get('resource_id', resource_id), + 'resObject': {}, + } + + if 'column_name' in column_metadata['boundary_for']: + refers_to['resObject'] = { + 'columnName': column_metadata['boundary_for']['column_name'], + } + elif 'column_index' in column_metadata['boundary_for']: + refers_to['resObject'] = { + 'columnIndex': column_metadata['boundary_for']['column_index'], + } + else: + raise exceptions.InvalidMetadataError(f"'boundary_for' is missing a column reference, in metadata of column {column_index} of resource '{resource_id}'.") + + column['refersTo'] = refers_to + + if 'time_granularity' in column_metadata: + try: + column['timeGranularity'] = { + 'value': column_metadata['time_granularity']['value'], + 'unit': TIME_GRANULARITIES_REVERSE[column_metadata['time_granularity']['unit']], + } + except KeyError as error: + raise exceptions.InvalidMetadataError(f"'time_granularity' is invalid, in metadata of column {column_index} of resource '{resource_id}'.") from error + + columns.append(column) + + return columns + + def _get_dataset_description(self, dataset: 'Dataset') -> typing.Dict: + dataset_description: typing.Dict[str, typing.Any] = { + 'about': { + 'datasetSchemaVersion': self.VERSION, + }, + } + + dataset_root_metadata = dataset.metadata.query(()) + + for d3m_path, (dataset_path, required) in D3M_TO_DATASET_FIELDS.items(): + value = utils.get_dict_path(dataset_root_metadata, dataset_path) + if value is not None: + utils.set_dict_path(dataset_description, d3m_path, value) + elif required: + raise exceptions.InvalidMetadataError(f"Dataset metadata field '{'.'.join(dataset_path)}' is required when saving.") + + for x in [dataset_root_metadata.get('stored_size', None), dataset_description['about'].get('approximateSize', None)]: + if x is not None: + exponent = int((math.log10(x) // 3) * 3) + try: + unit = SIZE_TO_UNITS[exponent] + except KeyError as error: + raise KeyError("Unit string for '{exponent}' not found in lookup dictionary {SIZE_TO_UNITS}.".format(exponent=exponent, SIZE_TO_UNITS=SIZE_TO_UNITS)) from error + dataset_description['about']['approximateSize'] = str(x // (10 ** exponent)) + ' ' + unit + break + + # We are only using the first URI due to design of D3M dataset format. Remaining URIs should be stored in qualities. + if dataset_root_metadata.get('source', {}).get('uris', []): + dataset_description['about']['sourceURI'] = dataset_root_metadata['source']['uris'][0] + + dataset_location_uris = [location_uri for location_uri in dataset_root_metadata.get('location_uris', []) if self._is_local_file(location_uri)] + + if dataset_location_uris: + # If there are multiple local URIs, we pick the first. + dataset_location_base_path = os.path.dirname(url_parse.urlparse(dataset_location_uris[0], allow_fragments=False).path) + else: + dataset_location_base_path = None + + data_resources = [] + + for resource_id, resource in dataset.items(): + resource_description = self._get_resource_description(dataset, resource_id, resource, dataset_location_base_path) + + data_resources.append(resource_description) + + dataset_description['dataResources'] = data_resources + + return dataset_description + + def _generate_metadata_qualities(self, dataset: 'Dataset') -> typing.List: + # We start with canonical metadata. + metadata_to_save = dataset._canonical_metadata(dataset.metadata) + + # We remove digest. 
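
A worked instance of the 'approximateSize' formatting in _get_dataset_description above, assuming SIZE_TO_UNITS maps the exponent 6 to 'MB': the stored size is truncated to the largest power-of-thousand unit. The qualities generation continues below.

import math

stored_size = 2_500_000  # bytes
exponent = int((math.log10(stored_size) // 3) * 3)
assert exponent == 6

# With SIZE_TO_UNITS[6] == 'MB' (an assumption about the lookup table), the
# resulting string is '2 MB'.
approximate_size = str(stored_size // (10 ** exponent)) + ' MB'
assert approximate_size == '2 MB'
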
+ metadata_to_save = metadata_to_save.update((), {'digest': metadata_base.NO_VALUE}) + + for resource_id, resource in dataset.items(): + if isinstance(resource, container_pandas.DataFrame): + # All columns in the DataFrame will be saved as strings, so we have to update + # metadata first to reflect that, before we save metadata. + metadata_to_save = metadata_to_save.update((resource_id, metadata_base.ALL_ELEMENTS, metadata_base.ALL_ELEMENTS), {'structural_type': str}) + + qualities = [] + for metadata_entry in metadata_to_save.to_internal_json_structure(): + restricted_to = { + 'resID': metadata_entry['selector'][0] if metadata_entry['selector'] else '', + } + + if metadata_entry['selector']: + restricted_to['resComponent'] = { + 'selector': metadata_entry['selector'][1:], + } + + qualities.append({ + 'qualName': 'metadata', + 'qualValue': metadata_entry['metadata'], + 'qualValueType': 'dict', + 'restrictedTo': restricted_to, + }) + + return qualities + + def save(self, dataset: 'Dataset', dataset_uri: str, *, compute_digest: ComputeDigest = ComputeDigest.ALWAYS, preserve_metadata: bool = True) -> None: + assert self.can_save(dataset_uri) + + dataset_description = self._get_dataset_description(dataset) + + if preserve_metadata: + dataset_description['qualities'] = self._generate_metadata_qualities(dataset) + + dataset_path = os.path.dirname(url_parse.urlparse(dataset_uri, allow_fragments=False).path) + os.makedirs(dataset_path, 0o755, exist_ok=False) + + dataset_description_path = os.path.join(dataset_path, 'datasetDoc.json') + + # We use "x" mode to make sure file does not already exist. + with open(dataset_description_path, 'x', encoding='utf8') as f: + json.dump(dataset_description, f, indent=2, allow_nan=False) + + for resource_description in dataset_description['dataResources']: + resource_id = resource_description['resID'] + resource = dataset[resource_id] + + self._save_resource(dataset, dataset_uri, dataset_path, resource_description, resource_id, resource) + + # We calculate digest of the new dataset and write it into datasetDoc.json + dataset_description['about']['digest'] = get_d3m_dataset_digest(dataset_description_path) + with open(dataset_description_path, 'w', encoding='utf8') as f: + json.dump(dataset_description, f, indent=2, allow_nan=False) + + # TODO: Make it easier to subclass to support non-local "location_base_uris". + def _save_collection(self, dataset: 'Dataset', dataset_uri: str, dataset_path: str, resource_description: typing.Dict, resource_id: str, resource: typing.Any) -> None: + # Here we can assume collection resource is a DataFrame which contains exactly one + # column containing filenames. This has been verified in "_get_collection_resource_description". + assert isinstance(resource, container_pandas.DataFrame), type(resource) + assert len(resource.columns) == 1, resource.columns + + already_copied: typing.Set[typing.Tuple[str, str]] = set() + linking_warning_issued = False + + for row_index, filename in enumerate(resource.iloc[:, 0]): + # "location_base_uris" is required for collections. + location_base_uris = dataset.metadata.query((resource_id, row_index, 0))['location_base_uris'] + + local_location_base_uris = [location_base_uri for location_base_uri in location_base_uris if self._is_local_file(location_base_uri)] + + # We verified in "_get_collection_resource_description" that there is only one local URI. 
+ assert len(local_location_base_uris) == 1, local_location_base_uris + local_location_base_uri = local_location_base_uris[0] + + # "location_base_uris" should be made so that we can just concat with the filename + # ("location_base_uris" end with "/"). + source_uri = local_location_base_uri + filename + source_path = url_parse.urlparse(source_uri, allow_fragments=False).path + + destination_path = os.path.join(dataset_path, resource_description['resPath'], filename) + + # Multiple rows can point to the same file, so we do not have to copy them multiple times. + if (source_path, destination_path) in already_copied: + continue + + os.makedirs(os.path.dirname(destination_path), 0o755, exist_ok=True) + + linked = False + + try: + os.link(source_path, destination_path) + linked = True + + except FileExistsError as error: + # If existing file is the same, then this is OK. Multiple rows can point to the same file. + if os.path.samefile(source_path, destination_path): + linked = True + elif filecmp.cmp(source_path, destination_path, shallow=False): + linked = True + # But otherwise we raise an exception. + else: + raise exceptions.AlreadyExistsError( + "Destination file '{destination_path}' already exists with different content than '{source_path}' has.".format( + destination_path=destination_path, + source_path=source_path, + ), + ) from error + + except OSError as error: + # OSError: [Errno 18] Invalid cross-device link + if error.errno == errno.EXDEV: + pass + else: + raise error + + # If we can't make a hard-link we try to copy the file. + if not linked: + if not linking_warning_issued: + linking_warning_issued = True + logger.warning("Saving dataset to '%(dataset_uri)s' cannot use hard-linking.", {'dataset_uri': dataset_uri}) + + try: + with open(source_path, 'rb') as source_file: + with open(destination_path, 'xb') as destination_file: + shutil.copyfileobj(source_file, destination_file) + + except FileExistsError as error: + # If existing file is the same, then this is OK. Multiple rows can point to the same file. + if os.path.samefile(source_path, destination_path): + pass + elif filecmp.cmp(source_path, destination_path, shallow=False): + pass + # But otherwise we raise an exception. + else: + raise exceptions.AlreadyExistsError( + "Destination file '{destination_path}' already exists with different content than '{source_path}' has.".format( + destination_path=destination_path, + source_path=source_path, + ), + ) from error + + already_copied.add((source_path, destination_path)) + + # TODO: Make it easier to subclass to support other column types. + def _save_dataframe(self, dataset: 'Dataset', dataset_path: str, resource_description: typing.Dict, resource_id: str, resource: typing.Any) -> None: + destination_path = os.path.join(dataset_path, resource_description['resPath']) + # A subset of "simple_data_types". + # TODO: Support additional types. + # Dicts we can try to convert to "json" column type. Lists of floats we can convert to "realVector". + # We could also probably support boolean values. + supported_column_structural_types = (str, float, int, numpy.integer, numpy.float64, numpy.bool_, type(None)) + + # We verify if structural types of columns are supported. 
+ for column_index in range(dataset.metadata.query((resource_id, metadata_base.ALL_ELEMENTS))['dimension']['length']): + selector = (resource_id, metadata_base.ALL_ELEMENTS, column_index) + metadata, exceptions_with_selectors = dataset.metadata.query_with_exceptions(selector) + + # We check structural type for all rows in a column, but also if any row has a different structural type. + for structural_type in [metadata['structural_type']] + [metadata['structural_type'] for metadata in exceptions_with_selectors.values() if 'structural_type' in metadata]: + if not issubclass(structural_type, supported_column_structural_types): + raise exceptions.NotSupportedError("Saving a D3M dataset with a column with structural type '{structural_type}' is not supported.".format(structural_type=structural_type)) + + os.makedirs(os.path.dirname(destination_path), 0o755, exist_ok=True) + + # We use "x" mode to make sure file does not already exist. + resource.to_csv(destination_path, mode='x', encoding='utf8') + + # TODO: Make it easier to subclass to support other resource types. + def _save_resource(self, dataset: 'Dataset', dataset_uri: str, dataset_path: str, resource_description: typing.Dict, resource_id: str, resource: typing.Any) -> None: + if resource_description.get('isCollection', False): + self._save_collection(dataset, dataset_uri, dataset_path, resource_description, resource_id, resource) + + elif isinstance(resource, container_pandas.DataFrame): + self._save_dataframe(dataset, dataset_path, resource_description, resource_id, resource) + + else: + raise exceptions.NotSupportedError("Saving a D3M dataset with a resource with structural type '{structural_type}' is not supported.".format(structural_type=type(resource))) + + +D = typing.TypeVar('D', bound='Dataset') + + +# TODO: It should be probably immutable. +class Dataset(dict): + """ + A class representing a dataset. + + Internally, it is a dictionary containing multiple resources (e.g., tables). + + Parameters + ---------- + resources: + A map from resource IDs to resources. + metadata: + Metadata associated with the ``data``. + load_lazy: + If constructing a lazy dataset, calling this function will read all the + data and convert the dataset to a non-lazy one. + generate_metadata: bool + Automatically generate and update the metadata. + check: + DEPRECATED: argument ignored. + source: + DEPRECATED: argument ignored. + timestamp: + DEPRECATED: argument ignored. + """ + + metadata: metadata_base.DataMetadata = None + loaders: typing.List[Loader] = [ + D3MDatasetLoader(), + CSVLoader(), + SklearnExampleLoader(), + OpenMLDatasetLoader(), + ] + savers: typing.List[Saver] = [ + D3MDatasetSaver(), + ] + + @deprecate.arguments('source', 'timestamp', 'check', message="argument ignored") + def __init__(self, resources: typing.Mapping, metadata: metadata_base.DataMetadata = None, *, + load_lazy: typing.Callable[['Dataset'], None] = None, generate_metadata: bool = False, + check: bool = True, source: typing.Any = None, timestamp: datetime.datetime = None) -> None: + super().__init__(resources) + + if isinstance(resources, Dataset) and metadata is None: + # We made a copy, so we do not have to generate metadata. + self.metadata = resources.metadata + elif metadata is not None: + # We were provided metadata, so we do not have to generate metadata. 
+ self.metadata = metadata + else: + self.metadata = metadata_base.DataMetadata() + if generate_metadata: + self.metadata = self.metadata.generate(self) + + self._load_lazy = load_lazy + + @classmethod + def load(cls, dataset_uri: str, *, dataset_id: str = None, dataset_version: str = None, dataset_name: str = None, lazy: bool = False, + compute_digest: ComputeDigest = ComputeDigest.ONLY_IF_MISSING, strict_digest: bool = False, handle_score_split: bool = True) -> 'Dataset': + """ + Tries to load dataset from ``dataset_uri`` using all registered dataset loaders. + + Parameters + ---------- + dataset_uri: + A URI to load. + dataset_id: + Override dataset ID determined by the loader. + dataset_version: + Override dataset version determined by the loader. + dataset_name: + Override dataset name determined by the loader. + lazy: + If ``True``, load only top-level metadata and not whole dataset. + compute_digest: + Compute a digest over the data? + strict_digest: + If computed digest does not match the one provided in metadata, raise an exception? + handle_score_split: + If a scoring dataset has target values in a separate file, merge them in? + + Returns + ------- + A loaded dataset. + """ + + for loader in cls.loaders: + if loader.can_load(dataset_uri): + return loader.load( + dataset_uri, dataset_id=dataset_id, dataset_version=dataset_version, dataset_name=dataset_name, + lazy=lazy, compute_digest=compute_digest, strict_digest=strict_digest, handle_score_split=handle_score_split, + ) + + raise exceptions.DatasetUriNotSupportedError( + "No known loader could load dataset from '{dataset_uri}'.".format(dataset_uri=dataset_uri), + ) + + def save(self, dataset_uri: str, *, compute_digest: ComputeDigest = ComputeDigest.ALWAYS, preserve_metadata: bool = True) -> None: + """ + Tries to save dataset to ``dataset_uri`` using all registered dataset savers. + + Parameters + ---------- + dataset_uri: + A URI to save to. + compute_digest: + Compute digest over the data when saving? + preserve_metadata: + When saving a dataset, store its metadata as well? + """ + + for saver in self.savers: + if saver.can_save(dataset_uri): + saver.save(self, dataset_uri, compute_digest=compute_digest, preserve_metadata=preserve_metadata) + return + + raise exceptions.DatasetUriNotSupportedError("No known saver could save dataset to '{dataset_uri}'.".format(dataset_uri=dataset_uri)) + + def is_lazy(self) -> bool: + """ + Return whether this dataset instance is lazy and not all data has been loaded. + + Returns + ------- + ``True`` if this dataset instance is lazy. + """ + + return self._load_lazy is not None + + def load_lazy(self) -> None: + """ + Read all the data and convert the dataset to a non-lazy one. + """ + + if self._load_lazy is not None: + self._load_lazy(self) + + # TODO: Allow one to specify priority which would then insert loader at a different place and not at the end? + @classmethod + def register_loader(cls, loader: Loader) -> None: + """ + Registers a new dataset loader. + + Parameters + ---------- + loader: + An instance of the loader class implementing a new loader. + """ + + cls.loaders.append(loader) + + # TODO: Allow one to specify priority which would then insert saver at a different place and not at the end? + @classmethod + def register_saver(cls, saver: Saver) -> None: + """ + Registers a new dataset saver. + + Parameters + ---------- + saver: + An instance of the saver class implementing a new saver. 
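+
+        For example, a hypothetical saver class ``MyDatasetSaver`` (illustrative, not part of
+        this module) implementing the ``Saver`` interface could be registered with::
+
+            Dataset.register_saver(MyDatasetSaver())
+
+        Once registered, ``Dataset.save`` will also consider this saver when resolving a dataset URI.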
+ """ + + cls.savers.append(saver) + + def __repr__(self) -> str: + return self.__str__() + + def _get_description_keys(self) -> typing.Sequence[str]: + return 'id', 'name', 'location_uris' + + def __str__(self) -> str: + metadata = self.metadata.query(()) + + return '{class_name}({description})'.format( + class_name=type(self).__name__, + description=', '.join('{key}=\'{value}\''.format(key=key, value=metadata[key]) for key in self._get_description_keys() if key in metadata), + ) + + def copy(self: D) -> D: + # Metadata is copied from provided iterable. + return type(self)(resources=self, load_lazy=self._load_lazy) + + def __copy__(self: D) -> D: + return self.copy() + + def select_rows(self: D, row_indices_to_keep: typing.Mapping[str, typing.Sequence[int]]) -> D: + """ + Generate a new Dataset from the row indices for DataFrames. + + Parameters + ---------- + row_indices_to_keep: + This is a dict where key is resource ID and value is a sequence of row indices to keep. + If a resource ID is missing, the whole related resource is kept. + + Returns + ------- + Returns a new Dataset. + """ + + resources = {} + metadata = self.metadata + + for resource_id, resource in self.items(): + # We keep any resource which is missing from "row_indices_to_keep". + if resource_id not in row_indices_to_keep: + resources[resource_id] = resource + else: + if not isinstance(resource, container_pandas.DataFrame): + raise exceptions.InvalidArgumentTypeError("Only DataFrame resources can have rows selected, not '{type}'.".format(type=type(resource))) + + row_indices = sorted(row_indices_to_keep[resource_id]) + resources[resource_id] = self[resource_id].iloc[row_indices, :].reset_index(drop=True) + + # TODO: Expose this as a general metadata method. + # In that case this has to be done recursively over all nested ALL_ELEMENTS. + # Here we are operating at resource level so we have to iterate only over first + # ALL_ELEMENTS and resource's element itself. + + # Change the metadata. Update the number of rows in the split. + # This makes a copy so that we can modify metadata in-place. + metadata = metadata.update( + (resource_id,), + { + 'dimension': { + 'length': len(row_indices), + }, + }, + ) + + # Remove all rows not in this split and reorder those which are. + for element_metadata_entry in [ + metadata._current_metadata.all_elements, + metadata._current_metadata.elements[resource_id], + ]: + if element_metadata_entry is None: + continue + + elements = element_metadata_entry.elements + new_elements_evolver = utils.EMPTY_PMAP.evolver() + for i, row_index in enumerate(row_indices): + if row_index in elements: + new_elements_evolver.set(i, elements[row_index]) + element_metadata_entry.elements = new_elements_evolver.persistent() + element_metadata_entry.is_elements_empty = not element_metadata_entry.elements + element_metadata_entry.update_is_empty() + + return type(self)(resources, metadata) + + def get_relations_graph(self) -> typing.Dict[str, typing.List[typing.Tuple[str, bool, int, int, typing.Dict]]]: + """ + Builds the relations graph for the dataset. + + Each key in the output corresponds to a resource/table. The value under a key is the list of + edges this table has. The edge is represented by a tuple of four elements. For example, + if the edge is ``(resource_id, True, index_1, index_2, custom_state)``, it + means that there is a foreign key that points to table ``resource_id``. Specifically, + ``index_1`` column in the current table points to ``index_2`` column in the table ``resource_id``. 
+ + ``custom_state`` is an empty dict when returned from this method, but allows users + of this graph to store custom state there. + + Returns + ------- + Dict[str, List[Tuple[str, bool, int, int, Dict]]] + Returns the relation graph in adjacency representation. + """ + + graph: typing.Dict[str, typing.List[typing.Tuple[str, bool, int, int, typing.Dict]]] = collections.defaultdict(list) + + for resource_id in self.keys(): + if not issubclass(self.metadata.query((resource_id,))['structural_type'], container_pandas.DataFrame): + continue + + columns_length = self.metadata.query((resource_id, metadata_base.ALL_ELEMENTS,))['dimension']['length'] + for index in range(columns_length): + column_metadata = self.metadata.query((resource_id, metadata_base.ALL_ELEMENTS, index)) + + if 'foreign_key' not in column_metadata: + continue + + if column_metadata['foreign_key']['type'] != 'COLUMN': + continue + + foreign_resource_id = column_metadata['foreign_key']['resource_id'] + + # "COLUMN" foreign keys should not point to non-DataFrame resources. + assert isinstance(self[foreign_resource_id], container_pandas.DataFrame), type(self[foreign_resource_id]) + + if 'column_index' in column_metadata['foreign_key']: + foreign_index = column_metadata['foreign_key']['column_index'] + elif 'column_name' in column_metadata['foreign_key']: + foreign_index = self.metadata.get_column_index_from_column_name(column_metadata['foreign_key']['column_name'], at=(foreign_resource_id,)) + else: + raise exceptions.UnexpectedValueError("Invalid foreign key: {foreign_key}".format(foreign_key=column_metadata['foreign_key'])) + + # "True" and "False" implies forward and backward relationships, respectively. + graph[resource_id].append((foreign_resource_id, True, index, foreign_index, {})) + graph[foreign_resource_id].append((resource_id, False, foreign_index, index, {})) + + return graph + + def get_column_references_by_column_index(self) -> typing.Dict[str, typing.Dict[metadata_base.ColumnReference, typing.List[metadata_base.ColumnReference]]]: + references: typing.Dict[str, typing.Dict[metadata_base.ColumnReference, typing.List[metadata_base.ColumnReference]]] = { + 'confidence_for': {}, + 'rank_for': {}, + 'boundary_for': {}, + 'foreign_key': {}, + } + + for resource_id, resource in self.items(): + if not isinstance(resource, container_pandas.DataFrame): + continue + + resource_references = self.metadata.get_column_references_by_column_index(resource_id, at=(resource_id,)) + + references['confidence_for'].update(resource_references['confidence_for']) + references['rank_for'].update(resource_references['rank_for']) + references['boundary_for'].update(resource_references['boundary_for']) + references['foreign_key'].update(resource_references['foreign_key']) + + return references + + @classmethod + def _canonical_dataset_description(cls, dataset_description: typing.Dict, *, set_no_value: bool = False) -> typing.Dict: + """ + Currently, this is just removing any local URIs the description might have. + """ + + # Making a copy. + dataset_description = dict(dataset_description) + + utils.filter_local_location_uris(dataset_description, empty_value=metadata_base.NO_VALUE if set_no_value else None) + + return dataset_description + + def to_json_structure(self, *, canonical: bool = False) -> typing.Dict: + """ + Returns only a top-level dataset description. 
+ """ + + # Using "to_json_structure" and not "to_internal_json_structure" because + # it is not indented that this would be parsed back directly, but just used + # to know where to find the dataset. + dataset_description = utils.to_json_structure(self.metadata.query(())) + + if canonical: + dataset_description = self._canonical_dataset_description(dataset_description) + + metadata_base.CONTAINER_SCHEMA_VALIDATOR.validate(dataset_description) + + return dataset_description + + @classmethod + def _canonical_metadata(cls, metadata: metadata_base.DataMetadata) -> metadata_base.DataMetadata: + """ + Currently, this is just removing any local URIs the metadata might have. + """ + + metadata = metadata.update((), cls._canonical_dataset_description(metadata.query(()), set_no_value=True)) + + metadata = cls._canonical_metadata_traverse(metadata, metadata, []) + + return metadata + + @classmethod + def _canonical_metadata_traverse(cls, metadata: metadata_base.DataMetadata, output_metadata: metadata_base.DataMetadata, selector: metadata_base.ListSelector) -> metadata_base.DataMetadata: + # "ALL_ELEMENTS" is always first, if it exists, which works in our favor here. + elements = metadata.get_elements(selector) + + for element in elements: + new_selector = selector + [element] + new_metadata = dict(metadata._query(new_selector, metadata._current_metadata, 0)) + utils.filter_local_location_uris(new_metadata, empty_value=metadata_base.NO_VALUE) + output_metadata = output_metadata.update(new_selector, new_metadata) + + output_metadata = cls._canonical_metadata_traverse(metadata, output_metadata, new_selector) + + return output_metadata + + +def dataset_serializer(obj: Dataset) -> dict: + data = { + 'metadata': obj.metadata, + 'dataset': dict(obj), + } + + if type(obj) is not Dataset: + data['type'] = type(obj) + + return data + + +def dataset_deserializer(data: dict) -> Dataset: + dataset = data.get('type', Dataset)(data['dataset'], data['metadata']) + return dataset + + +if pyarrow_lib is not None: + pyarrow_lib._default_serialization_context.register_type( + Dataset, 'd3m.dataset', + custom_serializer=dataset_serializer, + custom_deserializer=dataset_deserializer, + ) + + +def get_dataset( + dataset_uri: str, *, compute_digest: ComputeDigest = ComputeDigest.ONLY_IF_MISSING, + strict_digest: bool = False, lazy: bool = False, + datasets_dir: str = None, handle_score_split: bool = True, +) -> Dataset: + if datasets_dir is not None: + datasets, problem_descriptions = utils.get_datasets_and_problems(datasets_dir, handle_score_split) + + if dataset_uri in datasets: + dataset_uri = datasets[dataset_uri] + + dataset_uri = utils.fix_uri(dataset_uri) + + return Dataset.load(dataset_uri, compute_digest=compute_digest, strict_digest=strict_digest, lazy=lazy) + + +def describe_handler(arguments: argparse.Namespace, *, dataset_resolver: typing.Callable = None) -> None: + if dataset_resolver is None: + dataset_resolver = get_dataset + + output_stream = getattr(arguments, 'output', sys.stdout) + + has_errored = False + + for dataset_path in arguments.datasets: + if getattr(arguments, 'list', False): + print(dataset_path, file=output_stream) + + try: + start_timestamp = time.perf_counter() + dataset = dataset_resolver( + dataset_path, + compute_digest=ComputeDigest[getattr(arguments, 'compute_digest', ComputeDigest.ONLY_IF_MISSING.name)], + strict_digest=getattr(arguments, 'strict_digest', False), + lazy=getattr(arguments, 'lazy', False), + ) + end_timestamp = time.perf_counter() + except Exception as error: + if 
getattr(arguments, 'continue', False): + traceback.print_exc(file=output_stream) + print(f"Error loading dataset: {dataset_path}", file=output_stream) + has_errored = True + continue + else: + raise Exception(f"Error loading dataset: {dataset_path}") from error + + try: + if getattr(arguments, 'print', False) or getattr(arguments, 'metadata', False) or getattr(arguments, 'time', False): + if getattr(arguments, 'print', False): + pprint.pprint(dataset, stream=output_stream) + if getattr(arguments, 'metadata', False): + dataset.metadata.pretty_print(handle=output_stream) + if getattr(arguments, 'time', False): + print(f"Time: {(end_timestamp - start_timestamp):.3f}s", file=output_stream) + else: + dataset_description = dataset.to_json_structure(canonical=True) + + json.dump( + dataset_description, + output_stream, + indent=(getattr(arguments, 'indent', 2) or None), + sort_keys=getattr(arguments, 'sort_keys', False), + allow_nan=False, + ) # type: ignore + output_stream.write('\n') + except Exception as error: + if getattr(arguments, 'continue', False): + traceback.print_exc(file=output_stream) + print(f"Error describing dataset: {dataset_path}", file=output_stream) + has_errored = True + continue + else: + raise Exception(f"Error describing dataset: {dataset_path}") from error + + if has_errored: + sys.exit(1) + + +def convert_handler(arguments: argparse.Namespace, *, dataset_resolver: typing.Callable = None) -> None: + if dataset_resolver is None: + dataset_resolver = get_dataset + + try: + dataset = dataset_resolver( + arguments.input_uri, + compute_digest=ComputeDigest[getattr(arguments, 'compute_digest', ComputeDigest.ONLY_IF_MISSING.name)], + strict_digest=getattr(arguments, 'strict_digest', False), + ) + except Exception as error: + raise Exception(f"Error loading dataset '{arguments.input_uri}'.") from error + + output_uri = utils.fix_uri(arguments.output_uri) + + try: + dataset.save(output_uri, preserve_metadata=getattr(arguments, 'preserve_metadata', True)) + except Exception as error: + raise Exception(f"Error saving dataset '{arguments.input_uri}' to '{output_uri}'.") from error + + +def main(argv: typing.Sequence) -> None: + raise exceptions.NotSupportedError("This CLI has been removed. Use \"python3 -m d3m dataset describe\" instead.") + + +if __name__ == '__main__': + main(sys.argv) diff --git a/d3m/d3m/container/list.py b/d3m/d3m/container/list.py new file mode 100644 index 0000000..56591ba --- /dev/null +++ b/d3m/d3m/container/list.py @@ -0,0 +1,170 @@ +import datetime +import typing + +import numpy # type: ignore +import pandas # type: ignore + +from d3m import deprecate +from d3m.metadata import base as metadata_base + +# See: https://gitlab.com/datadrivendiscovery/d3m/issues/66 +try: + from pyarrow import lib as pyarrow_lib # type: ignore +except ModuleNotFoundError: + pyarrow_lib = None + +__all__ = ('List',) + +L = typing.TypeVar('L', bound='List') + + +class List(list): + """ + Extended Python standard `list` with the ``metadata`` attribute. + + You should use only standard data and container types as its elements. + + Metadata attribute is immutable, so if you ``update`` it, you should reassign it back:: + + l.metadata = l.metadata.update(...) + + `List` is mutable, but this can introduce issues during runtime if a primitive + modifies its inputs directly. Callers of primitives are encouraged + to make it immutable to assure such behavior is detected/prevented, + and primitives should copy inputs to a mutable `List` before modifying it. 
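+
+    For example, a list with automatically generated metadata might be constructed as::
+
+        l = List([1, 2, 3], generate_metadata=True)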
+ + Parameters + ---------- + iterable: + Optional initial values for the list. + metadata: + Optional initial metadata for the top-level of the list, or top-level metadata to be updated + if ``iterable`` is another instance of this list class. + generate_metadata: + Automatically generate and update the metadata. + check: + DEPRECATED: argument ignored. + source: + DEPRECATED: argument ignored. + timestamp: + DEPRECATED: argument ignored. + + Attributes + ---------- + metadata: + Metadata associated with the list. + """ + + metadata: metadata_base.DataMetadata + + @deprecate.arguments('source', 'timestamp', 'check', message="argument ignored") + def __init__(self, iterable: typing.Iterable = (), metadata: typing.Dict[str, typing.Any] = None, *, + generate_metadata: bool = False, check: bool = True, source: typing.Any = None, + timestamp: datetime.datetime = None) -> None: + if isinstance(iterable, pandas.DataFrame): + super().__init__(type(self)(row) for row in iterable.itertuples(index=False, name=None)) + else: + if isinstance(iterable, numpy.matrix): + # One cannot iterate over a matrix segment by segment. You always get back + # a matrix (2D structure) and not an array of rows or columns. By converting + # it to an array such iteration segment by segment works. + iterable = numpy.array(iterable) + super().__init__(iterable) + + from d3m import types + + if isinstance(iterable, types.Container): + if isinstance(iterable, List): + # We made a copy, so we do not have to generate metadata. + self.metadata: metadata_base.DataMetadata = iterable.metadata + else: + self.metadata: metadata_base.DataMetadata = iterable.metadata + if generate_metadata: + self.metadata = self.metadata.generate(self) + + if metadata is not None: + self.metadata: metadata_base.DataMetadata = self.metadata.update((), metadata) + else: + self.metadata: metadata_base.DataMetadata = metadata_base.DataMetadata(metadata) + if generate_metadata: + self.metadata = self.metadata.generate(self) + + def copy(self: L) -> L: + # Metadata is copied from provided iterable. + return type(self)(iterable=self) + + @typing.overload # type: ignore + def __getitem__(self, i: int) -> typing.Any: + ... + + def __getitem__(self: L, s: slice) -> L: # type: ignore + if isinstance(s, slice): + lst = type(self)(iterable=super().__getitem__(s)) + # TODO: We could do a slice in metadata as well? + # Update dimensions. Slice per-element metadata. + lst.metadata = self.metadata + return lst + else: + return super().__getitem__(s) + + def __add__(self: L, x: typing.List) -> L: + lst = type(self)(iterable=super().__add__(x)) + # TODO: We could do add in metadata as well? + # Update dimensions. Maybe x is List and has metadata. + # What to do if both have conflicting ALL_ELEMENTS metadata? + lst.metadata = self.metadata + return lst + + def __iadd__(self: L, x: typing.Iterable) -> L: + super().__iadd__(x) + # TODO: We could do add in metadata as well? + # Update dimensions. Maybe x is List and has metadata. + # What to do if both have conflicting ALL_ELEMENTS metadata? + return self + + def __mul__(self: L, n: int) -> L: + lst = type(self)(iterable=super().__mul__(n)) + # TODO: We could do multiply in metadata as well? + # Update dimensions. Multiplicate per-element metadata. + lst.metadata = self.metadata + return lst + + def __rmul__(self: L, n: int) -> L: + lst = type(self)(iterable=super().__rmul__(n)) + # TODO: We could do multiply in metadata as well? + # Update dimensions. Multiplicate per-element metadata. 
+ lst.metadata = self.metadata + return lst + + def __setstate__(self, state: dict) -> None: + self.__dict__ = state + + def __reduce__(self) -> typing.Tuple[typing.Callable, typing.Tuple, dict]: + reduced = super().__reduce__() + return reduced + + +def list_serializer(obj: List) -> dict: + data = { + 'metadata': obj.metadata, + 'list': list(obj), + } + + if type(obj) is not List: + data['type'] = type(obj) + + return data + + +def list_deserializer(data: dict) -> List: + data_list = data.get('type', List)(data['list']) + data_list.metadata = data['metadata'] + return data_list + + +if pyarrow_lib is not None: + pyarrow_lib._default_serialization_context.register_type( + List, 'd3m.list', + custom_serializer=list_serializer, + custom_deserializer=list_deserializer, + ) diff --git a/d3m/d3m/container/numpy.py b/d3m/d3m/container/numpy.py new file mode 100644 index 0000000..bf75f77 --- /dev/null +++ b/d3m/d3m/container/numpy.py @@ -0,0 +1,128 @@ +import datetime +import typing + +import numpy # type: ignore + +from d3m import deprecate +from d3m.metadata import base as metadata_base + +# See: https://gitlab.com/datadrivendiscovery/d3m/issues/66 +try: + from pyarrow import lib as pyarrow_lib # type: ignore +except ModuleNotFoundError: + pyarrow_lib = None + +__all__ = ('ndarray',) + +# This implementation is based on these guidelines: +# https://docs.scipy.org/doc/numpy-1.13.0/user/basics.subclassing.html + +N = typing.TypeVar('N', bound='ndarray') + + +# TODO: We could implement also __array_ufunc__ and adapt metadata as well after in-place changes to data? +class ndarray(numpy.ndarray): + """ + Extended `numpy.ndarray` with the ``metadata`` attribute. + + Parameters + ---------- + input_array: + Anything array-like to create an instance from. Including lists and standard numpy arrays. + metadata: + Optional initial metadata for the top-level of the array, or top-level metadata to be updated + if ``input_array`` is another instance of this array class. + generate_metadata: + Automatically generate and update the metadata. + check: + DEPRECATED: argument ignored. + source: + DEPRECATED: argument ignored. + timestamp: + DEPRECATED: argument ignored. + + Attributes + ---------- + metadata: + Metadata associated with the array. + """ + + metadata: metadata_base.DataMetadata + + @deprecate.arguments('source', 'timestamp', 'check', message="argument ignored") + def __new__(cls: typing.Type[N], input_array: typing.Sequence, metadata: typing.Dict[str, typing.Any] = None, *, + generate_metadata: bool = False, check: bool = True, source: typing.Any = None, timestamp: datetime.datetime = None) -> N: + array = numpy.asarray(input_array).view(cls) + + # Importing here to prevent import cycle. + from d3m import types + + if isinstance(input_array, types.Container): + if isinstance(input_array, ndarray): + # We made a copy, so we do not have to generate metadata. + array.metadata = input_array.metadata # type: ignore + else: + array.metadata = input_array.metadata + if generate_metadata: + array.metadata = array.metadata.generate(array) + + if metadata is not None: + array.metadata = array.metadata.update((), metadata) + else: + array.metadata = metadata_base.DataMetadata(metadata) + if generate_metadata: + array.metadata = array.metadata.generate(array) + + return array + + def __array_finalize__(self, obj: typing.Any) -> None: + # If metadata attribute already exists. 
+ if hasattr(self, 'metadata'): + return + + if obj is not None and isinstance(obj, ndarray) and hasattr(obj, 'metadata'): + # TODO: We could adapt (if this is after a slice) metadata instead of just copying? + self.metadata: metadata_base.DataMetadata = obj.metadata + else: + self.metadata = metadata_base.DataMetadata() + + def __reduce__(self) -> typing.Tuple: + reduced = list(super().__reduce__()) + + reduced[2] = { + 'numpy': reduced[2], + 'metadata': self.metadata, + } + + return tuple(reduced) + + def __setstate__(self, state: dict) -> None: + super().__setstate__(state['numpy']) + + self.metadata = state['metadata'] + + +def ndarray_serializer(obj: ndarray) -> dict: + data = { + 'metadata': obj.metadata, + 'numpy': obj.view(numpy.ndarray), + } + + if type(obj) is not ndarray: + data['type'] = type(obj) + + return data + + +def ndarray_deserializer(data: dict) -> ndarray: + array = data['numpy'].view(data.get('type', ndarray)) + array.metadata = data['metadata'] + return array + + +if pyarrow_lib is not None: + pyarrow_lib._default_serialization_context.register_type( + ndarray, 'd3m.ndarray', + custom_serializer=ndarray_serializer, + custom_deserializer=ndarray_deserializer, + ) diff --git a/d3m/d3m/container/pandas.py b/d3m/d3m/container/pandas.py new file mode 100644 index 0000000..e36eff7 --- /dev/null +++ b/d3m/d3m/container/pandas.py @@ -0,0 +1,495 @@ +import copy as copy_module +import datetime +import logging +import typing + +import numpy # type: ignore +import pandas # type: ignore +from pandas.core.dtypes import common as pandas_common # type: ignore + +from . import list as container_list +from d3m import deprecate, exceptions +from d3m.metadata import base as metadata_base + +# See: https://gitlab.com/datadrivendiscovery/d3m/issues/66 +try: + from pyarrow import lib as pyarrow_lib # type: ignore +except ModuleNotFoundError: + pyarrow_lib = None + +__all__ = ('DataFrame',) + +logger = logging.getLogger(__name__) + +# This implementation is based on these guidelines: +# https://pandas.pydata.org/pandas-docs/stable/internals.html#subclassing-pandas-data-structures + +D = typing.TypeVar('D', bound='DataFrame') + +Data = typing.Union[typing.Sequence, typing.Mapping] + + +# We have to convert our container "List" to regular list because Pandas do not accept list +# subclasses. See: https://github.com/pandas-dev/pandas/issues/21226 +def convert_lists(data: Data = None) -> typing.Optional[Data]: + if isinstance(data, list) and len(data): + if isinstance(data, container_list.List): + data = list(data) + if isinstance(data, list) and isinstance(data[0], container_list.List): + data = [list(row) for row in data] + + return data + + +def convert_ndarray(data: Data = None) -> typing.Optional[Data]: + """ + If ndarray has more than 2 dimensions, deeper dimensions are converted to stand-alone numpy arrays. + """ + + if isinstance(data, numpy.ndarray) and len(data.shape) > 2: + outer_array = numpy.ndarray(shape=(data.shape[0], data.shape[1]), dtype=numpy.object) + for i in range(data.shape[0]): + for j in range(data.shape[1]): + # This retains the type, so if "data" is a container "ndarray", then also "data[i, j]" is. + outer_array[i, j] = data[i, j] + + return outer_array + + return data + + +class DataFrame(pandas.DataFrame): + """ + Extended `pandas.DataFrame` with the ``metadata`` attribute. + + Parameters + ---------- + data: + Anything array-like to create an instance from. 
+ metadata: + Optional initial metadata for the top-level of the data frame, or top-level metadata to be updated + if ``data`` is another instance of this data frame class. + index: + Index to use for resulting frame. + columns: + Column labels to use for resulting frame. + dtype: + Data type to force. + copy: + Copy data from inputs. + generate_metadata: + Automatically generate and update the metadata. + check: + DEPRECATED: argument ignored. + source: + DEPRECATED: argument ignored. + timestamp: + DEPRECATED: argument ignored. + + Attributes + ---------- + metadata: + Metadata associated with the data frame. + """ + + metadata: metadata_base.DataMetadata + + # Reversed properties. + _metadata = ['metadata'] + + @property + def _constructor(self) -> type: + return DataFrame + + @deprecate.arguments('source', 'timestamp', 'check', message="argument ignored") + def __init__(self, data: Data = None, metadata: typing.Dict[str, typing.Any] = None, index: typing.Union[pandas.Index, Data] = None, + columns: typing.Union[pandas.Index, Data] = None, dtype: typing.Union[numpy.dtype, str, pandas_common.ExtensionDtype] = None, copy: bool = False, *, + generate_metadata: bool = False, check: bool = True, source: typing.Any = None, timestamp: datetime.datetime = None) -> None: + # If not a constructor call to this exact class, then a child constructor + # is responsible to call a pandas constructor. + if type(self) is DataFrame: + pandas.DataFrame.__init__(self, data=convert_ndarray(convert_lists(data)), index=index, columns=columns, dtype=dtype, copy=copy) + + # Importing here to prevent import cycle. + from d3m import types + + if isinstance(data, types.Container): # type: ignore + if isinstance(data, DataFrame): + # We made a copy, so we do not have to generate metadata. + self.metadata: metadata_base.DataMetadata = data.metadata + else: + self.metadata: metadata_base.DataMetadata = data.metadata + if generate_metadata: + self.metadata = self.metadata.generate(self) + + if metadata is not None: + self.metadata: metadata_base.DataMetadata = self.metadata.update((), metadata) + else: + self.metadata: metadata_base.DataMetadata = metadata_base.DataMetadata(metadata) + if generate_metadata: + self.metadata = self.metadata.generate(self) + + def __finalize__(self: D, other: typing.Any, method: str = None, **kwargs: typing.Any) -> D: + self = super().__finalize__(other, method, **kwargs) + + # Merge operation: using metadata of the left object. + if method == 'merge': + obj = other.left + # Concat operation: using metadata of the first object. + elif method == 'concat': + obj = other.objs[0] + else: + obj = other + + if isinstance(obj, DataFrame): + # TODO: We could adapt (if this is after a slice) metadata instead of just copying? + self.metadata: metadata_base.DataMetadata = obj.metadata + # "metadata" attribute should already be set in "__init__", + # but if we got here without it, let's set it now. 
+ elif not hasattr(self, 'metadata'): + self.metadata: metadata_base.DataMetadata = metadata_base.DataMetadata() + + return self + + def __getstate__(self) -> dict: + state = super().__getstate__() + + state['metadata'] = self.metadata + + return state + + def __setstate__(self, state: dict) -> None: + super().__setstate__(state) + + self.metadata = state['metadata'] + + def to_csv(self, path_or_buf: typing.Union[typing.IO[typing.Any], str] = None, sep: str = ',', na_rep: str = '', + float_format: str = None, columns: typing.Sequence = None, header: typing.Union[bool, typing.Sequence[str]] = True, + index: bool = False, **kwargs: typing.Any) -> typing.Optional[str]: + """ + Extends `pandas.DataFrame` to provide better default method for writing DataFrames to CSV files. + If ``header`` argument is not explicitly provided column names are derived from metadata of the DataFrame. + By default DataFrame indices are not written. + + See Also + -------- + `pandas.DataFrame.to_csv `_ + + Parameters + ---------- + path_or_buf: + File path or object, if None is provided the result is returned as a string. + sep: + String of length 1. Field delimiter for the output file. + na_rep: + Missing data representation. + float_format: + Format string for floating point numbers. + columns: + Columns to write. + header: + Write out the column names. If a list of strings is given it is assumed to be aliases for the column names. + index: + Write row names (index). + kwargs: + Other arguments. + """ + + if header is True: + header = [] + for column_index in range(len(self.columns)): + # We use column name from the DataFrame if metadata does not have it. This allows a bit more compatibility. + header.append(self.metadata.query_column(column_index).get('name', self.columns[column_index])) + + result = super().to_csv(path_or_buf=path_or_buf, sep=sep, na_rep=na_rep, float_format=float_format, columns=columns, header=header, index=index, **kwargs) + + # Make sure handles are flushed so that no data is lost when used with CLI file handles. + # CLI file handles are generally used outside of a context manager which would otherwise + # handle that. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/436 + if hasattr(path_or_buf, 'flush') and not getattr(path_or_buf, 'closed', False): + typing.cast(typing.IO, path_or_buf).flush() + + return result + + def select_columns(self: D, columns: typing.Sequence[metadata_base.SimpleSelectorSegment], *, allow_empty_columns: bool = False) -> D: + """ + Returns a new DataFrame with data and metadata only for given ``columns``. + Moreover, columns are renumbered based on the position in ``columns`` list. + Top-level metadata stays unchanged, except for updating the length of the columns dimension to + the number of columns. + + So if the ``columns`` is ``[3, 6, 5]`` then output DataFrame will have three columns, ``[0, 1, 2]``, + mapping data and metadata for columns ``3`` to ``0``, ``6`` to ``1`` and ``5`` to ``2``. + + This allows also duplication of columns. + """ + + if not columns and not allow_empty_columns: + raise exceptions.InvalidArgumentValueError("No columns selected.") + + output = self.iloc[:, list(columns)] + + # We want to make sure it is a true copy. 
+ if output._is_view: + output = output.copy() + else: + output._set_is_copy(copy=False) + + output.metadata = self.metadata.select_columns(columns, allow_empty_columns=allow_empty_columns) + + return output + + def remove_columns(self: D, column_indices: typing.Sequence[int]) -> D: + """ + Removes columns from the DataFrame and returns one without them, together with all + metadata for columns removed as well. + + It throws an exception if no columns would be left after removing columns. + """ + + # We are not using "drop" because we are dropping by the column index (to support columns with same name). + + columns = list(range(self.shape[1])) + + if not columns: + raise ValueError("No columns to remove.") + + for column_index in column_indices: + columns.remove(column_index) + + if not columns: + raise ValueError("Removing columns would have removed the last column.") + + output = self.iloc[:, list(columns)] + + # We want to make sure it is a true copy. + if output._is_view: + output = output.copy() + else: + output._set_is_copy(copy=False) + + output.metadata = self.metadata.select_columns(columns) + + return output + + def append_columns(self: D, right: 'DataFrame', *, use_right_metadata: bool = False) -> D: + """ + Appends all columns from ``right`` to the right of this DataFrame, together with all metadata + of columns. + + Metadata at the top-level of ``right`` DataFrame is ignored, not merged, except if ``use_right_metadata`` + is set, in which case top-level metadata of this DataFrame is ignored and one from ``right`` is + used instead. + """ + + outputs = pandas.concat([self, right], axis=1) + outputs.metadata = self.metadata + + outputs.metadata = outputs.metadata.append_columns(right.metadata, use_right_metadata=use_right_metadata) + + return outputs + + def insert_columns(self: D, columns: 'DataFrame', at_column_index: int) -> D: + """ + Inserts all columns from ``columns`` before ``at_column_index`` column in this DataFrame, + pushing all existing columns to the right. + + E.g., ``at_column_index == 0`` means inserting ``columns`` at the beginning of this DataFrame. + + Top-level metadata of ``columns`` is ignored. + """ + + columns_length = self.shape[1] + + if at_column_index < 0: + raise exceptions.InvalidArgumentValueError("\"at_column_index\" is smaller than 0.") + if at_column_index > columns_length: + raise exceptions.InvalidArgumentValueError("\"at_column_index\" is larger than the range of existing columns.") + + if at_column_index == 0: + return columns.append_columns(self, use_right_metadata=True) + + if at_column_index == columns_length: + return self.append_columns(columns) + + # TODO: This could probably be optimized without all the slicing and joining. + + before = self.select_columns(list(range(0, at_column_index))) + after = self.select_columns(list(range(at_column_index, columns_length))) + + return before.append_columns(columns).append_columns(after) + + def _replace_column(self: D, column_index: int, columns: 'DataFrame', columns_column_index: int) -> D: + # We do not use "self.iloc[:, column_index] = columns.iloc[:, columns_column_index]" + # but use the following as a workaround. + # See: https://github.com/pandas-dev/pandas/issues/22036 + # "self.iloc[:, [column_index]] = columns.iloc[:, [columns_column_index]]" does not work either. 
+ # See: https://github.com/pandas-dev/pandas/issues/22046 + output = pandas.concat([self.iloc[:, 0:column_index], columns.iloc[:, [columns_column_index]], self.iloc[:, column_index + 1:]], axis=1) + output.metadata = output.metadata._replace_column(column_index, columns.metadata, columns_column_index) + return output + + def replace_columns(self: D, columns: 'DataFrame', column_indices: typing.Sequence[int], *, copy: bool = True) -> D: + """ + Replaces columns listed in ``column_indices`` with ``columns``, in order, in this DataFrame. + + ``column_indices`` and ``columns`` do not have to match in number of columns. Columns are first + replaced in order for matching indices and columns. If then there are more ``column_indices`` than + ``columns``, additional ``column_indices`` columns are removed. If there are more ``columns`` than + ``column_indices`` columns, then additional ``columns`` are inserted after the last replaced column. + + If ``column_indices`` is empty, then the behavior is equivalent to calling ``append_columns``. + + Top-level metadata of ``columns`` is ignored. + """ + + # TODO: This could probably be optimized without all the slicing and joining. + + if not column_indices: + return self.append_columns(columns) + + if copy: + # We have to copy because "_replace" is modifying data in-place. + outputs = copy_module.copy(self) + else: + outputs = self + + columns_length = columns.shape[1] + columns_to_remove = [] + i = 0 + + # This loop will run always at least once, so "column_index" will be set. + while i < len(column_indices): + column_index = column_indices[i] + + if i < columns_length: + outputs = outputs._replace_column(column_index, columns, i) + else: + # If there are more column indices than columns in "columns", we + # select additional columns for removal. + columns_to_remove.append(column_index) + + i += 1 + + # When there are less column indices than columns in "columns", we insert the rest after + # the last replaced column. + if i < columns_length: + columns = columns.select_columns(list(range(i, columns_length))) + # "column_index" points to the last place we inserted a column, so "+ 1" points after it. + outputs = outputs.insert_columns(columns, column_index + 1) + + # We remove columns at the end so that we do not break and column index used before. + # When removing columns, column indices shift. + if columns_to_remove: + outputs = outputs.remove_columns(columns_to_remove) + + return outputs + + def _sort_right_indices(self: 'DataFrame', right: D, indices: typing.Sequence[int], right_indices: typing.Sequence[int]) -> D: + # We try to handle different cases. + + # We do not do anything special. We assume both indices are the same. + if len(indices) == 1 and len(right_indices) == 1: + # TODO: Handle the case when not all index values exist and "reindex" fills values in: we should fill with NA relevant to the column type. + return right.set_index(right.iloc[:, right_indices[0]]).reindex(self.iloc[:, indices[0]]).reset_index(drop=True) + + index_names = [self.metadata.query_column(index).get('name', None) for index in indices] + right_index_names = [right.metadata.query_column(right_index).get('name', None) for right_index in right_indices] + + index_series = [self.iloc[:, index] for index in indices] + right_index_series = [right.iloc[:, right_index] for right_index in right_indices] + + # Number match, names match, order match, things look good. 
+ if index_names == right_index_names: + # We know the length is larger than 1 because otherwise the first case would match. + assert len(indices) > 1 + assert len(indices) == len(right_indices) + + # TODO: Handle the case when not all index values exist and "reindex" fills values in: we should fill with NA relevant to the column type. + return right.set_index(right_index_series).reindex(index_series).reset_index(drop=True) + + sorted_index_names = sorted(index_names) + sorted_right_index_names = sorted(right_index_names) + + # Number and names match, but not the order. + if sorted_index_names == sorted_right_index_names: + # We know the length is larger than 1 because otherwise the first case would match. + assert len(indices) > 1 + assert len(indices) == len(right_indices) + + # We sort index series to be in the sorted order based on index names. + index_series = [s for _, s in sorted(zip(index_names, index_series), key=lambda pair: pair[0])] + right_index_series = [s for _, s in sorted(zip(right_index_names, right_index_series), key=lambda pair: pair[0])] + + # TODO: Handle the case when not all index values exist and "reindex" fills values in: we should fill with NA relevant to the column type. + return right.set_index(right_index_series).reindex(index_series).reset_index(drop=True) + + if len(index_series) == len(right_index_series): + # We know the length is larger than 1 because otherwise the first case would match. + assert len(indices) > 1 + + logger.warning("Primary indices both on left and right not have same names, but they do match in number.") + + # TODO: Handle the case when not all index values exist and "reindex" fills values in: we should fill with NA relevant to the column type. + return right.set_index(right_index_series).reindex(index_series).reset_index(drop=True) + + # It might be that there are duplicate columns on either or even both sides, + # but that should be resolved by adding a primitive to remove duplicate columns first. + raise ValueError("Left and right primary indices do not match in number.") + + def horizontal_concat(self: D, right: D, *, use_index: bool = True, remove_second_index: bool = True, use_right_metadata: bool = False) -> D: + """ + Similar to ``append_columns``, but it respects primary index columns, by default. + + It has some heuristics how it tries to match up primary index columns in the case that there are + multiple of them, but generally it aligns samples by all primary index columns. + + It is required that both inputs have the same number of samples. + """ + + self.metadata._check_same_number_of_samples(right.metadata) + + left_indices = self.metadata.get_index_columns() + right_indices = right.metadata.get_index_columns() + + if left_indices and right_indices: + if use_index: + old_right_metadata = right.metadata + right = self._sort_right_indices(right, left_indices, right_indices) + # TODO: Reorder metadata rows as well. + # This should be relatively easy because we can just modify + # "right.metadata._current_metadata.metadata" map. + right.metadata = old_right_metadata + + # Removing second primary key columns. 
+ if remove_second_index: + right = right.remove_columns(right_indices) + + return self.append_columns(right, use_right_metadata=use_right_metadata) + + +def dataframe_serializer(obj: DataFrame) -> dict: + data = { + 'metadata': obj.metadata, + 'pandas': pandas.DataFrame(obj), + } + + if type(obj) is not DataFrame: + data['type'] = type(obj) + + return data + + +def dataframe_deserializer(data: dict) -> DataFrame: + df = data.get('type', DataFrame)(data['pandas']) + df.metadata = data['metadata'] + return df + + +if pyarrow_lib is not None: + pyarrow_lib._default_serialization_context.register_type( + DataFrame, 'd3m.dataframe', + custom_serializer=dataframe_serializer, + custom_deserializer=dataframe_deserializer, + ) diff --git a/d3m/d3m/container/utils.py b/d3m/d3m/container/utils.py new file mode 100644 index 0000000..989ec59 --- /dev/null +++ b/d3m/d3m/container/utils.py @@ -0,0 +1,50 @@ +import uuid +import os +import json +import typing + +from d3m import container as container_module, exceptions, utils +from d3m.container import dataset as dataset_module + + +def save_container(container: typing.Any, output_dir: str) -> None: + # Saving data. + if isinstance(container, container_module.Dataset): + dataset_root_metadata = container.metadata.query(()) + + missing_metadata: typing.Dict = {} + for d3m_path, (dataset_path, required) in dataset_module.D3M_TO_DATASET_FIELDS.items(): + if not required: + continue + + if utils.get_dict_path(dataset_root_metadata, dataset_path) is None: + # TODO: Use some better value instead of this random value? + utils.set_dict_path(missing_metadata, dataset_path, str(uuid.uuid4())) + + if missing_metadata: + container = container.copy() + container.metadata = container.metadata.update((), missing_metadata) + + # Dataset saver creates any missing directories. + dataset_uri = 'file://{dataset_path}'.format(dataset_path=os.path.abspath(os.path.join(output_dir, 'datasetDoc.json'))) + container.save(dataset_uri) + else: + # We do not want to override anything. + os.makedirs(output_dir, exist_ok=False) + dataframe_path = os.path.join(output_dir, 'data.csv') + + if isinstance(container, container_module.DataFrame): + container.to_csv(dataframe_path) + elif isinstance(container, (container_module.List, container_module.ndarray)): + container = container_module.DataFrame(container) + container.to_csv(dataframe_path) + else: + raise exceptions.NotSupportedError("Value with type '{value_type}' cannot be saved as a container type.".format(value_type=type(container))) + + # Saving metadata. This is just for debugging purposes, so we are + # using "to_json_structure" and not "to_internal_json_structure". 
+ input_metadata = container.metadata.to_json_structure() + metadata_path = os.path.join(output_dir, 'metadata.json') + + with open(metadata_path, 'w') as outfile: + json.dump(input_metadata, outfile, indent=2, sort_keys=True, allow_nan=False) diff --git a/d3m/d3m/contrib/__init__.py b/d3m/d3m/contrib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/d3m/d3m/contrib/pipelines/f596cd77-25f8-4d4c-a350-bb30ab1e58f6.yml b/d3m/d3m/contrib/pipelines/f596cd77-25f8-4d4c-a350-bb30ab1e58f6.yml new file mode 100644 index 0000000..e95ecd5 --- /dev/null +++ b/d3m/d3m/contrib/pipelines/f596cd77-25f8-4d4c-a350-bb30ab1e58f6.yml @@ -0,0 +1,31 @@ +id: f596cd77-25f8-4d4c-a350-bb30ab1e58f6 +schema: https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json +source: + name: Mitar +created: "2020-04-18T11:42:44.138742Z" +name: Scoring pipeline +description: |- + A general scoring pipeline. +inputs: + - name: predictions + - name: score dataset +outputs: + - name: scores + data: steps.0.produce +steps: + # Step 0. + - type: PRIMITIVE + primitive: + id: 799802fb-2e11-4ab7-9c5e-dda09eb52a70 + version: 0.5.0 + python_path: d3m.primitives.evaluation.compute_scores.Core + name: Compute scores given the metrics to use + arguments: + inputs: + type: CONTAINER + data: inputs.0 + score_dataset: + type: CONTAINER + data: inputs.1 + outputs: + - id: produce diff --git a/d3m/d3m/contrib/primitives/__init__.py b/d3m/d3m/contrib/primitives/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/d3m/d3m/contrib/primitives/compute_scores.py b/d3m/d3m/contrib/primitives/compute_scores.py new file mode 100644 index 0000000..e229769 --- /dev/null +++ b/d3m/d3m/contrib/primitives/compute_scores.py @@ -0,0 +1,369 @@ +import inspect +import os.path +import typing + +import pandas # type: ignore + +import d3m +from d3m import container, exceptions, metrics, utils as d3m_utils +from d3m.base import utils as base_utils +from d3m.metadata import base as metadata_base, hyperparams, problem +from d3m.primitive_interfaces import base, transformer + +__all__ = ('ComputeScoresPrimitive',) + +# Primitives needs an installation section so that digest is computed and available for the primitive. +if d3m.__version__[0].isdigit(): + installation = [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package': 'd3m', + 'version': d3m.__version__, + }] +else: + installation = [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/d3m.git@{git_commit}#egg=d3m'.format( + git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)), + ), + }] + +Inputs = container.DataFrame +Outputs = container.DataFrame + + +class MetricsHyperparams(hyperparams.Hyperparams, set_names=False): + metric = hyperparams.Enumeration( + values=[metric.name for metric in problem.PerformanceMetric], + # Default is ignored. + # TODO: Remove default. See: https://gitlab.com/datadrivendiscovery/d3m/issues/141 + default='ACCURACY', + ) + pos_label = hyperparams.Hyperparameter[typing.Union[str, None]](None) + k = hyperparams.Hyperparameter[typing.Union[int, None]](None) + + +class AllLabelsHyperparams(hyperparams.Hyperparams, set_names=False): + # Default is ignored. + # TODO: Remove default. See: https://gitlab.com/datadrivendiscovery/d3m/issues/141 + column_name = hyperparams.Hyperparameter[str]('') + labels = hyperparams.Set( + # Default is ignored. + # TODO: Remove default. 
See: https://gitlab.com/datadrivendiscovery/d3m/issues/141 + elements=hyperparams.Hyperparameter[str](''), + default=(), + ) + + +class Hyperparams(hyperparams.Hyperparams): + metrics = hyperparams.Set( + elements=MetricsHyperparams, + default=(), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="A set of metrics to compute.", + ) + all_labels = hyperparams.Set( + elements=AllLabelsHyperparams, + default=(), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="All labels available in a dataset, per target column. When provided for a target column, it overrides all labels from metadata or data for that target column.", + ) + add_normalized_scores = hyperparams.UniformBool( + default=True, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Add additional column with normalized scores?" + ) + + +class ComputeScoresPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A primitive that takes a DataFrame with predictions and a scoring Dataset (test split with + target values present), and computes scores for given metrics and outputs them as a DataFrame. + + It searches only the dataset entry point resource for target columns + (which should be marked with ``https://metadata.datadrivendiscovery.org/types/TrueTarget`` + semantic type) in the scoring Dataset. + + Primitive does not align rows between truth DataFrame and predictions DataFrame, it + is expected that metric code does that if necessary. Similarly, it does not align + columns order either. + + It uses metadata to construct the truth DataFrame and renames the index column to match + the standard names ``d3mIndex``. It encodes any float vectors as strings. + + For predictions DataFrame it expects that it is already structured correctly with correct + column names and it leaves to metric code to validate that truth DataFrame and predictions + DataFrame match. It does not use or expect metadata on predictions DataFrame. Predictions + DataFrame should already have float vectors encoded as strings. 
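+
+    As an illustration (with made-up numbers), a single configured ``ACCURACY`` metric
+    with ``add_normalized_scores`` enabled produces a DataFrame of the form::
+
+        metric      value   normalized
+        ACCURACY    0.85    0.85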
+ """ + + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = metadata_base.PrimitiveMetadata( + { + 'id': '799802fb-2e11-4ab7-9c5e-dda09eb52a70', + 'version': '0.5.0', + 'name': "Compute scores given the metrics to use", + 'python_path': 'd3m.primitives.evaluation.compute_scores.Core', + 'source': { + 'name': d3m.__author__, + 'contact': 'mailto:mitar.d3m@tnode.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/d3m/blob/master/d3m/contrib/primitives/compute_scores.py', + 'https://gitlab.com/datadrivendiscovery/d3m.git', + ], + }, + 'installation': installation, + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.ACCURACY_SCORE, + metadata_base.PrimitiveAlgorithmType.F1_SCORE, + ], + 'primitive_family': metadata_base.PrimitiveFamily.EVALUATION, + }, + ) + + def produce( # type: ignore + self, *, inputs: Inputs, score_dataset: container.Dataset, timeout: float = None, + iterations: int = None, + ) -> base.CallResult[Outputs]: + if not self.hyperparams['metrics']: + raise ValueError("\"metrics\" hyper-parameter cannot be empty.") + + truth, all_labels = self._get_truth(score_dataset) + predictions = self._get_predictions(inputs) + + for target_column in self.hyperparams['all_labels']: + all_labels[target_column['column_name']] = list(target_column['labels']) + + outputs: typing.Dict[str, typing.List] = { + 'metric': [], + 'value': [], + } + + if self.hyperparams['add_normalized_scores']: + outputs['normalized'] = [] + + for metric_configuration in self.hyperparams['metrics']: + metric = problem.PerformanceMetric[metric_configuration['metric']] + metric_class = metric.get_class() + + params = {} + + if 'all_labels' in inspect.signature(metric_class).parameters and all_labels: + params['all_labels'] = all_labels + + for param_name, param_value in metric_configuration.items(): + if param_name == 'metric': + continue + if param_value is None: + continue + params[param_name] = param_value + + if metric.requires_confidence() and metrics.CONFIDENCE_COLUMN not in predictions.columns: + raise exceptions.InvalidArgumentValueError( + f"Metric {metric.name} requires confidence column in predictions, but it is not available.", + ) + if metric.requires_rank() and metrics.RANK_COLUMN not in predictions.columns: + raise exceptions.InvalidArgumentValueError( + f"Metric {metric.name} requires rank column in predictions, but it is not available.", + ) + + score = metric_class(**params).score(truth, predictions) + + outputs['metric'].append(metric.name) + outputs['value'].append(score) + + if self.hyperparams['add_normalized_scores']: + outputs['normalized'].append(metric.normalize(score)) + + # Dictionary key order is preserved in Python 3.6+ which makes column order as we want it. + results = container.DataFrame(data=outputs, columns=list(outputs.keys()), generate_metadata=True) + + # Not really necessary, but it does not hurt. In theory somebody could list same metric multiple times + # (maybe with different params), so we use "PrimaryMultiKey" here. 
+ results.metadata = results.metadata.add_semantic_type( + (metadata_base.ALL_ELEMENTS, 0), + 'https://metadata.datadrivendiscovery.org/types/PrimaryMultiKey', + ) + results.metadata = results.metadata.add_semantic_type( + (metadata_base.ALL_ELEMENTS, 1), + 'https://metadata.datadrivendiscovery.org/types/Score', + ) + if self.hyperparams['add_normalized_scores']: + results.metadata = results.metadata.add_semantic_type( + (metadata_base.ALL_ELEMENTS, 2), + 'https://metadata.datadrivendiscovery.org/types/Score', + ) + + return base.CallResult(results) + + def multi_produce( # type: ignore + self, *, produce_methods: typing.Sequence[str], inputs: Inputs, + score_dataset: container.Dataset, timeout: float = None, iterations: int = None, + ) -> base.MultiCallResult: + return self._multi_produce( + produce_methods=produce_methods, timeout=timeout, iterations=iterations, + inputs=inputs, score_dataset=score_dataset, + ) + + def fit_multi_produce( # type: ignore + self, *, produce_methods: typing.Sequence[str], inputs: Inputs, + score_dataset: container.Dataset, timeout: float = None, iterations: int = None + ) -> base.MultiCallResult: + return self._fit_multi_produce( + produce_methods=produce_methods, timeout=timeout, iterations=iterations, + inputs=inputs, score_dataset=score_dataset, + ) + + # TODO: Instead of extracting true targets only from the dataset entry point, first denormalize and then extract true targets. + def _get_truth(self, score_dataset: container.Dataset) -> typing.Tuple[pandas.DataFrame, typing.Dict[str, typing.Any]]: + """ + Extracts true targets from the Dataset's entry point, or the only tabular resource. + It requires that there is only one primary index column, which it makes the first + column, named ``d3mIndex``. Then true target columns follow. + + We return a regular Pandas DataFrame with column names matching those in the metadata, + and a dict mapping target columns to all label values in those columns, if available in metadata. + We convert all columns to strings to match what would be loaded from ``predictions.csv`` file. + It encodes any float vectors as strings. + """ + + main_resource_id, main_resource = base_utils.get_tabular_resource(score_dataset, None, has_hyperparameter=False) + + # We first copy before modifying in-place. + main_resource = container.DataFrame(main_resource, copy=True) + main_resource = self._encode_columns(main_resource) + + dataframe = self._to_dataframe(main_resource) + + indices = list(score_dataset.metadata.get_index_columns(at=(main_resource_id,))) + targets = list(score_dataset.metadata.list_columns_with_semantic_types( + ['https://metadata.datadrivendiscovery.org/types/TrueTarget'], + at=(main_resource_id,), + )) + + if not indices: + raise exceptions.InvalidArgumentValueError("No primary index column.") + elif len(indices) > 1: + raise exceptions.InvalidArgumentValueError("More than one primary index column.") + if not targets: + raise ValueError("No true target columns.") + + dataframe = dataframe.iloc[:, indices + targets] + + dataframe = dataframe.rename({dataframe.columns[0]: metrics.INDEX_COLUMN}) + + if metrics.CONFIDENCE_COLUMN in dataframe.columns[1:]: + raise ValueError("True target column cannot be named \"confidence\". It is a reserved name.") + if metrics.RANK_COLUMN in dataframe.columns[1:]: + raise ValueError("True target column cannot be named \"rank\". It is a reserved name.") + if metrics.INDEX_COLUMN in dataframe.columns[1:]: + raise ValueError("True target column cannot be named \"d3mIndex\". 
It is a reserved name.") + + if d3m_utils.has_duplicates(dataframe.columns): + duplicate_names = list(dataframe.columns) + for name in set(dataframe.columns): + duplicate_names.remove(name) + raise exceptions.InvalidArgumentValueError( + "True target columns have duplicate names: {duplicate_names}".format( + duplicate_names=sorted(set(duplicate_names)), + ), + ) + + all_labels = {} + + for target_column_name, main_resource_column_index in zip(dataframe.columns[1:], targets): + try: + column_labels = score_dataset.metadata.query_column_field(main_resource_column_index, 'all_distinct_values', at=(main_resource_id,)) + except KeyError: + continue + + all_labels[target_column_name] = [str(label) for label in column_labels] + + return dataframe, all_labels + + def _get_predictions(self, inputs: Inputs) -> pandas.DataFrame: + """ + It requires that predictions already have the right structure (one ``d3mIndex`` + column, at most one ``confidence`` column, at most one ``rank`` column, + no duplicate column names). + + We return a regular Pandas DataFrame with column names matching those in the metadata. + We convert all columns to strings to match what would be loaded from ``predictions.csv`` file. + Predictions DataFrame should already have float vectors encoded as strings. + """ + + dataframe = self._to_dataframe(inputs) + + if metrics.INDEX_COLUMN not in dataframe.columns: + raise exceptions.InvalidArgumentValueError("No primary index column.") + + if d3m_utils.has_duplicates(dataframe.columns): + duplicate_names = list(dataframe.columns) + for name in set(dataframe.columns): + duplicate_names.remove(name) + raise exceptions.InvalidArgumentValueError( + "Predicted target columns have duplicate names: {duplicate_names}".format( + duplicate_names=sorted(set(duplicate_names)), + ), + ) + + return dataframe + + def _to_dataframe(self, inputs: container.DataFrame) -> pandas.DataFrame: + # We have to copy, otherwise setting "columns" modifies original DataFrame as well. + dataframe = pandas.DataFrame(inputs, copy=True) + + column_names = [] + for column_index in range(len(inputs.columns)): + column_names.append(inputs.metadata.query_column(column_index).get('name', inputs.columns[column_index])) + + # Make sure column names are correct. + dataframe.columns = column_names + + # Convert all columns to string. + return dataframe.astype(str) + + @classmethod + def _encode_columns(cls, inputs: Outputs) -> Outputs: + """ + Encode numpy arrays of numbers into float vectors. 
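+
+        For example, a one-dimensional array ``[1.0, 2.5]`` becomes the string ``"1.0,2.5"``;
+        columns holding arrays with any other number of dimensions are left unchanged.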
+ """ + + outputs = inputs + target_columns = outputs.metadata.list_columns_with_semantic_types( + ('https://metadata.datadrivendiscovery.org/types/PredictedTarget',), + ) + + for column_index in target_columns: + structural_type = outputs.metadata.query_column(column_index).get('structural_type', None) + + if structural_type is None: + continue + + if not issubclass(structural_type, container.ndarray): + continue + + new_column = [] + all_strings = True + for value in outputs.iloc[:, column_index]: + assert isinstance(value, container.ndarray) + + if value.ndim == 1: + new_column.append(','.join(str(v) for v in value)) + else: + all_strings = False + break + + if not all_strings: + continue + + outputs_metadata = outputs.metadata + outputs.iloc[:, column_index] = new_column + outputs.metadata = outputs_metadata.update_column(column_index, { + 'structural_type': str, + 'dimension': metadata_base.NO_VALUE, + }) + outputs.metadata = outputs.metadata.remove( + (metadata_base.ALL_ELEMENTS, column_index, metadata_base.ALL_ELEMENTS), + recursive=True, + ) + + return outputs diff --git a/d3m/d3m/deprecate.py b/d3m/d3m/deprecate.py new file mode 100644 index 0000000..375dbfb --- /dev/null +++ b/d3m/d3m/deprecate.py @@ -0,0 +1,143 @@ +import functools +import logging +import sys +import typing + +logger = logging.getLogger(__name__) + + +class Context(typing.NamedTuple): + function: typing.Optional[str] + argument: typing.Optional[str] + filename: str + module: str + lineno: int + + +def function(message: str = None) -> typing.Callable: + """ + A decorator which issues a warning if a wrapped function is called. + """ + + def decorator(f: typing.Callable) -> typing.Callable: + already_warned: typing.Set[Context] = set() + + @functools.wraps(f) + def wrapper(*args: typing.Any, **kwargs: typing.Any) -> typing.Any: + frame = sys._getframe(1) + try: + while frame: + # If function has multiple decorators, skip decorators as callers and find the real caller. + if frame.f_code.co_filename != __file__: + break + + frame = frame.f_back + + if not frame: + if message is None: + logger.warning( + "Calling a deprecated function '%(function)s'.", + { + 'function': f.__name__, + }, + ) + else: + logger.warning( + "Calling a deprecated function '%(function)s': %(message)s", + { + 'function': f.__name__, + 'message': message, + }, + ) + return f(*args, **kwargs) + + context = Context(f.__name__, None, frame.f_code.co_filename, frame.f_globals.get('__name__', None), frame.f_lineno) + + finally: + del frame + + if context in already_warned: + return f(*args, **kwargs) + already_warned.add(context) + + if message is None: + logger.warning("%(module)s: Calling a deprecated function '%(function)s' in '%(filename)s' at line %(lineno)s.", context._asdict()) + else: + logger.warning("%(module)s: Calling a deprecated function '%(function)s' in '%(filename)s' at line %(lineno)s: %(message)s", dict(context._asdict(), message=message)) + + return f(*args, **kwargs) + + return wrapper + + return decorator + + +def arguments(*deprecated_arguments: str, message: str = None) -> typing.Callable: + """ + A decorator which issues a warning if any of the ``deprecated_arguments`` is being + passed to the wrapped function. 
+ """ + + def decorator(f: typing.Callable) -> typing.Callable: + already_warned: typing.Set[Context] = set() + + @functools.wraps(f) + def wrapper(*args: typing.Any, **kwargs: typing.Any) -> typing.Any: + for argument in deprecated_arguments: + if argument in kwargs: + frame = sys._getframe(1) + try: + while frame: + # If function has multiple decorators, skip decorators as callers and find the real caller. + if frame.f_code.co_filename != __file__: + break + + frame = frame.f_back + + if not frame: + if message is None: + logger.warning( + "Providing a deprecated argument '%(argument)s' to '%(function)s' function.", + { + 'argument': argument, + 'function': f.__name__, + }, + ) + else: + logger.warning( + "Providing a deprecated argument '%(argument)s' to '%(function)s' function: %(message)s", + { + 'argument': argument, + 'function': f.__name__, + 'message': message, + }, + ) + break + + context = Context(f.__name__, argument, frame.f_code.co_filename, frame.f_globals.get('__name__', None), frame.f_lineno) + + finally: + del frame + + if context in already_warned: + break + already_warned.add(context) + + if message is None: + logger.warning( + "%(module)s: Providing a deprecated argument '%(argument)s' to '%(function)s' function in '%(filename)s' at line %(lineno)s.", + context._asdict(), + ) + else: + logger.warning( + "%(module)s: Providing a deprecated argument '%(argument)s' to '%(function)s' function in '%(filename)s' at line %(lineno)s: %(message)s", + dict(context._asdict(), message=message), + ) + + break + + return f(*args, **kwargs) + + return wrapper + + return decorator diff --git a/d3m/d3m/environment_variables.py b/d3m/d3m/environment_variables.py new file mode 100644 index 0000000..f586667 --- /dev/null +++ b/d3m/d3m/environment_variables.py @@ -0,0 +1,22 @@ +# Environment variables describing runtime environment. +# From inside Docker container it is not really possible to obtain +# information about the Docker image used for the container. This +# is why we use environment variable to pass this information in. +# See descriptions of "base_docker_image" and "docker_image" metadata. +D3M_BASE_IMAGE_NAME = 'D3M_BASE_IMAGE_NAME' +D3M_BASE_IMAGE_DIGEST = 'D3M_BASE_IMAGE_DIGEST' +D3M_IMAGE_NAME = 'D3M_IMAGE_NAME' +D3M_IMAGE_DIGEST = 'D3M_IMAGE_DIGEST' + +# Limits on CPU and memory compute resources available to the runtime +# can be communicated also through environment variables because it is +# not always easy to determine them from inside limited environment +# that not all resources visible are also available. +# Should be in Kubernetes units or equivalent. +# See: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu +# https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-memory +D3M_CPU = 'D3MCPU' +D3M_RAM = 'D3MRAM' + +# Used by pipeline resolver to configure where to search for files with pipelines. +PIPELINES_PATH = 'PIPELINES_PATH' diff --git a/d3m/d3m/exceptions.py b/d3m/d3m/exceptions.py new file mode 100644 index 0000000..65bd006 --- /dev/null +++ b/d3m/d3m/exceptions.py @@ -0,0 +1,187 @@ + +class NotSupportedError(RuntimeError): + """ + Functionality is not supported. + """ + + +class NotSupportedVersionError(RuntimeError): + """ + This version is not supported. + """ + + +class InvalidArgumentValueError(ValueError): + """ + Provided argument to the function is invalid in value. 
+ """ + + +class InvalidReturnValueError(ValueError): + """ + Returned value from the function is invalid. + """ + + +class InvalidArgumentTypeError(TypeError): + """ + Provided argument to the function is invalid in type. + """ + + +class InvalidReturnTypeError(TypeError): + """ + Type of the returned value from the function is invalid. + """ + + +class NotFoundError(ValueError): + """ + Something requested could not be found. + """ + + +class AlreadyExistsError(ValueError): + """ + Something which should not exist already exists. + """ + + +class MismatchError(ValueError): + """ + A value does not match expected value. + """ + + +class MissingValueError(ValueError): + """ + The required value has not been provided. + """ + + +class DigestMismatchError(MismatchError): + """ + A digest does not match the expect digest. + """ + + +class DimensionalityMismatchError(MismatchError): + """ + Dimensionality mismatch occurs in array computations. + """ + + +class UnexpectedValueError(ValueError): + """ + Value occurred not in a fixed list of possible or supported values, + e.g., during parsing of data with expected schema. + """ + + +class UnexpectedTypeError(TypeError): + """ + Type occurred not in a fixed list of possible or supported types, + e.g., during parsing of data with expected schema. + """ + + +class DatasetUriNotSupportedError(NotSupportedError): + """ + Provided dataset URI is not supported. + """ + + +class ProblemUriNotSupportedError(NotSupportedError): + """ + Provided problem URI is not supported. + """ + + +class DatasetNotFoundError(FileNotFoundError, NotFoundError): + """ + Provided dataset URI cannot be resolved to a dataset. + """ + + +class ProblemNotFoundError(FileNotFoundError, NotFoundError): + """ + Provided problem URI cannot be resolved to a problem. + """ + + +class InvalidStateError(AssertionError): + """ + Program ended up in an invalid or unexpected state, or a state does not match the current code path. + """ + + +class InvalidMetadataError(ValueError): + """ + Metadata is invalid. + """ + + +class InvalidPrimitiveCodeError(ValueError): + """ + Primitive does not match standard API. + """ + + +class ColumnNameError(KeyError): + """ + Table column with name not found. + """ + + +class InvalidPipelineError(ValueError): + """ + Pipeline is invalid. + """ + + +class InvalidPipelineRunError(ValueError): + """ + Pipeline run is invalid. + """ + + +class InvalidProblemError(ValueError): + """ + Problem description is invalid. + """ + + +class InvalidDatasetError(ValueError): + """ + Dataset is invalid. + """ + + +class PrimitiveNotFittedError(InvalidStateError): + """ + The primitive has not been fitted. + """ + + +class PermissionDeniedError(RuntimeError): + """ + No permissions to do or access something. + """ + + +class StepFailedError(RuntimeError): + """ + Running a pipeline step failed. + """ + + +class SamplingError(ArithmeticError): + """ + Error during sampling. + """ + + +class SamplingNotPossibleError(SamplingError): + """ + Sampling is not possible. 
+ """ diff --git a/d3m/d3m/index.py b/d3m/d3m/index.py new file mode 100644 index 0000000..8f948fa --- /dev/null +++ b/d3m/d3m/index.py @@ -0,0 +1,538 @@ +import argparse +import contextlib +import json +import hashlib +import importlib +import importlib.abc +import importlib.machinery +import inspect +import logging +import os.path +import pprint +import subprocess +import shutil +import sys +import time +import traceback +import typing +from xmlrpc import client as xmlrpc # type: ignore + +import frozendict # type: ignore +import pycurl # type: ignore + +from d3m import exceptions, namespace, utils +from d3m.primitive_interfaces import base + +__all__ = ('search', 'get_primitive', 'get_primitive_by_id', 'get_loaded_primitives', 'load_all', 'register_primitive', 'discover') + +logger = logging.getLogger(__name__) + +DEFAULT_INDEX = 'https://pypi.org/pypi' +DEFAULT_OUTPUT = '.' + + +class _SENTINEL_TYPE: + __slots__ = () + + def __repr__(self) -> str: + return '_SENTINEL' + + +_SENTINEL = _SENTINEL_TYPE() + +_loaded_primitives: typing.Set[typing.Type[base.PrimitiveBase]] = set() + + +def search(*, primitive_path_prefix: str = None) -> typing.Sequence[str]: + """ + Returns a list of primitive paths (Python paths under ``d3m.primitives`` namespace) + for all known (discoverable through entry points) primitives, or limited by the + ``primitive_path_prefix`` search argument. + + Not all returned primitive paths are not necessary loadable and it is not necessary that + they are all really pointing to primitive classes, because this method does not try to + load them yet to determine any of that. + + Parameters + ---------- + primitive_path_prefix: + Optionally limit returned primitive paths only to those whose path start with ``primitive_name_prefix``. + + Returns + ------- + A list of primitive paths. + """ + + if primitive_path_prefix is None: + primitive_path_prefix = '' + + results = [] + + for entry_point in namespace.entry_points(): + primitive_path = 'd3m.primitives.{entry_point_name}'.format( + entry_point_name=entry_point.name, + ) + + if primitive_path.startswith(primitive_path_prefix): + results.append(primitive_path) + + # We also go over all loaded primitives to also search over any primitives directly + # registered using "register_primitive" and not through an entry point. + for primitive in get_loaded_primitives(): + primitive_path = primitive.metadata.query()['python_path'] + + if primitive_path in results: + continue + + if primitive_path.startswith(primitive_path_prefix): + results.append(primitive_path) + + return sorted(results) + + +def get_primitive(primitive_path: str) -> typing.Type[base.PrimitiveBase]: + """ + Loads (if not already) a primitive class and returns it. + + Parameters + ---------- + primitive_path: + A Python path under ``d3m.primitives`` namespace of a primitive. + + Returns + ------- + A primitive class. + """ + + if not primitive_path: + raise exceptions.InvalidArgumentValueError("Primitive path is required.") + + if not primitive_path.startswith('d3m.primitives.'): + raise exceptions.InvalidArgumentValueError("Primitive path does not start with \"d3m.primitives\".") + + path, name = primitive_path.rsplit('.', 1) + + module = importlib.import_module(path) + + return getattr(module, name) + + +def get_primitive_by_id(primitive_id: str) -> typing.Type[base.PrimitiveBase]: + """ + Returns a primitive class based on its ID from all currently loaded primitives. + + Parameters + ---------- + primitive_id: + An ID of a primitive. 
+ + Returns + ------- + A primitive class. + """ + + for primitive in get_loaded_primitives(): + if primitive.metadata.query()['id'] == primitive_id: + return primitive + + raise exceptions.InvalidArgumentValueError("Unable to get primitive '{primitive_id}'.".format(primitive_id=primitive_id)) + + +def get_loaded_primitives() -> typing.Sequence[typing.Type[base.PrimitiveBase]]: + """ + Returns a list of all currently loaded primitives. + + Returns + ------- + A list of all currently loaded primitives. + """ + + return list(_loaded_primitives) + + +def load_all(blocklist: typing.Collection[str] = None) -> None: + """ + Loads all primitives available and populates ``d3m.primitives`` namespace with them. + + If a primitive cannot be loaded, an error is logged, but loading of other primitives + continue. + + Parameters + ---------- + blocklist: + A collection of primitive path prefixes to not (try to) load. + """ + + if blocklist is None: + blocklist = [] + + for primitive_path in search(): + if any(primitive_path.startswith(blocklist_prefix) for blocklist_prefix in blocklist): + continue + + try: + get_primitive(primitive_path) + except Exception: + logger.exception("Could not load the primitive: %(primitive_path)s", {'primitive_path': primitive_path}) + + +# TODO: "primitive_path" is not really necessary because it could just be extracted from primitive's metadata. +# We do not allow them to be different anyway. +def register_primitive(primitive_path: str, primitive: typing.Type[base.PrimitiveBase]) -> None: + """ + Registers a primitive under ``d3m.primitives`` namespace. + + This is useful to register primitives not necessary installed on the system + or which are generated at runtime. It is also useful for testing purposes. + + ``primitive_path`` has to start with ``d3m.primitives``. + + Parameters + ---------- + primitive_path: + A primitive path to register a primitive under. + primitive: + A primitive class to register. + """ + + if not primitive_path: + raise exceptions.InvalidArgumentValueError("Path under which to register a primitive is required.") + + if not primitive_path.startswith('d3m.primitives.'): + raise exceptions.InvalidArgumentValueError("Path under which to register a primitive does not start with \"d3m.primitives\".") + + if not inspect.isclass(primitive): + raise exceptions.InvalidArgumentTypeError("Primitive to register has to be a class.") + + if not issubclass(primitive, base.PrimitiveBase): + raise exceptions.InvalidArgumentTypeError("Primitive to register is not a subclass of PrimitiveBase.") + + if primitive.metadata.query()['python_path'] != primitive_path: + raise exceptions.InvalidArgumentValueError("Primitive's \"python_path\" in metadata does not match the path under which to register it: {python_path} vs. {primitive_path}".format( + python_path=primitive.metadata.query()['python_path'], + primitive_path=primitive_path, + )) + + modules_path, name = primitive_path.rsplit('.', 1) + # We remove "d3m.primitives" from the list of modules. + modules = modules_path.split('.')[2:] + + if 'd3m.primitives' not in sys.modules: + import d3m.primitives # type: ignore + + # Create any modules which do not yet exist. + current_path = 'd3m.primitives' + for module_name in modules: + module_path = current_path + '.' + module_name + + if module_path not in sys.modules: + try: + importlib.import_module(module_path) + except ModuleNotFoundError: + # This can happen if this module is not listed in any of entry points. 
But we want to allow + # registering primitives also outside of existing entry points, so we create a module here. + + # Because we just could not load the module, we know that if the attribute exists, + # it has to be something else, which we do not want to clobber. + if hasattr(sys.modules[current_path], module_name): + raise ValueError("'{module_path}' is already defined.".format(module_path)) + + module_spec = importlib.machinery.ModuleSpec(module_path, namespace.Loader(), is_package=True) + module = importlib.util.module_from_spec(module_spec) + module_spec.loader.exec_module(module) + + sys.modules[module_path] = module + setattr(sys.modules[current_path], module_name, module) + + current_path = module_path + + if hasattr(sys.modules[current_path], name): + existing_value = getattr(sys.modules[current_path], name) + # Registering twice the same primitive is a noop. + if existing_value is primitive: + return + + # Maybe we are just registering this primitive. But if not... + if existing_value is not _SENTINEL: + raise ValueError("'{module}.{name}' is already defined as '{existing_value}'.".format(module=current_path, name=name, existing_value=existing_value)) + + setattr(sys.modules[current_path], name, primitive) + _loaded_primitives.add(primitive) + + +def discover(index: str = 'https://pypi.org/pypi') -> typing.Tuple[str, ...]: + """ + Returns package names from PyPi which provide D3M primitives. + + This is determined by them having a ``d3m_primitive`` among package keywords. + + Parameters + ---------- + index: + Base URL of Python Package Index to use. + + Returns + ------- + A list of package names. + """ + + client = xmlrpc.ServerProxy(index) + hits = client.search({'keywords': 'd3m_primitive'}) + return tuple(sorted({package['name'] for package in hits})) + + +def download_files(primitive_metadata: frozendict.FrozenOrderedDict, output: str, redownload: bool) -> None: + last_progress_call = None + + def curl_progress(download_total: int, downloaded: int, upload_total: int, uploaded: int) -> None: + nonlocal last_progress_call + + # Output at most once every 10 seconds. + now = time.time() + if last_progress_call is None or now - last_progress_call > 10: + last_progress_call = now + + print("Downloaded {downloaded}/{download_total} B".format( + downloaded=downloaded, + download_total=download_total, + ), flush=True) + + for installation_entry in primitive_metadata.get('installation', []): + if installation_entry['type'] not in ['FILE', 'TGZ']: + continue + + # We store into files based on digest. In this way we deduplicate same + # files used by multiple primitives. 
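+        # For example, if two primitives reference the same static file, it is fetched
+        # once and stored under a path named by that file's digest.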
+ output_path = os.path.join(output, installation_entry['file_digest']) + + if installation_entry['type'] == 'FILE': + if os.path.isfile(output_path) and not redownload: + print("File for volume {type}/{key} for primitive {python_path} ({primitive_id}) already exists, skipping: {file_uri}".format( + python_path=primitive_metadata['python_path'], + primitive_id=primitive_metadata['id'], + type=installation_entry['type'], + key=installation_entry['key'], + file_uri=installation_entry['file_uri'], + ), flush=True) + continue + elif installation_entry['type'] == 'TGZ': + if os.path.isdir(output_path) and not redownload: + print("Directory for volume {type}/{key} for primitive {python_path} ({primitive_id}) already exists, skipping: {file_uri}".format( + python_path=primitive_metadata['python_path'], + primitive_id=primitive_metadata['id'], + type=installation_entry['type'], + key=installation_entry['key'], + file_uri=installation_entry['file_uri'], + ), flush=True) + continue + + # Cleanup. + if os.path.isdir(output_path): + shutil.rmtree(output_path) + elif os.path.exists(output_path): + os.remove(output_path) + + print("Downloading file for volume {type}/{key} for primitive {python_path} ({primitive_id}): {file_uri}".format( + python_path=primitive_metadata['python_path'], + primitive_id=primitive_metadata['id'], + type=installation_entry['type'], + key=installation_entry['key'], + file_uri=installation_entry['file_uri'], + ), flush=True) + + output_file_obj: typing.BinaryIO = None + output_tar_process = None + + try: + if installation_entry['type'] == 'FILE': + output_file_obj = open(output_path, 'wb') + elif installation_entry['type'] == 'TGZ': + os.makedirs(output_path, mode=0o755, exist_ok=True) + output_tar_process = subprocess.Popen(['tar', '-xz', '-C', output_path], stdin=subprocess.PIPE) + output_file_obj = typing.cast(typing.BinaryIO, output_tar_process.stdin) + + hash = hashlib.sha256() + downloaded = 0 + start = time.time() + + def write(data: bytes) -> None: + nonlocal hash + nonlocal downloaded + + hash.update(data) + downloaded += len(data) + + output_file_obj.write(data) + + while True: + try: + with contextlib.closing(pycurl.Curl()) as curl: + curl.setopt(curl.URL, installation_entry['file_uri']) + curl.setopt(curl.WRITEFUNCTION, write) + curl.setopt(curl.NOPROGRESS, False) + curl.setopt(curl.FOLLOWLOCATION, True) + curl.setopt(getattr(curl, 'XFERINFOFUNCTION', curl.PROGRESSFUNCTION), curl_progress) + curl.setopt(curl.LOW_SPEED_LIMIT, 30 * 1024) + curl.setopt(curl.LOW_SPEED_TIME, 30) + curl.setopt(curl.RESUME_FROM, downloaded) + + curl.perform() + break + + except pycurl.error as error: + if error.args[0] == pycurl.E_OPERATION_TIMEDOUT: + # If timeout, retry/resume. + print("Timeout. Retrying.", flush=True) + else: + raise + + end = time.time() + + print("Downloaded {downloaded} B in {seconds} second(s).".format( + downloaded=downloaded, + seconds=end - start, + ), flush=True) + + if output_tar_process is not None: + # Close the input to the process to signal that we are done. + output_file_obj.close() + output_file_obj = None + + # Wait for 60 seconds to finish writing everything out. + if output_tar_process.wait(60) != 0: + raise subprocess.CalledProcessError(output_tar_process.returncode, output_tar_process.args) + output_tar_process = None + + if installation_entry['file_digest'] != hash.hexdigest(): + raise ValueError("Digest for downloaded file does not match one from metadata. Metadata digest: {metadata_digest}. 
Computed digest: {computed_digest}.".format( + metadata_digest=installation_entry['file_digest'], + computed_digest=hash.hexdigest(), + )) + + except Exception: + # Cleanup. + if output_tar_process is not None: + try: + output_tar_process.kill() + output_tar_process.wait() + output_file_obj = None + except Exception: + # We ignore errors cleaning up. + pass + if os.path.isdir(output_path): + shutil.rmtree(output_path) + elif os.path.exists(output_path): + os.remove(output_path) + + raise + + finally: + if output_file_obj is not None: + output_file_obj.close() + + +# TODO: Add more ways to search for primitives (by name, keywords, etc.). +# TODO: Allow displaying results with more than just a primitive path. +def search_handler(arguments: argparse.Namespace) -> None: + for primitive_path in search(primitive_path_prefix=getattr(arguments, 'prefix', None)): + print(primitive_path) + + +def discover_handler(arguments: argparse.Namespace) -> None: + for package_name in discover(index=getattr(arguments, 'index', DEFAULT_INDEX)): + print(package_name) + + +def describe_handler(arguments: argparse.Namespace) -> None: + output_stream = getattr(arguments, 'output', sys.stdout) + + has_errored = False + + for primitive_path in arguments.primitives: + if getattr(arguments, 'list', False): + print(primitive_path, file=output_stream) + + try: + try: + primitive = get_primitive(primitive_path) + except Exception: + primitive = None + + if primitive is None: + load_all() + primitive = get_primitive_by_id(primitive_path) + except Exception as error: + if getattr(arguments, 'continue', False): + traceback.print_exc(file=output_stream) + print(f"Error loading primitive: {primitive_path}", file=output_stream) + has_errored = True + continue + else: + raise Exception(f"Error loading primitive: {primitive_path}") from error + + try: + # Using "to_json_structure" and not "to_internal_json_structure" because + # it is not indented that this would be parsed back directly, but just used + # to know where to find the primitive (using "installation" section). 
+ primitive_description = primitive.metadata.to_json_structure() + + if getattr(arguments, 'print', False): + pprint.pprint(primitive_description, stream=output_stream) + + else: + json.dump( + primitive_description, + output_stream, + indent=(getattr(arguments, 'indent', 2) or None), + sort_keys=getattr(arguments, 'sort_keys', False), + allow_nan=False, + ) # type: ignore + output_stream.write('\n') + except Exception as error: + if getattr(arguments, 'continue', False): + traceback.print_exc(file=output_stream) + print(f"Error describing primitive: {primitive_path}", file=output_stream) + has_errored = True + continue + else: + raise Exception(f"Error describing primitive: {primitive_path}") from error + + if has_errored: + sys.exit(1) + + +def download_handler(arguments: argparse.Namespace) -> None: + for primitive_path in search(primitive_path_prefix=getattr(arguments, 'prefix', None)): + try: + primitive_class = get_primitive(primitive_path) + except Exception: + logger.exception("Could not load the primitive: %(primitive_path)s", {'primitive_path': primitive_path}) + continue + + try: + download_files(primitive_class.metadata.query(), getattr(arguments, 'output', DEFAULT_OUTPUT), getattr(arguments, 'redownload', False)) + except Exception: + logger.exception("Error downloading files for: %(primitive_path)s", {'primitive_path': primitive_path}) + + +def main(argv: typing.Sequence) -> None: + # We have to disable importing while type checking because it makes + # an import cycle in mypy which makes many typing errors. + if not typing.TYPE_CHECKING: + # Importing here to prevent import cycle. + from d3m import cli + + logging.basicConfig() + + logger.warning("This CLI is deprecated. Use \"python3 -m d3m index\" instead.") + + parser = argparse.ArgumentParser(description="Explore D3M primitives.") + cli.primitive_configure_parser(parser) + + arguments = parser.parse_args(argv[1:]) + + cli.primitive_handler(arguments, parser) + + +if __name__ == '__main__': + main(sys.argv) diff --git a/d3m/d3m/metadata/__init__.py b/d3m/d3m/metadata/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/d3m/d3m/metadata/base.py b/d3m/d3m/metadata/base.py new file mode 100644 index 0000000..3861f37 --- /dev/null +++ b/d3m/d3m/metadata/base.py @@ -0,0 +1,4034 @@ +import collections +import copy +import datetime +import functools +import json +import logging +import inspect +import itertools +import operator +import os.path +import pickle +import re +import sys +import types +import typing +from urllib import parse as url_parse + +import frozendict # type: ignore +import jsonschema # type: ignore +import numpy # type: ignore +import pandas # type: ignore +from pytypes import type_util # type: ignore + +import d3m +from . 
import hyperparams as hyperparams_module, primitive_names +from d3m import deprecate, exceptions, utils + +# See: https://gitlab.com/datadrivendiscovery/d3m/issues/66 +try: + from pyarrow import lib as pyarrow_lib # type: ignore +except ModuleNotFoundError: + pyarrow_lib = None + +__all__ = ( + 'ALL_ELEMENTS', 'NO_VALUE', 'DataMetadata', 'PrimitiveMetadata', 'CONTAINER_SCHEMA_VERSION', + 'DATA_SCHEMA_VERSION', 'PRIMITIVE_SCHEMA_VERSION', 'PrimitiveMethodKind', + 'PrimitiveArgumentKind', 'PrimitiveInstallationType', 'PrimitiveAlgorithmType', + 'PrimitiveFamily', 'PrimitivePrecondition', 'PrimitiveEffect', 'ForeignKeyType', 'Context', + 'PipelineRunPhase', 'PipelineStepType', 'PipelineRunStatusState', 'ArgumentType', +) + +logger = logging.getLogger(__name__) + + +def _return_all_elements() -> 'ALL_ELEMENTS_TYPE': + return ALL_ELEMENTS + + +@functools.total_ordering +class ALL_ELEMENTS_TYPE: + __slots__ = () + + def __repr__(self) -> str: + return '__ALL_ELEMENTS__' + + def __lt__(self, other: typing.Any) -> bool: + # "ALL_ELEMENTS" is smaller than anything else, and equal to itself. + # "ALL_ELEMENTS" is a singleton, so is equal only if referentially equal + # (which is a default implementation of "__eq__"). + return self != other + + def __deepcopy__(self, memo: typing.Dict) -> 'ALL_ELEMENTS_TYPE': + return ALL_ELEMENTS + + def __copy__(self) -> 'ALL_ELEMENTS_TYPE': + return ALL_ELEMENTS + + def __reduce__(self) -> typing.Tuple[typing.Callable, typing.Tuple]: + return _return_all_elements, () + + +def _return_no_value() -> 'NO_VALUE_TYPE': + return NO_VALUE + + +class NO_VALUE_TYPE: + __slots__ = () + + def __repr__(self) -> str: + return '__NO_VALUE__' + + def __deepcopy__(self, memo: typing.Dict) -> 'NO_VALUE_TYPE': + return NO_VALUE + + def __copy__(self) -> 'NO_VALUE_TYPE': + return NO_VALUE + + def __reduce__(self) -> typing.Tuple[typing.Callable, typing.Tuple]: + return _return_no_value, () + + +ALL_ELEMENTS = ALL_ELEMENTS_TYPE() +NO_VALUE = NO_VALUE_TYPE() + +COMMIT_HASH_REGEX = re.compile(r'^[0-9a-f]{40}$') + +ARGUMENT_NAME_REGEX = re.compile(r'^[A-Za-z][A-Za-z_0-9]*$') + +CONTAINER_SCHEMA_VERSION = 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json' +DATA_SCHEMA_VERSION = 'https://metadata.datadrivendiscovery.org/schemas/v0/data.json' +PRIMITIVE_SCHEMA_VERSION = 'https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json' + +SCHEMAS_PATH = os.path.join(os.path.dirname(__file__), 'schemas', 'v0') + +# A map of all known schemas from their URIs to loaded JSONs. Not validated. +SCHEMAS = {} +for schema_uri in [ + CONTAINER_SCHEMA_VERSION, + DATA_SCHEMA_VERSION, + 'https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json', + 'https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json', + 'https://metadata.datadrivendiscovery.org/schemas/v0/pipeline_run.json', + PRIMITIVE_SCHEMA_VERSION, + 'https://metadata.datadrivendiscovery.org/schemas/v0/problem.json', +]: + schema_filename = os.path.basename(schema_uri) + with open(os.path.join(SCHEMAS_PATH, schema_filename), 'r', encoding='utf8') as schema_file: + SCHEMAS[schema_uri] = json.load(schema_file) + +# We validate schemas using unmodified validator. 
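+# That is, every schema file loaded above is itself checked against the JSON Schema
+# meta-schema (draft 7) before any validators are built from it.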
+for schema_json in SCHEMAS.values(): + jsonschema.Draft7Validator.check_schema(schema_json) + +DEFINITIONS_JSON = SCHEMAS['https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json'] + +CONTAINER_SCHEMA_VALIDATOR, DATA_SCHEMA_VALIDATOR, PRIMITIVE_SCHEMA_VALIDATOR = utils.load_schema_validators(SCHEMAS, ('container.json', 'data.json', 'primitive.json')) + +HYPERPARAMETER_REQUIRED_SEMANTIC_TYPES = { + 'https://metadata.datadrivendiscovery.org/types/TuningParameter', + 'https://metadata.datadrivendiscovery.org/types/ControlParameter', + 'https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter', + 'https://metadata.datadrivendiscovery.org/types/MetafeatureParameter', +} + +TABULAR_SEMANTIC_TYPES = { + 'https://metadata.datadrivendiscovery.org/types/Table', + 'https://metadata.datadrivendiscovery.org/types/TabularRow', + 'https://metadata.datadrivendiscovery.org/types/TabularColumn', +} + +ALL_SEMANTIC_TYPES = set(utils._get_names(DEFINITIONS_JSON, 'definitions.semantic_types.items.anyOf[*].enum[*]')) + +# A list of all fields which is being generated by "_generate_metadata" method. +ALL_GENERATED_FIELDS = [ + 'schema', + 'structural_type', + 'semantic_types', + 'dimension', + 'name', +] + +PrimitiveMethodKind = utils.create_enum_from_json_schema_enum( + 'PrimitiveMethodKind', DEFINITIONS_JSON, + 'definitions.primitive_code.properties.instance_methods.additionalProperties.properties.kind.oneOf[*].enum[*]', + module=__name__, +) +PrimitiveArgumentKind = utils.create_enum_from_json_schema_enum( + 'PrimitiveArgumentKind', DEFINITIONS_JSON, + 'definitions.primitive_code.properties.arguments.additionalProperties.properties.kind.oneOf[*].enum[*]', + module=__name__, +) +PrimitiveInstallationType = utils.create_enum_from_json_schema_enum( + 'PrimitiveInstallationType', DEFINITIONS_JSON, + [ + 'definitions.installation.items.oneOf[*].properties.type.enum[*]', + 'definitions.installation.items.oneOf[*].allOf[*].properties.type.enum[*]' + ], + module=__name__, +) +PrimitiveAlgorithmType = utils.create_enum_from_json_schema_enum( + 'PrimitiveAlgorithmType', DEFINITIONS_JSON, + 'definitions.algorithm_types.items.oneOf[*].enum[*]', + module=__name__, +) +PrimitiveFamily = utils.create_enum_from_json_schema_enum( + 'PrimitiveFamily', DEFINITIONS_JSON, + 'definitions.primitive_family.oneOf[*].enum[*]', + module=__name__, +) +PrimitivePrecondition = utils.create_enum_from_json_schema_enum( + 'PrimitivePrecondition', DEFINITIONS_JSON, + 'definitions.preconditions.items.oneOf[*].enum[*]', + module=__name__, +) +PrimitiveEffect = utils.create_enum_from_json_schema_enum( + 'PrimitiveEffect', DEFINITIONS_JSON, + 'definitions.effects.items.oneOf[*].enum[*]', + module=__name__, +) +ForeignKeyType = utils.create_enum_from_json_schema_enum( + 'ForeignKeyType', DEFINITIONS_JSON, + 'definitions.foreign_key.oneOf[*].properties.type.enum[*]', + module=__name__, +) +Context = utils.create_enum_from_json_schema_enum( + 'Context', DEFINITIONS_JSON, + 'definitions.context.oneOf[*].enum[*]', + module=__name__, +) +PipelineRunPhase = utils.create_enum_from_json_schema_enum( + 'PipelineRunPhase', DEFINITIONS_JSON, + 'definitions.pipeline_run.properties.phase.anyOf[*].enum[*]', + module=__name__, +) +PipelineStepType = utils.create_enum_from_json_schema_enum( + 'PipelineStepType', DEFINITIONS_JSON, + 'definitions.pipeline_steps.items.oneOf[*].properties.type.enum[*]', + module=__name__, +) +PipelineRunStatusState = utils.create_enum_from_json_schema_enum( + 'StatusState', DEFINITIONS_JSON, + 
'definitions.status.properties.state.enum[*]', + module=__name__, +) +# Enumeration of argument and hyper-parameter types to a primitive in a step. +ArgumentType = utils.create_enum_from_json_schema_enum( + 'ArgumentType', DEFINITIONS_JSON, + 'definitions[container_argument,container_arguments,primitive_argument,primitive_arguments,data_argument,data_arguments,value_argument].properties.type.enum[*]', + module=__name__, +) + +M = typing.TypeVar('M', bound='MetadataEntry') +T = typing.TypeVar('T', bound='Metadata') +D = typing.TypeVar('D', bound='DataMetadata') +P = typing.TypeVar('P', bound='PrimitiveMetadata') +SimpleSelectorSegment = typing.Union[int, str] +SelectorSegment = typing.Union[SimpleSelectorSegment, ALL_ELEMENTS_TYPE] +ListSelector = typing.List[SelectorSegment] +TupleSelector = typing.Tuple[SelectorSegment, ...] +# A list or tuple of integers, strings, or ALL_ELEMENTS. +Selector = typing.Union[ListSelector, TupleSelector] + +# We register additional immutable values. We are doing it this way to overcome issues with import cycles. +if ALL_ELEMENTS not in utils.additional_immutable_values: + utils.additional_immutable_values += (ALL_ELEMENTS,) +if NO_VALUE not in utils.additional_immutable_values: + utils.additional_immutable_values += (NO_VALUE,) + + +class ColumnReference(typing.NamedTuple): + resource_id: str + column_index: int + + +class MetadataEntry: + __slots__ = ('elements', 'all_elements', 'metadata', 'is_empty', 'is_elements_empty') + + def __init__( + self, elements: utils.PMap = utils.EMPTY_PMAP, all_elements: 'MetadataEntry' = None, + metadata: frozendict.FrozenOrderedDict = frozendict.FrozenOrderedDict(), is_empty: bool = True, + is_elements_empty: bool = True, + ) -> None: + self.elements = elements + self.all_elements = all_elements + self.metadata = metadata + self.is_empty = is_empty + self.is_elements_empty = is_elements_empty + + def copy(self: M) -> M: + return type(self)(self.elements, self.all_elements, self.metadata, self.is_empty, self.is_elements_empty) + + def __copy__(self: M) -> M: + return self.copy() + + def update_is_empty(self) -> None: + self.is_empty = not self.metadata and self.is_elements_empty and self.all_elements is None + + +class Metadata: + """ + A basic class to be used as a value for `metadata` attribute + on values passed between primitives. + + Instances are immutable. + + Parameters + ---------- + metadata: + Optional initial metadata for the top-level of the value. + source: + DEPRECATED: argument ignored. + timestamp: + DEPRECATED: argument ignored. + """ + + @deprecate.arguments('source', 'timestamp', message="argument ignored") + def __init__(self, metadata: typing.Dict[str, typing.Any] = None, *, source: typing.Any = None, timestamp: datetime.datetime = None) -> None: + self._current_metadata = MetadataEntry() + + self._hash: int = None + + if metadata is not None: + self._update_in_place((), metadata, self._current_metadata) + + @deprecate.arguments('source', 'timestamp', message="argument ignored") + def update(self: T, selector: Selector, metadata: typing.Dict[str, typing.Any], *, source: typing.Any = None, timestamp: datetime.datetime = None) -> T: + """ + Updates metadata with new ``metadata`` for data pointed to with ``selector``. + + If value of any field is ``NO_VALUE``, that field is deleted. + + It returns a copy of this metadata object with new metadata applied. + + Parameters + ---------- + selector: + A selector pointing to data. + metadata: + A map of fields and values with metadata. 
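+            For example, ``{'name': 'measurements', 'structural_type': float}``.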
+ source: + DEPRECATED: argument ignored. + timestamp: + DEPRECATED: argument ignored. + + Returns + ------- + Updated metadata. + """ + + cls = type(self) + + new_metadata = cls() + + new_metadata._update_in_place(selector, metadata, self._current_metadata) + + return new_metadata + + @deprecate.arguments('source', 'timestamp', message="argument ignored") + def remove(self: T, selector: Selector, *, recursive: bool = False, strict_all_elements: bool = False, + source: typing.Any = None, timestamp: datetime.datetime = None) -> T: + """ + Removes all metadata at ``selector``. + + Parameters + ---------- + selector: + A selector to remove metadata at. + recursive: + Should remove also all metadata under the ``selector``? + strict_all_elements: + If ``True``, then when removing ``ALL_ELEMENTS`` entry, do not remove also metadata for all elements it matches. + source: + DEPRECATED: argument ignored. + timestamp: + DEPRECATED: argument ignored. + + Returns + ------- + Updated metadata. + """ + + cls = type(self) + + new_metadata = cls() + + new_metadata._remove_in_place(selector, recursive, strict_all_elements, self._current_metadata) + + return new_metadata + + @deprecate.function(message="create a DataMetadata instance explicitly instead") + @deprecate.arguments('source', 'timestamp', message="argument ignored") + def clear(self: T, metadata: typing.Dict[str, typing.Any] = None, *, source: typing.Any = None, timestamp: datetime.datetime = None) -> T: + """ + DEPRECATED: create a Metadata instance explicitly instead. + + Creates and returns a new (clear) metadata object. + + Parameters + ---------- + metadata: + Optional new initial metadata for the top-level of the value. + source: + DEPRECATED: argument ignored. + timestamp: + DEPRECATED: argument ignored. + + Returns + ------- + New metadata object. + """ + + return type(self)(metadata) + + def _update_in_place(self, selector: Selector, metadata: typing.Dict[str, typing.Any], + parent_current_metadata: MetadataEntry) -> None: + """ + This method exist only for internal purposes and you should never ever call this to update metadata from outside. + """ + + self.check_selector(selector) + + # If metadata is already an instance of frozen dict, we just check that it is immutable. + if isinstance(metadata, frozendict.FrozenOrderedDict): + utils.check_immutable(metadata) + else: + metadata = utils.make_immutable_copy(metadata) + + if not isinstance(metadata, frozendict.FrozenOrderedDict): + raise exceptions.InvalidArgumentTypeError("Metadata should be a dict.") + + self._current_metadata = self._update(selector, parent_current_metadata, metadata) + + def _remove_in_place(self, selector: Selector, recursive: bool, strict_all_elements: bool, + parent_current_metadata: MetadataEntry) -> None: + """ + This method exist only for internal purposes and you should never ever call this to remove metadata from outside. + """ + + self.check_selector(selector) + + self._current_metadata = self._remove(selector, recursive, strict_all_elements, parent_current_metadata) + + # TODO: Allow querying only a subset of metadata (not the whole dict). + # TODO: Maybe cache results? LRU? + def query(self, selector: Selector, *, ignore_all_elements: bool = False, remove_no_value: bool = True) -> frozendict.FrozenOrderedDict: + """ + Returns metadata for data pointed to with ``selector``. + + When querying using ``ALL_ELEMENTS`` means only metadata which has been set using ALL_ELEMENTS + is returned. 
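+        For example, on tabular data ``metadata.query((ALL_ELEMENTS, 0))`` returns metadata
+        which applies to the first column of every row.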
+ + Parameters + ---------- + selector: + A selector to query metadata for. + ignore_all_elements: + By default, metadata from ALL_ELEMENTS is merged with metadata for an element itself. + By setting this argument to ``True``, this is disabled and just metadata from an element is returned. + remove_no_value: + By default all ``NO_VALUE`` values are removed. If set to ``False``, they are not removed. + + Returns + ------- + Metadata at a given selector. + """ + + self.check_selector(selector) + + metadata = self._query(selector, self._current_metadata, 0 if ignore_all_elements else None) + + if remove_no_value: + return self._remove_no_value(metadata) + else: + return metadata + + def query_with_exceptions(self, selector: Selector, *, remove_no_value: bool = True) -> typing.Tuple[frozendict.FrozenOrderedDict, typing.Dict[TupleSelector, frozendict.FrozenOrderedDict]]: + """ + In addition to returning metadata for data pointed to with ``selector``, this method for every ``ALL_ELEMENTS`` + selector segment also returns a map between selectors and metadata for all elements which have metadata + which differs from that of ``ALL_ELEMENTS``. + + Parameters + ---------- + selector: + A selector to query metadata for. + remove_no_value: + By default all ``NO_VALUE`` values are removed. If set to ``False``, they are not removed. + + Returns + ------- + A tuple of metadata at a given selector and a dict of exceptions. + """ + + self.check_selector(selector) + + metadata = self._query(selector, self._current_metadata, None) + if remove_no_value: + metadata = self._remove_no_value(metadata) + + exceptions = self._query_exceptions(selector, self._current_metadata) + + exceptions_with_selectors = {} + for exception_selector in exceptions: + exception_metadata = self._query(exception_selector, self._current_metadata, None) + if remove_no_value: + exception_metadata = self._remove_no_value(exception_metadata) + + if exception_metadata and exception_metadata != metadata: + exceptions_with_selectors[exception_selector] = exception_metadata + + return metadata, exceptions_with_selectors + + def query_field(self, selector: Selector, field: str, *, strict_all_elements: bool = True) -> typing.Any: + """ + Queries metadata for data pointed to with ``selector`` and returns only the + ``field`` of that metadata. Raises `KeyError` exception if metadata or field + is not set. + + ``field`` represents only top-level fields in metadata. + + Parameters + ---------- + selector: + A selector to query metadata for. + field: + A field name to query. + strict_all_elements: + If set, the method does not just return ``field`` value of the metadata + under ``selector``, but checks that the value really holds for all + elements matching the ``selector``, without exception. This is helpful + also if metadata is not compacted and ``field`` value is the same + across all elements, but ``ALL_ELEMENTS`` metadata does not contain + that field. + + Returns + ------- + A value of ``field`` of metadata at ``selector``. + """ + + if not strict_all_elements: + return self.query(selector)[field] + + metadata, exceptions_with_selectors = self.query_with_exceptions(selector) + + # We have a candidate which potentially holds for all elements. + if field in metadata: + value = metadata[field] + + for exception_metadata in exceptions_with_selectors.values(): + # Is there an exception for this field? We care only if field exists, + # then it has to match in the value. 
But if field does not exist, + # value from "metadata" will be used anyway, so that is OK. + if field in exception_metadata and exception_metadata[field] != value: + raise KeyError("Field '{field}' is not the same across all elements.".format(field=field)) + + return value + + # If selector is without "ALL_ELEMENTS" then field is simply not set. + if ALL_ELEMENTS not in selector: + assert not exceptions_with_selectors + raise KeyError("Field '{field}' is not set.".format(field=field)) + + # Field might be set on all elements, but metadata is no compacted, + # check if field is the same across all metadata exceptions. + # TODO: Check that metadata exceptions cover whole dimension. + # When field is not set for ALL_ELEMENTS, we have to traverse all potential elements, + # not just those which have metadata set, but any which could have it set. We can do + # that if dimension length is set, we can enumerate all elements and check that they + # contain equal field value. But easier it is to just check that dimension length + # matches the number of metadata exceptions. Then we know we have checked all elements + # which can exist on data. And if any element is missing (does not have metadata set), + # it does not have field set anyway, which means it does not match field value of other + # elements. This dimension length comparison can work even in the case when dimension + # is not enumerable (e.g., a dict). Checking dimension lengths becomes tricky when + # multiple ALL_ELEMENTS are present in the selector though, and especially if data + # is jagged (does not have same size sub-dimensions for all elements). An issue is + # also that dimensions defined for DataMetadata and not Metadata. + + # Can raise KeyError. + first_exception_selector, first_exception_metadata = exceptions_with_selectors.popitem() + + # Can raise KeyError. + value = first_exception_metadata[field] + + for exception_metadata in exceptions_with_selectors.values(): + # We require that "field" both exist in all exception metadata and has the same value + # as all other fields (which we check by checking against the first exception metadata). + if field not in exception_metadata or exception_metadata[field] != value: + raise KeyError("Field '{field}' is not the same across all elements.".format(field=field)) + + return value + + def query_field_with_exceptions(self, selector: Selector, field: str) -> typing.Tuple[typing.Any, typing.Dict[TupleSelector, typing.Any]]: + """ + In addition to returning ``field`` of metadata for data pointed to with ``selector``, + this method for every ``ALL_ELEMENTS`` selector segment also returns a map between + selectors and field values for all elements which have field which differs from that + of ``ALL_ELEMENTS``. + + If ``field`` does not exist under ``selector``, ``NO_VALUE`` is returned instead, + and all exceptions are required to contain ``field``. + + ``field`` represents only top-level fields in metadata. + + Parameters + ---------- + selector: + A selector to query metadata for. + field: + A field name to query. + + Returns + ------- + A tuple of value at a given selector and field and a dict of exceptions. + """ + + metadata, exceptions_with_selectors = self.query_with_exceptions(selector) + + if field in metadata: + # If "field" exist in "metadata", we return only those exceptions which contain "field" which + # differs from that in "metadata". Only they are real "exceptions" for this "selector" and "field". 
+ return metadata[field], { + exception_selector: exception_metadata[field] for exception_selector, exception_metadata in exceptions_with_selectors.items() + if field in exception_metadata and exception_metadata[field] != metadata[field] + } + + # If selector is without "ALL_ELEMENTS" then field is simply not set. + if ALL_ELEMENTS not in selector: + assert not exceptions_with_selectors + raise KeyError("Field '{field}' is not set.".format(field=field)) + + field_exceptions = {} + + for exception_selector, exception_metadata in exceptions_with_selectors.items(): + if field not in exception_metadata: + raise KeyError("Field '{field}' is not set.".format(field=field)) + + field_exceptions[exception_selector] = exception_metadata[field] + + return NO_VALUE, field_exceptions + + def _query(self, selector: Selector, metadata_entry: typing.Optional[MetadataEntry], ignore_all_elements: typing.Optional[int]) -> frozendict.FrozenOrderedDict: + if metadata_entry is None: + return frozendict.FrozenOrderedDict() + if len(selector) == 0: + return metadata_entry.metadata + + segment, selector_rest = selector[0], selector[1:] + + if ignore_all_elements is not None: + new_ignore_all_elements = ignore_all_elements - 1 + else: + new_ignore_all_elements = None + + all_elements_metadata = self._query(selector_rest, metadata_entry.all_elements, new_ignore_all_elements) + if segment is ALL_ELEMENTS: + metadata = all_elements_metadata + elif segment in metadata_entry.elements: + segment = typing.cast(SimpleSelectorSegment, segment) + metadata = self._query(selector_rest, metadata_entry.elements[segment], new_ignore_all_elements) + if ignore_all_elements is None or ignore_all_elements > 0: + metadata = self._merge_metadata(all_elements_metadata, metadata) + elif ignore_all_elements is not None and ignore_all_elements <= 0: + metadata = frozendict.FrozenOrderedDict() + else: + metadata = all_elements_metadata + + return metadata + + def _query_exceptions(self, selector: Selector, metadata_entry: typing.Optional[MetadataEntry]) -> typing.Sequence[TupleSelector]: + if metadata_entry is None: + return [] + if len(selector) == 0: + return [] + + segment, selector_rest = selector[0], selector[1:] + + exceptions: typing.List[TupleSelector] = [] + if segment is ALL_ELEMENTS: + if selector_rest: + for exception_selector in self._query_exceptions(selector_rest, metadata_entry.all_elements): + exceptions.append((segment,) + exception_selector) + + for element_segment, element_metadata_entry in metadata_entry.elements.items(): + if selector_rest: + for exception_selector in self._query_exceptions(selector_rest, element_metadata_entry): + exceptions.append((typing.cast(SelectorSegment, element_segment),) + exception_selector) + else: + if element_metadata_entry.metadata: + exceptions.append((element_segment,)) + elif segment in metadata_entry.elements: + element_metadata_entry = metadata_entry.elements[typing.cast(SimpleSelectorSegment, segment)] + if selector_rest: + for exception_selector in self._query_exceptions(selector_rest, element_metadata_entry): + exceptions.append((segment,) + exception_selector) + elif element_metadata_entry.metadata: + exceptions.append((segment,)) + + return exceptions + + def _remove(self, selector: Selector, recursive: bool, strict_all_elements: bool, + metadata_entry: typing.Optional[MetadataEntry]) -> MetadataEntry: + if metadata_entry is None: + new_metadata_entry = MetadataEntry() + else: + new_metadata_entry = metadata_entry.copy() + + if len(selector) == 0: + new_metadata_entry.metadata 
= frozendict.FrozenOrderedDict() + if recursive: + new_metadata_entry.all_elements = None + new_metadata_entry.elements = utils.EMPTY_PMAP + new_metadata_entry.is_elements_empty = True + new_metadata_entry.is_empty = True + else: + new_metadata_entry.update_is_empty() + return new_metadata_entry + + segment, selector_rest = selector[0], selector[1:] + + if segment is ALL_ELEMENTS: + new_metadata_entry.all_elements = self._remove(selector_rest, recursive, strict_all_elements, new_metadata_entry.all_elements) + if new_metadata_entry.all_elements.is_empty: + new_metadata_entry.all_elements = None + new_metadata_entry.update_is_empty() + + if not strict_all_elements and new_metadata_entry.elements: + new_elements_evolver = new_metadata_entry.elements.evolver() + for element_segment, element_metadata_entry in new_metadata_entry.elements.items(): + new_element_metadata_entry = self._remove(selector_rest, recursive, strict_all_elements, element_metadata_entry) + if new_element_metadata_entry.is_empty: + new_elements_evolver.remove(element_segment) + else: + new_elements_evolver.set(element_segment, new_element_metadata_entry) + new_metadata_entry.elements = new_elements_evolver.persistent() + new_metadata_entry.is_elements_empty = not new_metadata_entry.elements + new_metadata_entry.update_is_empty() + + else: + segment = typing.cast(SimpleSelectorSegment, segment) + if segment in new_metadata_entry.elements: + new_element_metadata_entry = self._remove(selector_rest, recursive, strict_all_elements, new_metadata_entry.elements[segment]) + if new_element_metadata_entry.is_empty: + new_metadata_entry.elements = new_metadata_entry.elements.remove(segment) + else: + new_metadata_entry.elements = new_metadata_entry.elements.set(segment, new_element_metadata_entry) + new_metadata_entry.is_elements_empty = not new_metadata_entry.elements + new_metadata_entry.update_is_empty() + + return new_metadata_entry + + def _update(self, selector: Selector, metadata_entry: typing.Optional[MetadataEntry], + metadata: frozendict.FrozenOrderedDict) -> MetadataEntry: + if metadata_entry is None: + new_metadata_entry = MetadataEntry() + else: + new_metadata_entry = metadata_entry.copy() + + if len(selector) == 0: + # One would think that we could remove "NO_VALUE" values during merging, but we have to + # keep them to know which values we have to remove when merging with all elements metadata. + new_metadata_entry.metadata = self._merge_metadata(new_metadata_entry.metadata, metadata) + new_metadata_entry.update_is_empty() + return new_metadata_entry + + segment, selector_rest = selector[0], selector[1:] + + if segment is ALL_ELEMENTS: + new_metadata_entry.all_elements = self._update(selector_rest, new_metadata_entry.all_elements, metadata) + if new_metadata_entry.all_elements.is_empty: + new_metadata_entry.all_elements = None + new_metadata_entry.update_is_empty() + + if new_metadata_entry.elements: + # Fields on direct elements have precedence over fields on ALL_ELEMENTS, but we want the last + # call to update to take precedence. So all fields found in metadata just set on ALL_ELEMENTS + # are removed from all metadata on direct elements. 
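+                # For example, if element 0 already has {'name': 'a'} and this call sets
+                # {'name': 'b'} on ALL_ELEMENTS, the 'name' field is pruned from element 0
+                # below, so a subsequent query of element 0 returns 'b' from ALL_ELEMENTS.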
+ new_elements_evolver = new_metadata_entry.elements.evolver() + for element_segment, element_metadata_entry in new_metadata_entry.elements.items(): + new_element_metadata_entry = self._prune(selector_rest, element_metadata_entry, metadata) + if new_element_metadata_entry is None or new_element_metadata_entry.is_empty: + new_elements_evolver.remove(element_segment) + else: + new_elements_evolver.set(element_segment, new_element_metadata_entry) + new_metadata_entry.elements = new_elements_evolver.persistent() + new_metadata_entry.is_elements_empty = not new_metadata_entry.elements + new_metadata_entry.update_is_empty() + + else: + segment = typing.cast(SimpleSelectorSegment, segment) + new_element_metadata_entry = self._update(selector_rest, new_metadata_entry.elements.get(segment, None), metadata) + if new_element_metadata_entry.is_empty: + new_metadata_entry.elements = new_metadata_entry.elements.discard(segment) + else: + new_metadata_entry.elements = new_metadata_entry.elements.set(segment, new_element_metadata_entry) + new_metadata_entry.is_elements_empty = not new_metadata_entry.elements + new_metadata_entry.update_is_empty() + + return new_metadata_entry + + def _merge_metadata(self, metadata1: frozendict.FrozenOrderedDict, metadata2: frozendict.FrozenOrderedDict) -> frozendict.FrozenOrderedDict: + """ + Merges all fields from ``metadata2`` on top of ``metadata1``, recursively. + + Only dicts are merged recursively, arrays are not. + """ + + # Copy so that we can mutate. + metadata = collections.OrderedDict(metadata1) + + for name, value in metadata2.items(): + if name in metadata: + if isinstance(metadata[name], frozendict.FrozenOrderedDict) and isinstance(value, frozendict.FrozenOrderedDict): + merged_value = self._merge_metadata(metadata[name], value) + # If value is an empty dict, but before merging it was not, we just remove the whole field. + if metadata[name] and not merged_value: + del metadata[name] + else: + metadata[name] = merged_value + else: + metadata[name] = value + else: + metadata[name] = value + + return frozendict.FrozenOrderedDict(metadata) + + def _merge_metadata_entries(self, metadata_entry1: MetadataEntry, metadata_entry2: MetadataEntry) -> MetadataEntry: + """ + Merges ``metadata_entry2`` on top of ``metadata_entry1``, recursively, and + returns a new metadata entry. + """ + + output_metadata_entry = MetadataEntry() + + # Merging elements. + new_elements_evolver = metadata_entry1.elements.evolver() + for element_segment, element_metadata_entry in metadata_entry2.elements.items(): + if element_segment not in new_elements_evolver: + new_elements_evolver.set(element_segment, element_metadata_entry) + else: + new_elements_evolver.set( + element_segment, self._merge_metadata_entries(new_elements_evolver[element_segment], element_metadata_entry), + ) + output_metadata_entry.elements = new_elements_evolver.persistent() + output_metadata_entry.is_elements_empty = not output_metadata_entry.elements + + # Merging "ALL_ELEMENTS". 
+ if metadata_entry1.all_elements is not None and metadata_entry2.all_elements is not None: + output_metadata_entry.all_elements = self._merge_metadata_entries(metadata_entry1.all_elements, metadata_entry2.all_elements) + elif metadata_entry1.all_elements is not None: + output_metadata_entry.all_elements = metadata_entry1.all_elements + elif metadata_entry2.all_elements is not None: + output_metadata_entry.all_elements = metadata_entry2.all_elements + + # Merging metadata: + output_metadata_entry.metadata = self._merge_metadata(metadata_entry1.metadata, metadata_entry2.metadata) + + output_metadata_entry.update_is_empty() + + return output_metadata_entry + + def _remove_no_value(self, metadata: frozendict.FrozenOrderedDict) -> frozendict.FrozenOrderedDict: + # Copy so that we can mutate. + metadata = collections.OrderedDict(metadata) + + # We iterate over a list so that we can change dict while iterating. + for name, value in list(metadata.items()): + if value is NO_VALUE: + del metadata[name] + elif isinstance(value, frozendict.FrozenOrderedDict): + new_value = self._remove_no_value(value) + # If value is an empty dict, but before removing "NO_VALUE" it was not, we just remove the whole field. + if metadata[name] and not new_value: + del metadata[name] + else: + metadata[name] = new_value + + return frozendict.FrozenOrderedDict(metadata) + + def _prune(self, selector: Selector, metadata_entry: typing.Optional[MetadataEntry], metadata: frozendict.FrozenOrderedDict) -> typing.Optional[MetadataEntry]: + if metadata_entry is None: + return metadata_entry + + new_metadata_entry = metadata_entry.copy() + + if len(selector) == 0: + new_metadata_entry.metadata = self._prune_metadata(new_metadata_entry.metadata, metadata) + new_metadata_entry.update_is_empty() + return new_metadata_entry + + segment, selector_rest = selector[0], selector[1:] + + if segment is ALL_ELEMENTS: + new_metadata_entry.all_elements = self._prune(selector_rest, new_metadata_entry.all_elements, metadata) + if new_metadata_entry.all_elements is not None and new_metadata_entry.all_elements.is_empty: + new_metadata_entry.all_elements = None + new_metadata_entry.update_is_empty() + + if new_metadata_entry.elements: + new_elements_evolver = new_metadata_entry.elements.evolver() + for element_segment, element_metadata_entry in new_metadata_entry.elements.items(): + new_element_metadata_entry = self._prune(selector_rest, element_metadata_entry, metadata) + if new_element_metadata_entry is None or new_element_metadata_entry.is_empty: + new_elements_evolver.remove(element_segment) + else: + new_elements_evolver.set(element_segment, new_element_metadata_entry) + new_metadata_entry.elements = new_elements_evolver.persistent() + new_metadata_entry.is_elements_empty = not new_metadata_entry.elements + new_metadata_entry.update_is_empty() + + elif segment in new_metadata_entry.elements: + segment = typing.cast(SimpleSelectorSegment, segment) + new_element_metadata_entry = self._prune(selector_rest, new_metadata_entry.elements[segment], metadata) + if new_element_metadata_entry is None or new_element_metadata_entry.is_empty: + new_metadata_entry.elements = new_metadata_entry.elements.remove(segment) + else: + new_metadata_entry.elements = new_metadata_entry.elements.set(segment, new_element_metadata_entry) + new_metadata_entry.is_elements_empty = not new_metadata_entry.elements + new_metadata_entry.update_is_empty() + + return new_metadata_entry + + def _prune_metadata(self, metadata1: frozendict.FrozenOrderedDict, metadata2: 
frozendict.FrozenOrderedDict) -> frozendict.FrozenOrderedDict: + """ + Removes all fields which are found in ``metadata2`` from ``metadata1``, recursively. + + Values of ``metadata2`` do not matter, except if they are a dict, in which case + removal is done recursively. + """ + + # Copy so that we can mutate. + metadata = collections.OrderedDict(metadata1) + + for name, value in metadata2.items(): + if name not in metadata: + continue + + if isinstance(metadata[name], frozendict.FrozenOrderedDict) and isinstance(value, frozendict.FrozenOrderedDict): + pruned_value = self._prune_metadata(metadata[name], value) + # If value is an empty dict, but before pruning it was not, we just remove the whole field. + if metadata[name] and not pruned_value: + del metadata[name] + else: + metadata[name] = pruned_value + else: + del metadata[name] + + return frozendict.FrozenOrderedDict(metadata) + + def compact(self: T, fields_to_compact: typing.Sequence[str]) -> T: + """ + Compact metadata and return it. Produces equivalent but compact + metadata where equal metadata for all elements in a dimension are compacted + into ``ALL_ELEMENTS`` selector segment. + + Parameters + ---------- + fields_to_compact: + Which fields to compact in the metadata. + + Returns + ------- + Compacted metadata. + """ + + metadata_dict: typing.Dict[TupleSelector, typing.Dict] = collections.OrderedDict() + + for metadata_description in self.to_internal_simple_structure(): + metadata_dict[tuple(metadata_description['selector'])] = metadata_description['metadata'] + + metadata_dict = self._compact_metadata(metadata_dict, fields_to_compact) + + new_metadata = copy.copy(self) + + for selector, metadata in metadata_dict.items(): + metadata = utils.make_immutable_copy(metadata) + + if not isinstance(metadata, frozendict.FrozenOrderedDict): + raise exceptions.InvalidArgumentTypeError("Metadata should be a dict.") + + new_metadata._current_metadata = new_metadata._update(selector, new_metadata._current_metadata, metadata) + + return new_metadata + + # TODO: During compacting, we could also create an Union type of all structural types in elements and set it on "ALL_ELEMENTS". + @classmethod + def _compact_metadata(cls: typing.Type[T], metadata_dict: typing.Dict[TupleSelector, typing.Dict], fields_to_compact: typing.Sequence[str]) -> typing.Dict[TupleSelector, typing.Dict]: + """ + Compacts only top-level fields (if their values are all equal) listed in ``fields_to_compact``. + + Only top-level fields listed in ``fields_to_compact`` will be compacted. The reason for ``fields_to_compact`` + is that it is an optimization, so that we do not have to first go over all metadata to detect which all + fields are there. When used by ``_generate``, ``_generate_metadata`` is producing a fixed set of fields which + works in our advantage. + + We prefer to compact segments at the beginning of the selector over the segments later on. + + Parameters + ---------- + metadata_dict: + A dict where field is selector and value is the metadata dict under this selector. + fields_to_compact: + Which fields to compact in the metadata. + + Returns + ------- + Compacted metadata representation in the form of a dict where fields are selectors. + """ + + # We rely on the fact that dicts preserve order in Python 3.6+ and do not use + # "OrderedDict" here for simplicity (we do not compare by equality dicts here to care + # about order of fields in equality check). 
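+        # For example, if (0,) and (1,) are the only selectors of a given length and both
+        # carry {'name': 'a'}, the two entries are compacted into a single (ALL_ELEMENTS,)
+        # entry with that metadata.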
+ results: typing.Dict[TupleSelector, typing.Dict] = collections.defaultdict(dict) + + # Key is the length of selectors and the value is a list of selectors of the same length. + selector_lengths: typing.Dict[int, typing.List[TupleSelector]] = collections.defaultdict(list) + for selector in metadata_dict.keys(): + selector_lengths[len(selector)].append(selector) + + for length, selectors in sorted(selector_lengths.items(), key=operator.itemgetter(0)): + update_selectors: typing.Dict[TupleSelector, typing.List] = collections.defaultdict(list) + + for field in fields_to_compact: + values_to_selectors: typing.Dict[typing.Any, typing.List[TupleSelector]] = collections.defaultdict(list) + for selector in selectors: + if field in metadata_dict[selector]: + values_to_selectors[metadata_dict[selector][field]].append(selector) + + for value in values_to_selectors.keys(): + compacted_selectors = cls._get_compacted_selectors(values_to_selectors[value], selectors) + + for selector in compacted_selectors: + update_selectors[selector].append({field: value}) + + for selector, items in sorted(update_selectors.items(), key=operator.itemgetter(0)): + for item in items: + results[selector].update(item) + + return collections.OrderedDict(results) + + @classmethod + def _get_compacted_selectors(cls, selectors_to_compact: typing.List[TupleSelector], total_selectors: typing.List[TupleSelector]) -> typing.List[TupleSelector]: + """ + This function returns a compacted representation of ``selectors_to_compact``. + + Parameters + ---------- + selectors_to_compact: + A list of selectors to be compacted which have the same value under a certain field. + total_selectors: + All possible selectors of a certain length. + + Returns + ------- + A list of compacted selectors. + """ + + input_selectors = copy.copy(selectors_to_compact) + input_selectors_set = set(input_selectors) + output_selectors = selectors_to_compact + + length_of_selector = len(input_selectors[0]) + + other_selectors_set = set(total_selectors) - input_selectors_set + + for other_selector in sorted(other_selectors_set): + if cls._selector_overlap(other_selector, input_selectors_set): + other_selectors_set.remove(other_selector) + + for i in range(length_of_selector): + all_segments = {selector[i] for selector in total_selectors} + for index, selector_tuple in enumerate(output_selectors): + can_collapse = True + + for segment in all_segments: + test_selector = list(selector_tuple) + test_selector[i] = segment + if cls._selector_overlap(test_selector, other_selectors_set): + can_collapse = False + + if can_collapse: + selector_list = list(selector_tuple) + selector_list[i] = ALL_ELEMENTS + output_selectors[index] = tuple(selector_list) + + output_selectors = sorted(set(output_selectors)) + + output_selectors = cls._greedy_prune_selector(output_selectors, input_selectors) + + return output_selectors + + @classmethod + def _selector_overlap(cls, test_selector: Selector, selectors_set: typing.Set[TupleSelector]) -> bool: + """ + This function checks if ``test_selector`` overlaps with selectors ``selectors_set``. + + Parameters + ---------- + test_selector: + The input selector. + selectors_set: + A set of selectors. + + Returns + ------- + Whether the selector ``test_selector`` overlaps with any selector in ``selectors_set``. 
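+
+        Examples
+        --------
+        A rough illustration of the overlap check (for exposition only)::
+
+            # (ALL_ELEMENTS, 0) matches (2, 0) segment by segment, so they overlap.
+            Metadata._selector_overlap((ALL_ELEMENTS, 0), {(2, 0)})  # True
+            # (2, 0) and (3, 1) differ in a concrete segment, so they do not.
+            Metadata._selector_overlap((2, 0), {(3, 1)})  # False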
+ """ + + for selector in selectors_set: + assert len(selector) == len(test_selector) + + is_same = True + for i in range(len(test_selector)): + if test_selector[i] is ALL_ELEMENTS: + continue + if selector[i] is not ALL_ELEMENTS: + if test_selector[i] != selector[i]: + is_same = False + + if is_same: + return True + + return False + + @classmethod + def _selector_contained(cls, selector_1: Selector, selector_2: Selector) -> bool: + """ + This function checks if ``selector_1`` is contained in ``selector_2``. + + Returns + ------- + Whether ``selector_1`` is contained in ``selector_2``. + + Notes + ----- + This function is different from `_selector_overlap` which checks if two selectors overlap. + """ + + for i in range(len(selector_1)): + if selector_1[i] is ALL_ELEMENTS: + if selector_2[i] is not ALL_ELEMENTS: + return False + continue + if selector_2[i] is not ALL_ELEMENTS: + if selector_1[i] != selector_2[i]: + return False + + return True + + @classmethod + def _greedy_prune_selector(cls, compacted_selectors: typing.List[TupleSelector], selectors_to_compact: typing.List[TupleSelector]) -> typing.List[TupleSelector]: + """ + This method implements a greedy algorithm to remove unnecessary selectors from ``compacted_selectors``. + + Parameters + ---------- + compacted_selectors: + This is an already compacted list of selectors which we get from ``selectors_to_compact``. + selectors_to_compact: + This is the list of original selectors with the same value under a certain field. + + Returns + ------- + The list of selectors where unnecessary selectors have been removed from ``compacted_selectors``. + """ + + # Maps from each selector in "compacted_selectors" to selectors which it covers in "selectors_to_compact". + contained_selectors: typing.Dict[TupleSelector, typing.List[TupleSelector]] = collections.defaultdict(list) + selector_count_mask: typing.Dict[TupleSelector, int] = collections.defaultdict(int) + + # Compute for each selector in "selectors_to_compact" how many selectors in "compacted_selectors" cover them. + # Also builds the "contained_selectors". + for compact_selector in compacted_selectors: + for selector in selectors_to_compact: + if cls._selector_contained(selector, compact_selector): + selector_count_mask[selector] += 1 + contained_selectors[compact_selector].append(selector) + + continue_flag = True + while continue_flag: + continue_flag = False + for compact_selector in compacted_selectors: + remove_flag = True + for selector in contained_selectors[compact_selector]: + if selector_count_mask[selector] == 1: + remove_flag = False + if remove_flag: + continue_flag = True + redundant_selector = compact_selector + if continue_flag: + compacted_selectors.remove(redundant_selector) + for selector in contained_selectors[redundant_selector]: + selector_count_mask[selector] -= 1 + + return compacted_selectors + + @classmethod + def check_selector(cls, selector: Selector) -> None: + """ + Checks that a given ``selector`` is a valid selector. If ``selector`` is invalid it raises an exception. + + It checks that it is a tuple or a list and currently we require that all segments of a selector + are strings, integers, or a special value ``ALL_ELEMENTS``. + + Parameters + ---------- + selector: + Selector to check. 
+ """ + + if not isinstance(selector, (tuple, list)): + raise exceptions.InvalidArgumentTypeError("Selector is not a tuple or a list.") + + for i, segment in enumerate(selector): + if not isinstance(segment, (str, int)) and segment is not ALL_ELEMENTS: + raise exceptions.InvalidArgumentTypeError( + "'{segment}' at {path} is not a str, int, or ALL_ELEMENTS.".format( + segment=segment, + path=list(selector[0:i + 1]), + ), + ) + + def __hash__(self) -> int: + if self._hash is None: + self._hash = hash(self._current_metadata) + + return self._hash + + def __eq__(self, other): # type: ignore + if not isinstance(other, Metadata): + return NotImplemented + + return self._current_metadata == other._current_metadata + + def get_elements(self, selector: Selector) -> typing.Sequence[SelectorSegment]: + """ + Returns a list of element names which exists under a selector, if any. + + Parameters + ---------- + selector: + A selector to return elements under. + + Returns + ------- + List of element names. + """ + + self.check_selector(selector) + + return self._get_elements(selector, self._current_metadata) + + def _get_elements(self, selector: Selector, metadata_entry: typing.Optional[MetadataEntry]) -> typing.Sequence[SelectorSegment]: + if metadata_entry is None: + return [] + if len(selector) == 0: + if metadata_entry.all_elements is not None: + all_elements: ListSelector = [ALL_ELEMENTS] + else: + all_elements = [] + return all_elements + list(metadata_entry.elements.keys()) + + segment, selector_rest = selector[0], selector[1:] + + all_elements_elements = self._get_elements(selector_rest, metadata_entry.all_elements) + if segment is ALL_ELEMENTS: + elements = all_elements_elements + elif segment in metadata_entry.elements: + segment = typing.cast(SimpleSelectorSegment, segment) + elements = self._get_elements(selector_rest, metadata_entry.elements[segment]) + elements = sorted(set(typing.cast(typing.List, all_elements_elements) + typing.cast(typing.List, elements))) + else: + elements = all_elements_elements + + return elements + + def to_internal_json_structure(self) -> typing.Sequence[typing.Dict]: + """ + Converts metadata to a JSON-compatible structure. + + The structure exposes how metadata is stored internally (metadata for ``ALL_ELEMENTS`` + separate from metadata for individual elements) and can change in the future. + This method exist for debugging purposes and to allow serialization of metadata. + Use `to_json_structure` method if you want to access semantically valid + representation of metadata. + + Returns + ------- + A JSON-compatible list of dicts. + """ + + ALL_ELEMENTS_REPR = repr(ALL_ELEMENTS) + + return [ + { + 'selector': [ALL_ELEMENTS_REPR if segment is ALL_ELEMENTS else segment for segment in entry['selector']], + 'metadata': utils.to_reversible_json_structure(entry['metadata']), + } + for entry in self.to_internal_simple_structure() + ] + + def to_internal_simple_structure(self) -> typing.Sequence[typing.Dict]: + """ + Converts metadata to a simple structure, similar to JSON, but with values + left as Python values. + + The structure exposes how metadata is stored internally (metadata for ``ALL_ELEMENTS`` + separate from metadata for individual elements) and can change in the future. + This method exist for debugging purposes and to allow serialization of metadata. + Use `to_simple_structure` method if you want to access semantically valid + representation of metadata. + + Returns + ------- + A list of dicts. 
+ """ + + return self._to_internal_simple_structure([], self._current_metadata) + + @classmethod + def from_internal_json_structure(cls: typing.Type[T], json_structure: typing.Iterable[typing.Dict]) -> T: + """ + Constructs metadata object back from an internal JSON-compatible structure. + as made by ``to_internal_json_structure``. + + Parameters + ---------- + json_structure: + Iterable of the structure. + + Returns + ------- + Constructed metadata object. + """ + + ALL_ELEMENTS_REPR = repr(ALL_ELEMENTS) + + return cls.from_internal_simple_structure( + { + 'selector': [ALL_ELEMENTS if segment == ALL_ELEMENTS_REPR else segment for segment in entry['selector']], + 'metadata': utils.from_reversible_json_structure(entry['metadata']), + } for entry in json_structure + ) + + @classmethod + def from_internal_simple_structure(cls: typing.Type[T], structure: typing.Iterable[typing.Dict]) -> T: + """ + Constructs metadata object back from an internal simple structure, + as made by ``to_internal_simple_structure``. + + Parameters + ---------- + structure: + Iterable of the structure. + + Returns + ------- + Constructed metadata object. + """ + + metadata = cls() + + # TODO: Optimize, see: https://gitlab.com/datadrivendiscovery/d3m/issues/408 + for entry in structure: + metadata = metadata.update(entry['selector'], entry['metadata']) + + return metadata + + def _to_internal_simple_structure(self, selector: Selector, metadata_entry: typing.Optional[MetadataEntry]) -> typing.List[typing.Dict]: + output = [] + + selector = typing.cast(ListSelector, selector) + + if metadata_entry.metadata: + output.append({ + 'selector': list(selector), + 'metadata': metadata_entry.metadata, + }) + + if metadata_entry.all_elements is not None: + output += self._to_internal_simple_structure(selector + [ALL_ELEMENTS], metadata_entry.all_elements) + + for element_segment, element_metadata_entry in metadata_entry.elements.items(): + output += self._to_internal_simple_structure(selector + [element_segment], element_metadata_entry) + + return output + + def to_json_structure(self) -> typing.Sequence[typing.Dict]: + """ + Converts metadata to a JSON-compatible structure. + + The output matches the output one obtain by using `query` method and is a + semantically valid representation of metadata, but it does not matches + how metadata is stored internally. To obtain that, you can use + `to_internal_json_structure` method. + + It does not make a JSON structure which can then be parsed back to + reconstruct original metadata object. To obtain that, you can use + `to_internal_json_structure` method. + + Returns + ------- + A JSON-compatible list of dicts. + """ + + return utils.to_json_structure(self.to_simple_structure()) + + def to_simple_structure(self) -> typing.Sequence[typing.Dict]: + """ + Converts metadata to a simple structure, similar to JSON, but with values + left as Python values. + + The output matches the output one obtain by using `query` method and is a + semantically valid representation of metadata, but it does not matches + how metadata is stored internally. To obtain that, you can use + `to_internal_simple_structure` method. + + It does not make a structure which can then be converted back to + reconstruct original metadata object. To obtain that, you can use + `to_internal_simple_structure` method. + + Returns + ------- + A list of dicts. 
+ """ + + return self._to_simple_structure([]) + + def _to_simple_structure(self, selector: Selector) -> typing.List[typing.Dict]: + output = [] + + selector = typing.cast(ListSelector, selector) + + if 'selector' in inspect.signature(self.query).parameters: + query = self.query + else: + def query(selector: Selector, *, ignore_all_elements: bool = False, remove_no_value: bool = True) -> frozendict.FrozenOrderedDict: + return self.query() # type: ignore + + metadata = query(selector=selector) + if metadata: + output.append({ + 'selector': list(selector), + 'metadata': metadata, + }) + + elements = self.get_elements(selector) + + for element in elements: + output += self._to_simple_structure(selector + [element]) + + return output + + def pretty_print(self, selector: Selector = None, handle: typing.IO[typing.Any] = None, _level: int = 0) -> None: + """ + Pretty-prints metadata to ``handle``, or `sys.stdout` if not specified. + + The output matches the output one obtain by using `query` method and is a + semantically valid representation of metadata, but it does not matches + how metadata is stored internally. To obtain that, you can use + `to_internal_json_structure` and `to_internal_simple_structure` methods. + + Parameters + ---------- + selector: + A selector to start pretty-printing at. + handle: + A handle to pretty-print to. Default is `sys.stdout`. + """ + + if selector is None: + selector = [] + + if handle is None: + handle = sys.stdout + + self.check_selector(selector) + + selector = list(selector) + + if 'selector' in inspect.signature(self.query).parameters: + query = self.query + else: + def query(selector: Selector, *, ignore_all_elements: bool = False, remove_no_value: bool = True) -> frozendict.FrozenOrderedDict: + return self.query() # type: ignore + + indent = ' ' * _level + + handle.write('{indent}Selector:\n{indent} {selector}\n'.format(indent=indent, selector=tuple(selector))) + + handle.write('{indent}Metadata:\n'.format(indent=indent)) + for line in json.dumps(utils.to_json_structure(query(selector=selector)), indent=1, allow_nan=False).splitlines(): + handle.write('{indent} {line}\n'.format(indent=indent, line=line)) + + elements = self.get_elements(selector) + + if not elements: + return + + if ALL_ELEMENTS in elements: + handle.write('{indent}All elements:\n'.format(indent=indent)) + self.pretty_print(selector + [ALL_ELEMENTS], handle=handle, _level=_level + 1) + + first_element = True + for element in elements: + if element is ALL_ELEMENTS: + continue + + if first_element: + handle.write('{indent}Elements:\n'.format(indent=indent)) + first_element = False + + self.pretty_print(selector + [element], handle=handle, _level=_level + 1) + + def _copy_elements_metadata(self, target_metadata: T, from_selector: ListSelector, + to_selector: ListSelector, selector: ListSelector, ignore_all_elements: bool) -> T: + # "ALL_ELEMENTS" is always first, if it exists, which works in our favor here. + # We are copying metadata for both "ALL_ELEMENTS" and elements themselves, so + # we do not have to merge metadata together for elements themselves. 
+ elements = self.get_elements(from_selector + selector) + + for element in elements: + new_selector = selector + [element] + metadata = self._query(from_selector + new_selector, self._current_metadata, 0 if ignore_all_elements else len(from_selector)) + target_metadata = target_metadata.update(to_selector + new_selector, metadata) + target_metadata = self._copy_elements_metadata(target_metadata, from_selector, to_selector, new_selector, ignore_all_elements) + + return target_metadata + + def copy_to(self, target_metadata: T, from_selector: Selector, + to_selector: Selector = (), *, ignore_all_elements: bool = False) -> T: + """ + Recursively copies metadata to ``target_metadata``, starting at the + ``from_selector`` and to a selector starting at ``to_selector``. + """ + + metadata = self._query(from_selector, self._current_metadata, 0 if ignore_all_elements else len(from_selector)) + + # Do not copy top-level "schema" field to a lower level. + if from_selector == () and to_selector != () and 'schema' in metadata: + # Copy so that we can mutate. + metadata_dict = collections.OrderedDict(metadata) + del metadata_dict['schema'] + metadata = frozendict.FrozenOrderedDict(metadata_dict) + + target_metadata = target_metadata.update(to_selector, metadata) + + return self._copy_elements_metadata(target_metadata, list(from_selector), list(to_selector), [], ignore_all_elements) + + +class DataMetadata(Metadata): + """ + A class for metadata for data values. + + It checks all updates against container and data schemas. Note that as such empty (just created) metadata object + does not validate against schemas. Consider setting required fields manually or use `generate` method as a + helper to do so. + + It has additional helper methods for operating on metadata of tabular data. + + Parameters + ---------- + metadata: + Optional initial metadata for the top-level of the value. + for_value: + Optional value to automatically generate metadata for. DEPRECATED: use explicit generate method call instead. + generate_metadata: bool + Automatically generate metadata from ``for_value`` and update the metadata accordingly. + DEPRECATED: use explicit generate method call instead. + check: + DEPRECATED: argument ignored. + source: + DEPRECATED: argument ignored. + timestamp: + DEPRECATED: argument ignored. + """ + + @deprecate.arguments('for_value', 'generate_metadata', message="use explicit generate method call instead") + @deprecate.arguments('source', 'timestamp', 'check', message="argument ignored") + def __init__(self, metadata: typing.Dict[str, typing.Any] = None, for_value: typing.Any = None, *, + generate_metadata: bool = True, check: bool = True, source: typing.Any = None, timestamp: datetime.datetime = None) -> None: + super().__init__(metadata=metadata) + + if for_value is not None and generate_metadata: + self._generate(for_value) + + @deprecate.arguments('source', 'timestamp', 'check', 'for_value', message="argument ignored") + def update(self: D, selector: Selector, metadata: typing.Dict[str, typing.Any], *, for_value: typing.Any = None, + check: bool = True, source: typing.Any = None, timestamp: datetime.datetime = None) -> D: + """ + Updates metadata with new ``metadata`` for data pointed to with ``selector``. + + If value of any field is ``NO_VALUE``, that field is deleted. + + It returns a copy of this metadata object with new metadata applied. + + Parameters + ---------- + selector: + A selector pointing to data. + metadata: + A map of fields and values with metadata. 
+ for_value: + DEPRECATED: argument ignored. + check: + DEPRECATED: argument ignored. + source: + DEPRECATED: argument ignored. + timestamp: + DEPRECATED: argument ignored. + + Returns + ------- + Updated metadata. + """ + + return super().update(selector=selector, metadata=metadata) + + @deprecate.arguments('source', 'timestamp', 'check', 'for_value', message="argument ignored") + def remove(self: D, selector: Selector, *, recursive: bool = False, strict_all_elements: bool = False, + for_value: typing.Any = None, check: bool = True, source: typing.Any = None, timestamp: datetime.datetime = None) -> D: + """ + Removes all metadata at ``selector``. + + Parameters + ---------- + selector: + A selector to remove metadata at. + recursive: + Should remove also all metadata under the ``selector``? + strict_all_elements: + If ``True``, then when removing ``ALL_ELEMENTS`` entry, do not remove also metadata for all elements it matches. + for_value: + DEPRECATED: argument ignored. + check: + DEPRECATED: argument ignored. + source: + DEPRECATED: argument ignored. + timestamp: + DEPRECATED: argument ignored. + + Returns + ------- + Updated metadata. + """ + + return super().remove(selector=selector, recursive=recursive, strict_all_elements=strict_all_elements) + + @deprecate.function(message="use generate method instead") + @deprecate.arguments('source', 'timestamp', 'check', message="argument ignored") + def set_for_value(self: D, for_value: typing.Any = None, *, generate_metadata: bool = True, check: bool = True, + source: typing.Any = None, timestamp: datetime.datetime = None) -> D: + """ + DEPRECATED: use ``generate`` method instead. + + If ``generate_metadata`` is set, generate metadata from ``for_value`` and update the metadata accordingly. + + Parameters + ---------- + for_value: + Value to automatically generate metadata for. + generate_metadata: bool + Automatically generate metadata from ``for_value`` and update the metadata accordingly. + check: + DEPRECATED: argument ignored. + source: + DEPRECATED: argument ignored. + timestamp: + DEPRECATED: argument ignored. + + Returns + ------- + Metadata object updated with automatically generated metadata. + """ + + if for_value is not None and generate_metadata: + return self.generate(for_value) + else: + return self + + def generate(self: D, value: typing.Any = None, *, compact: bool = False) -> D: + """ + Metadata about structure of data (dimensions) and structural types is + generated for the ``value``, and existing metadata is updated accordingly. + + Parameters + ---------- + value: + Value to automatically generate metadata for. + compact: + Compact automatically generated metadata. Produces equivalent but compact + metadata where equal metadata for all elements in a dimension are compacted + into ``ALL_ELEMENTS`` selector segment. + + Returns + ------- + Metadata object updated with automatically generated metadata. + """ + + new_metadata = copy.copy(self) + + new_metadata._generate(value, compact) + + return new_metadata + + def _generate(self, value: typing.Any = None, compact: bool = False) -> None: + # Importing here to prevent import cycle. And to not import it many times inside "_generate_metadata". 
+ from d3m import container, types as d3m_types + + if value is None: + raise exceptions.InvalidArgumentValueError("\"value\" argument cannot be None.") + + generated_metadata_dict = self._generate_metadata(container, d3m_types, value, (), True) + + if compact: + # We make all metadata immutable so that it is hashable, which is required for the "_compact_generated_metadata". + for selector, metadata in generated_metadata_dict.items(): + generated_metadata_dict[selector] = utils.make_immutable_copy(metadata) + + # Because we generated all metadata we know that we can compact it. + # If some metadata holds for all elements we know that we can move it to "ALL_ELEMENTS". + generated_metadata_dict = self._compact_metadata(generated_metadata_dict, ALL_GENERATED_FIELDS) + + self._update_with_generated_metadata(generated_metadata_dict) + + # TODO: Also remove metadata for columns/rows which do not exist anymore. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/336 + + # TODO: Should we handle inheritance between semantic types here? + def has_semantic_type(self, selector: Selector, semantic_type: str) -> bool: + try: + return semantic_type in self.query_field(selector, 'semantic_types') + except KeyError: + return False + + @deprecate.arguments('source', 'timestamp', message="argument ignored") + def remove_semantic_type(self: D, selector: Selector, semantic_type: str, *, source: typing.Any = None, timestamp: datetime.datetime = None) -> D: + try: + semantic_types = self.query_field(selector, 'semantic_types') + except KeyError: + return self + if not semantic_types: + return self + new_semantic_types = tuple(st for st in semantic_types if st != semantic_type) + if new_semantic_types == semantic_types: + return self + return self.update(selector, {'semantic_types': new_semantic_types}) + + @deprecate.arguments('source', 'timestamp', message="argument ignored") + def add_semantic_type(self: D, selector: Selector, semantic_type: str, *, source: typing.Any = None, timestamp: datetime.datetime = None) -> D: + try: + semantic_types = self.query_field(selector, 'semantic_types') + except KeyError: + semantic_types = () + if semantic_type in semantic_types: + return self + semantic_types += (semantic_type,) + return self.update(selector, {'semantic_types': semantic_types}) + + # TODO: This does not look too efficient. Optimize? + def get_elements_with_semantic_type(self, selector: Selector, semantic_type: str) -> typing.Sequence[SelectorSegment]: + all_elements = self.get_elements(selector) + + return [element for element in all_elements if self.has_semantic_type(list(selector) + [element], semantic_type)] + + def query_column(self, column_index: int, *, at: Selector = (), ignore_all_elements: bool = False) -> frozendict.FrozenOrderedDict: + """ + Returns column metadata. + + This assumes that column metadata is stored under ``(ALL_ELEMENTS, column_index)``, at + optionally ``at`` selector, which might not necessary hold if metadata is not compacted. + Consider using `query_column_field`. + + Parameters + ---------- + column_index: + Column index to use. + at: + Selector at which to assume tabular metadata. + ignore_all_elements: + By default, metadata from ALL_ELEMENTS is merged with metadata for an element itself. + By setting this argument to ``True``, this is disabled and just metadata from an element is returned. + + Returns + ------- + Metadata of a given column. 
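+
+        Examples
+        --------
+        A minimal illustrative sketch (the returned value is a frozen ordered dict,
+        shown here as a plain dict for brevity)::
+
+            metadata = DataMetadata().update_column(0, {'name': 'a'})
+            metadata.query_column(0)
+            # {'name': 'a'}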
+ """ + + return self.query(list(at) + [ALL_ELEMENTS, column_index], ignore_all_elements=ignore_all_elements) + + def query_column_field(self, column_index: int, field: str, *, at: Selector = (), strict_all_elements: bool = True) -> typing.Any: + """ + Returns ``field`` value of column metadata. Raises `KeyError` exception if metadata or field + is not set. + + ``field`` represents only top-level fields in metadata. + + Parameters + ---------- + column_index: + Column index to use. + field: + A field name to query. + at: + Selector at which to assume tabular metadata. + strict_all_elements: + If set, the method does not just return ``field`` value of column metadata, + but checks that the value really holds for all rows matching the ``selector``, + without exception. This is helpful also if metadata is not compacted and + ``field`` value is the same across all rows, but ``ALL_ELEMENTS`` metadata + does not contain that field. + + Returns + ------- + A value of ``field`` of a given column. + """ + + return self.query_field(list(at) + [ALL_ELEMENTS, column_index], field, strict_all_elements=strict_all_elements) + + @deprecate.arguments('source', 'timestamp', message="argument ignored") + def update_column(self: D, column_index: int, metadata: typing.Dict[str, typing.Any], *, at: Selector = (), source: typing.Any = None, timestamp: datetime.datetime = None) -> D: + """ + Updates column metadata with new ``metadata`` for column identified by ``column_index``. + + This stores column metadata under ``(ALL_ELEMENTS, column_index)``, at optionally ``at`` selector. + + Parameters + ---------- + column_index: + Column index to update. + metadata: + A map of fields and values with metadata. + at: + Selector at which to assume tabular metadata. + source: + DEPRECATED: argument ignored. + timestamp: + DEPRECATED: argument ignored. + + Returns + ------- + Updated column metadata. + """ + + return self.update(list(at) + [ALL_ELEMENTS, column_index], metadata) + + @deprecate.arguments('source', 'timestamp', 'for_value', message="argument ignored") + def remove_column(self: D, column_index: int, *, at: Selector = (), recursive: bool = False, strict_all_elements: bool = False, + for_value: typing.Any = None, source: typing.Any = None, timestamp: datetime.datetime = None) -> D: + """ + Removes all column metadata for column ``column_index``. + + This removes column metadata under ``(ALL_ELEMENTS, column_index)``, at optionally ``at`` selector. + It does not move to the left metadata for columns after the removed column. + If you want that, use ``remove_columns``. + + Parameters + ---------- + column_index: + Column index to remove. + at: + Selector at which to assume tabular metadata. + recursive: + Should remove also all metadata under the ``selector``? + strict_all_elements: + If ``True``, then when removing ``ALL_ELEMENTS`` entry, do not remove also metadata for all elements it matches. + for_value: + DEPRECATED: argument ignored. + source: + DEPRECATED: argument ignored. + timestamp: + DEPRECATED: argument ignored. + + Returns + ------- + Updated metadata. 
+ """ + + return self.remove( + list(at) + [ALL_ELEMENTS, column_index], recursive=recursive, strict_all_elements=strict_all_elements, + ) + + def get_columns_with_semantic_type(self, semantic_type: str, *, at: Selector = ()) -> typing.Sequence[SelectorSegment]: + return self.get_elements_with_semantic_type(list(at) + [ALL_ELEMENTS], semantic_type) + + def list_columns_with_semantic_types(self, semantic_types: typing.Sequence[str], *, at: Selector = ()) -> typing.Sequence[int]: + """ + This is similar to ``get_columns_with_semantic_type``, but it returns all column indices + for a dimension instead of ``ALL_ELEMENTS`` element. + + Moreover, it operates on a list of semantic types, where a column is returned + if it matches any semantic type on the list. + """ + + columns = [] + + for element in self.get_elements(list(at) + [ALL_ELEMENTS]): + try: + metadata_semantic_types = self.query_field(list(at) + [ALL_ELEMENTS, element], 'semantic_types') + except KeyError: + metadata_semantic_types = () + + # TODO: Should we handle inheritance between semantic types here? + if any(semantic_type in metadata_semantic_types for semantic_type in semantic_types): + if element is ALL_ELEMENTS: + try: + dimension = self.query_field(list(at) + [ALL_ELEMENTS], 'dimension') + except KeyError: + dimension = {} + return list(range(dimension.get('length', 0))) + else: + columns.append(typing.cast(int, element)) + + return columns + + def list_columns_with_structural_types( + self, structural_types: typing.Union[typing.Callable, typing.Sequence[typing.Union[str, type]]], *, + at: Selector = (), + ) -> typing.Sequence[int]: + """ + Returns a list of columns matching any of the structural types listed in + ``structural_types``. Matching allows subclasses of those types. ``structural_types`` can also be + a function to call to check a structural type. + """ + + columns = [] + + if callable(structural_types): + predicate = structural_types + else: + def predicate(typ: type) -> bool: + return any(utils.matches_structural_type(typ, structural_type) for structural_type in typing.cast(typing.Sequence[typing.Union[str, type]], structural_types)) + + for element in self.get_elements(list(at) + [ALL_ELEMENTS]): + try: + metadata_structural_type = self.query_field(list(at) + [ALL_ELEMENTS, element], 'structural_type') + except KeyError: + continue + + if predicate(metadata_structural_type): + if element is ALL_ELEMENTS: + try: + dimension = self.query_field(list(at) + [ALL_ELEMENTS], 'dimension') + except KeyError: + dimension = {} + return list(range(dimension.get('length', 0))) + else: + columns.append(typing.cast(int, element)) + + return columns + + def _merge_generated_metadata(self, old_metadata: frozendict.FrozenOrderedDict, metadata: frozendict.FrozenOrderedDict) -> frozendict.FrozenOrderedDict: + # Copy so that we can mutate. + new_metadata = collections.OrderedDict(metadata) + + # Use generated "name" only if "name" does not already exist. + # This holds even if existing "name" is "NO_VALUE". + if 'name' in new_metadata and 'name' in old_metadata: + del new_metadata['name'] + + if 'name' in new_metadata.get('dimension', {}) and 'name' in old_metadata.get('dimension', {}): + # Copy so that we can mutate. 
+ new_metadata['dimension'] = collections.OrderedDict(new_metadata['dimension']) + del new_metadata['dimension']['name'] + new_metadata['dimension'] = frozendict.FrozenOrderedDict(new_metadata['dimension']) + + if 'semantic_types' in new_metadata: + semantic_types = list(old_metadata.get('semantic_types', [])) + for semantic_type in new_metadata['semantic_types']: + if semantic_type not in semantic_types: + # Only one tabular semantic type can exist at a time. + if semantic_type in TABULAR_SEMANTIC_TYPES: + semantic_types = [st for st in semantic_types if st not in TABULAR_SEMANTIC_TYPES] + semantic_types.append(semantic_type) + new_metadata['semantic_types'] = tuple(semantic_types) + + if 'semantic_types' in new_metadata.get('dimension', {}): + semantic_types = list(old_metadata.get('dimension', {}).get('semantic_types', [])) + for semantic_type in new_metadata['dimension']['semantic_types']: + if semantic_type not in semantic_types: + # Only one tabular semantic type can exist at a time. + if semantic_type in TABULAR_SEMANTIC_TYPES: + semantic_types = [st for st in semantic_types if st not in TABULAR_SEMANTIC_TYPES] + semantic_types.append(semantic_type) + # Copy so that we can mutate. + new_metadata['dimension'] = collections.OrderedDict(new_metadata['dimension']) + new_metadata['dimension']['semantic_types'] = tuple(semantic_types) + new_metadata['dimension'] = frozendict.FrozenOrderedDict(new_metadata['dimension']) + + # If structural type was not generated now, but it exists before, we have to remove it. + # Here we just delete it from "old_metadata" so that it is not re-set back, while + # we really handle it in "_update_with_generated_metadata". + if 'structural_type' not in new_metadata and 'structural_type' in old_metadata: + # Copy so that we can mutate. + old_metadata_dict = collections.OrderedDict(old_metadata) + del old_metadata_dict['structural_type'] + old_metadata = frozendict.FrozenOrderedDict(old_metadata_dict) + + return self._merge_metadata(old_metadata, frozendict.FrozenOrderedDict(new_metadata)) + + def _diff_generated_metadata(self, element_metadata: frozendict.FrozenOrderedDict, metadata: frozendict.FrozenOrderedDict) -> frozendict.FrozenOrderedDict: + """ + When preparing updates for automatically generated metadata we want to make sure we do not override any metadata + directly set on elements with metadata on ``ALL_ELEMENTS``. In this method we compute which metadata to update + after the automatically generated metadata is set for ``ALL_ELEMENTS`` to restore the metadata directly set + on elements. + """ + + # Copy so that we can mutate. + new_element_metadata = collections.OrderedDict(element_metadata) + + # No need to set name if it is equal to metadata on "ALL_ELEMENTS". + if 'name' in new_element_metadata and 'name' in metadata and new_element_metadata['name'] == metadata['name']: + del new_element_metadata['name'] + + # No need to set name if it is equal to metadata on "ALL_ELEMENTS". + if 'name' in new_element_metadata.get('dimension', {}) and 'name' in metadata.get('dimension', {}) and new_element_metadata['dimension']['name'] == metadata['dimension']['name']: + # Copy so that we can mutate. 
+ new_element_metadata['dimension'] = collections.OrderedDict(new_element_metadata['dimension']) + del new_element_metadata['dimension']['name'] + new_element_metadata['dimension'] = frozendict.FrozenOrderedDict(new_element_metadata['dimension']) + + if 'semantic_types' in new_element_metadata and 'semantic_types' in metadata: + # No need to merge semantic types if they are equal to metadata on "ALL_ELEMENTS". + if set(new_element_metadata['semantic_types']) == set(metadata['semantic_types']): + del new_element_metadata['semantic_types'] + else: + semantic_types = list(new_element_metadata['semantic_types']) + for semantic_type in metadata['semantic_types']: + if semantic_type not in semantic_types: + # Only one tabular semantic type can exist at a time. + if semantic_type in TABULAR_SEMANTIC_TYPES: + semantic_types = [st for st in semantic_types if st not in TABULAR_SEMANTIC_TYPES] + semantic_types.append(semantic_type) + new_element_metadata['semantic_types'] = tuple(semantic_types) + + if 'semantic_types' in new_element_metadata.get('dimension', {}) and 'semantic_types' in metadata.get('dimension', {}): + # No need to merge semantic types if they are equal to metadata on "ALL_ELEMENTS". + if set(new_element_metadata['dimension']['semantic_types']) == set(metadata['dimension']['semantic_types']): + new_element_metadata['dimension'] = collections.OrderedDict(new_element_metadata['dimension']) + del new_element_metadata['dimension']['semantic_types'] + new_element_metadata['dimension'] = frozendict.FrozenOrderedDict(new_element_metadata['dimension']) + else: + semantic_types = list(new_element_metadata['dimension']['semantic_types']) + for semantic_type in metadata['dimension']['semantic_types']: + if semantic_type not in semantic_types: + # Only one tabular semantic type can exist at a time. + if semantic_type in TABULAR_SEMANTIC_TYPES: + semantic_types = [st for st in semantic_types if st not in TABULAR_SEMANTIC_TYPES] + semantic_types.append(semantic_type) + # Copy so that we can mutate. + new_element_metadata['dimension'] = collections.OrderedDict(new_element_metadata['dimension']) + new_element_metadata['dimension']['semantic_types'] = tuple(semantic_types) + new_element_metadata['dimension'] = frozendict.FrozenOrderedDict(new_element_metadata['dimension']) + + # Structural type is always set or removed by generated metadata, so it should not be directly set on elements. + if 'structural_type' in new_element_metadata: + del new_element_metadata['structural_type'] + + for generated_field in ALL_GENERATED_FIELDS: + # We already processed these. + if generated_field in {'name', 'dimension', 'semantic_types', 'structural_type'}: + continue + + # No need to set this field if it is equal to metadata on "ALL_ELEMENTS". + if generated_field in new_element_metadata and generated_field in metadata and new_element_metadata[generated_field] == metadata[generated_field]: + del new_element_metadata[generated_field] + + # We iterate over a list so that we can change dict while iterating. + for field in list(new_element_metadata.keys()): + # We already processed these. + if field in ALL_GENERATED_FIELDS: + continue + + # Other fields are never generated, so they are never overridden, so no need to set them again. + del new_element_metadata[field] + + if 'dimension' in new_element_metadata: + # Copy so that we can mutate. 
+ new_element_metadata['dimension'] = collections.OrderedDict(new_element_metadata['dimension']) + + # Length is always set by generated metadata, so it should not be directly set on elements. + if 'length' in new_element_metadata['dimension']: + del new_element_metadata['dimension']['length'] + + # We iterate over a list so that we can change dict while iterating. + for field in list(new_element_metadata['dimension'].keys()): + # We already processed these. + if field in {'name', 'semantic_types'}: + continue + + # Other fields are never generated, so they are never overridden, so no need to set them again. + del new_element_metadata['dimension'][field] + + new_element_metadata['dimension'] = frozendict.FrozenOrderedDict(new_element_metadata['dimension']) + + # If dimension ended up empty, remove it. + if not new_element_metadata['dimension']: + del new_element_metadata['dimension'] + + return frozendict.FrozenOrderedDict(new_element_metadata) + + @classmethod + def _generate_metadata(cls: typing.Type[D], container: types.ModuleType, d3m_types: types.ModuleType, value: typing.Any, + selector: TupleSelector, is_root: bool = False) -> typing.Dict[TupleSelector, typing.Dict]: + """ + Returned metadata should be additionally compacted before use. + + We make sure that the first element of the returned dict is the entry which corresponds to the ``selector``. + + Important: Any top-level field set by this method should be listed in ``ALL_GENERATED_KEYS``. + """ + + generated_metadata: dict = {} + + if is_root: + generated_metadata['schema'] = CONTAINER_SCHEMA_VERSION + + # We use a simple type here, not "utils.get_type" because it is faster and also because we anyway + # traverse the data structure ourselves and store nested typing information ourselves into metadata. + generated_metadata['structural_type'] = type(value) + + # TODO: Traverse structure also for Graph objects. + # Fast path. We first check if the value is of a simple data type. + if isinstance(value, d3m_types.simple_data_types): # type: ignore + # We just store structural type of the value (already present in "generated_metadata"). + return collections.OrderedDict([(selector, generated_metadata)]) + + if isinstance(value, container.List): # type: ignore + generated_metadata['dimension'] = { + 'length': len(value), + } + + metadata_dict = collections.OrderedDict([(selector, generated_metadata)]) + + metadata_dict_list: typing.List[typing.Dict[TupleSelector, typing.Dict]] = [] + for v in value: + # We recurse with selector set to "()"so that it is easier to compare results for equality. + metadata_dict_list.append(cls._generate_metadata(container, d3m_types, v, ())) + + if metadata_dict_list: + # Equality of "OrderedDict" also checks for the equality in order of fields. + if all(element_dict == metadata_dict_list[0] for element_dict in metadata_dict_list): + selector_all_elements = selector + (ALL_ELEMENTS,) + + # All elements are equal, so we use the first element. + for element_selector, element_metadata in metadata_dict_list[0].items(): + # We recursed with selector set to "()" so we have to adapt the real selector now. + new_selector = selector_all_elements + element_selector + assert new_selector not in metadata_dict + metadata_dict[new_selector] = element_metadata + + else: + for element_index, element_dict in enumerate(metadata_dict_list): + for element_selector, element_metadata in element_dict.items(): + # We recursed with selector set to "()" so we have to adapt the real selector now. 
+ new_selector = selector + (element_index,) + element_selector + assert new_selector not in metadata_dict + metadata_dict[new_selector] = element_metadata + + return metadata_dict + + if isinstance(value, container.Dataset): # type: ignore + generated_metadata['dimension'] = { + 'name': 'resources', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'], + 'length': len(value), + } + + metadata_dict = collections.OrderedDict([(selector, generated_metadata)]) + + for k, v in value.items(): + if not isinstance(k, str): + raise TypeError("Dataset resource ID has to be a string, not: {k_type}".format(k_type=type(k))) + metadata_dict.update(cls._generate_metadata(container, d3m_types, v, selector + (k,))) + + # It is unlikely that metadata is equal across dataset resources, so we do not try to compact metadata here. + + return metadata_dict + + if isinstance(value, container.DataFrame): # type: ignore + if len(value.shape) != 2: + raise ValueError("Only two-dimensional DataFrames are supported, at {selector}.".format(selector=selector)) + + generated_metadata['semantic_types'] = ['https://metadata.datadrivendiscovery.org/types/Table'] + + generated_metadata['dimension'] = { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': value.shape[0], + } + + metadata_dict = collections.OrderedDict([(selector, generated_metadata)]) + + # Reusing the variable for next dimension. + generated_metadata = { + 'dimension': { + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], + 'length': value.shape[1], + }, + } + + selector_all_rows = selector + (ALL_ELEMENTS,) + metadata_dict[selector_all_rows] = generated_metadata + + for column_index, dtype in enumerate(value.dtypes): + column_metadata = {} + + # Only if a column name is a string. DataFrame can have a sequence/numbers for column names + # but those are generally automatically generated so we do not use them as column names here. + if isinstance(value.columns[column_index], str): + # We set the name first, so that recursive calls to "_generate_metadata" can potentially + # override it. "_generate_metadata" does not do it for now, but it could do it in the future. + # Generated names to not override names if they already exists in metadata, which is + # handled in the "_update_with_generated_metadata" method. + column_metadata['name'] = value.columns[column_index] + + selector_all_rows_column = selector_all_rows + (column_index,) + + # Values are objects. This could be something as simple as a Python string, or a whole other container value nested. + if dtype.kind == 'O': + metadata_column_dict_list: typing.List[typing.Dict[TupleSelector, dict]] = [] + for row_index, cell_value in enumerate(value.iloc[:, column_index]): + # We recurse with selector set to "()"so that it is easier to compare results for equality. + metadata_column_dict_list.append(cls._generate_metadata(container, d3m_types, cell_value, ())) + + if metadata_column_dict_list: + # Equality of "OrderedDict" also checks for the equality in order of fields. + if all(row_dict == metadata_column_dict_list[0] for row_dict in metadata_column_dict_list): + # All rows are equal, so we use the first row. + for row_selector, row_metadata in metadata_column_dict_list[0].items(): + # We recursed with selector set to "()" so we have to adapt the real selector now. 
+ new_selector = selector_all_rows_column + row_selector + if new_selector == selector_all_rows_column: + row_metadata.update(column_metadata) + assert new_selector not in metadata_dict + metadata_dict[new_selector] = row_metadata + + else: + metadata_dict[selector_all_rows_column] = column_metadata + + for row_index, row_dict in enumerate(metadata_column_dict_list): + for row_selector, row_metadata in row_dict.items(): + # We recursed with selector set to "()" so we have to adapt the real selector now. + new_selector = selector + (row_index, column_index) + row_selector + assert new_selector not in metadata_dict + metadata_dict[new_selector] = row_metadata + + else: + metadata_dict[selector_all_rows_column] = column_metadata + + else: + # DataFrame is trying to be smart and returns sometimes Python types instead + # of numpy types when retrieving values from it. On the other hand, dtypes are + # generally numpy types. So there can be discrepancy between recorded structural + # type in metadata and what you get for some operations out of a DataFrame. + # See: https://github.com/pandas-dev/pandas/issues/20791 + # https://github.com/pandas-dev/pandas/issues/13468 + column_metadata['structural_type'] = dtype.type + metadata_dict[selector_all_rows_column] = column_metadata + + return metadata_dict + + if isinstance(value, container.ndarray): # type: ignore + if not value.shape: + raise ValueError("Zero-dimensional arrays are not supported, at {selector}.".format(selector=selector)) + + metadata_dict = collections.OrderedDict() + + for dimension_index, dimension_length in enumerate(value.shape): + generated_metadata['dimension'] = { + 'length': dimension_length, + } + + if len(value.shape) == 2: + if dimension_index == 0: + generated_metadata['semantic_types'] = ['https://metadata.datadrivendiscovery.org/types/Table'] + generated_metadata['dimension']['name'] = 'rows' + generated_metadata['dimension']['semantic_types'] = ['https://metadata.datadrivendiscovery.org/types/TabularRow'] + elif dimension_index == 1: + generated_metadata['dimension']['name'] = 'columns' + generated_metadata['dimension']['semantic_types'] = ['https://metadata.datadrivendiscovery.org/types/TabularColumn'] + + metadata_dict[selector + (ALL_ELEMENTS,) * dimension_index] = generated_metadata + + # Reusing the variable for next dimension. + generated_metadata = {} + + if value.dtype.kind == 'O': + metadata_cell_dict_list: typing.List[typing.Dict[TupleSelector, typing.Dict]] = [] + metadata_cell_indices: typing.List[typing.Tuple] = [] + + iterator = numpy.nditer(value, flags=['multi_index', 'refs_ok']) + while not iterator.finished: + # We recurse with selector set to "()"so that it is easier to compare results for equality. + metadata_cell_dict_list.append(cls._generate_metadata(container, d3m_types, iterator.value.item(), ())) + metadata_cell_indices.append(tuple(iterator.multi_index)) + iterator.iternext() + + if metadata_cell_dict_list: + # Equality of "OrderedDict" also checks for the equality in order of fields. + if all(cell_dict == metadata_cell_dict_list[0] for cell_dict in metadata_cell_dict_list): + selector_all_cells = selector + (ALL_ELEMENTS,) * len(value.shape) + + # All cells are equal, so we use the first cell. + for cell_selector, cell_metadata in metadata_cell_dict_list[0].items(): + # We recursed with selector set to "()" so we have to adapt the real selector now. 
+ new_selector = selector_all_cells + cell_selector + assert new_selector not in metadata_dict + metadata_dict[new_selector] = cell_metadata + + else: + for cell_index, cell_dict in zip(metadata_cell_indices, metadata_cell_dict_list): + for cell_selector, cell_metadata in cell_dict.items(): + # We recursed with selector set to "()" so we have to adapt the real selector now. + new_selector = selector + cell_index + cell_selector + assert new_selector not in metadata_dict + metadata_dict[new_selector] = cell_metadata + + else: + metadata_dict[selector + (ALL_ELEMENTS,) * len(value.shape)] = {'structural_type': value.dtype.type} + + return metadata_dict + + # We went through all container types and none matched. + if is_root: + assert not isinstance(value, d3m_types.Container), type(value) # type: ignore + raise TypeError("Value is not of a container type, but '{type}'.".format(type=type(value))) + + # A special case for dicts, for which we traverse the structure. + if isinstance(value, dict): + generated_metadata['dimension'] = { + 'length': len(value), + } + + metadata_dict = collections.OrderedDict([(selector, generated_metadata)]) + + metadata_dict_list = [] + metadata_indices: typing.List[typing.Tuple] = [] + for k, v in value.items(): + if not isinstance(k, (str, int)): + raise TypeError("Dict key has to be a string or an integer, not: {k_type}".format(k_type=type(k))) + # We recurse with selector set to "()"so that it is easier to compare results for equality. + metadata_dict_list.append(cls._generate_metadata(container, d3m_types, v, ())) + metadata_indices.append(k) + + if metadata_dict_list: + # Equality of "OrderedDict" also checks for the equality in order of fields. + if all(element_dict == metadata_dict_list[0] for element_dict in metadata_dict_list): + selector_all_elements = selector + (ALL_ELEMENTS,) + + # All elements are equal, so we use the first element. + for element_selector, element_metadata in metadata_dict_list[0].items(): + # We recursed with selector set to "()" so we have to adapt the real selector now. + new_selector = selector_all_elements + element_selector + assert new_selector not in metadata_dict + metadata_dict[new_selector] = element_metadata + + else: + for element_index, element_dict in zip(metadata_indices, metadata_dict_list): + for element_selector, element_metadata in element_dict.items(): + # We recursed with selector set to "()" so we have to adapt the real selector now. + new_selector = selector + (element_index,) + element_selector + assert new_selector not in metadata_dict + metadata_dict[new_selector] = element_metadata + + return metadata_dict + + # We checked for all simple data types, container types, and a dict. Nothing else is left. + assert not isinstance(value, d3m_types.Data) # type: ignore + raise TypeError("Value is not of a data type, but '{type}'.".format(type=type(value))) + + def _update_with_generated_metadata(self, generated_metadata_dict: typing.Dict[TupleSelector, dict]) -> None: + """ + This method works well really just with generated metadata. It has some assumptions what ``generated_metadata_dict`` + contains and how to merge things (merge semantic types, do not override names, clear unset structural types). + """ + + # We first preprocess given updates. We have to specially merge some fields and respect overrides + # on direct elements. 
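+ # Illustrative sketch of the expected input: for a simple two-column DataFrame, "_generate_metadata"
+ # typically produces a "generated_metadata_dict" shaped like:
+ # {
+ # (): {'schema': ..., 'structural_type': DataFrame, 'semantic_types': [...], 'dimension': {'name': 'rows', ...}},
+ # (ALL_ELEMENTS,): {'dimension': {'name': 'columns', ...}},
+ # (ALL_ELEMENTS, 0): {'name': ..., 'structural_type': ...},
+ # (ALL_ELEMENTS, 1): {'name': ..., 'structural_type': ...},
+ # }
+ # Each entry is then merged below into any existing metadata for its selector.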
+ updates: typing.List[typing.Tuple[TupleSelector, dict]] = []
+ for selector, metadata in generated_metadata_dict.items():
+ existing_metadata, metadata_exceptions = self.query_with_exceptions(selector, remove_no_value=False)
+
+ # If structural type was not generated now, but it existed before, we have to remove it. In "_merge_generated_metadata" we make sure
+ # it is not re-set back, and here we add an update at the beginning which removes it. It goes at the beginning because
+ # "structural_type" might be missing from "metadata" only because it was moved to metadata for the corresponding
+ # "ALL_ELEMENTS" selector. So the order is: we remove it through the direct selector, then maybe the "ALL_ELEMENTS" selector re-sets it,
+ # and merged metadata does not re-set it, because we made sure of that in "_merge_generated_metadata".
+ if 'structural_type' not in metadata and 'structural_type' in existing_metadata:
+ updates.insert(0, (selector, {'structural_type': NO_VALUE}))
+
+ metadata = self._merge_generated_metadata(existing_metadata, metadata)
+
+ updates.append((selector, metadata))
+
+ for exception_selector, exception_metadata in metadata_exceptions.items():
+ diff_metadata = self._diff_generated_metadata(exception_metadata, metadata)
+
+ if diff_metadata:
+ updates.append((exception_selector, diff_metadata))
+
+ for selector, metadata in updates:
+ metadata = utils.make_immutable_copy(metadata)
+
+ if not isinstance(metadata, frozendict.FrozenOrderedDict):
+ raise exceptions.InvalidArgumentTypeError("Metadata should be a dict.")
+
+ self._current_metadata = self._update(selector, self._current_metadata, metadata)
+
+ @deprecate.function(message="create a DataMetadata instance explicitly instead")
+ @deprecate.arguments('source', 'timestamp', 'check', message="argument ignored")
+ def clear(self: D, metadata: typing.Dict[str, typing.Any] = None, *, for_value: typing.Any = None,
+ generate_metadata: bool = True, check: bool = True, source: typing.Any = None, timestamp: datetime.datetime = None) -> D:
+ """
+ DEPRECATED: create a DataMetadata instance explicitly instead.
+
+ Creates and returns a new (clear) metadata object.
+
+ Parameters
+ ----------
+ metadata:
+ Optional new initial metadata for the top-level of the value.
+ for_value:
+ Optional value to automatically generate metadata for.
+ generate_metadata:
+ Automatically generate metadata from ``for_value`` and update the metadata accordingly.
+ check:
+ DEPRECATED: argument ignored.
+ source:
+ DEPRECATED: argument ignored.
+ timestamp:
+ DEPRECATED: argument ignored.
+
+ Returns
+ -------
+ New metadata object.
+ """
+
+ # We call wrapped parent method directly so that there are no double warnings.
+ new_metadata = super().clear.__wrapped__(self, metadata=metadata)
+
+ if for_value is not None and generate_metadata:
+ new_metadata._generate(for_value)
+
+ return new_metadata
+
+ # TODO: Check if structural types match the real type of a value.
+ def check(self, value: typing.Any) -> None:
+ """
+ Checks that all metadata has corresponding data in ``value`` and that every
+ metadata value is valid according to the schema. If not, it raises an exception.
+
+ Parameters
+ ----------
+ value:
+ Value to check against.
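+
+ For example, metadata that references a column, row, or other element not present in ``value``
+ causes a ``ValueError`` to be raised, and metadata values which do not conform to the metadata
+ schema fail schema validation.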
+ """ + + self._check_value(self._current_metadata, value, []) + self._check_metadata([]) + + @classmethod + def _check_value(cls, metadata_entry: MetadataEntry, value: typing.Any, path: typing.List[SimpleSelectorSegment]) -> None: + if metadata_entry.all_elements is not None: + try: + # We should be able to at least compute length at this dimension + # (to signal that it is a sequence or a map). + len(value) + except Exception as error: + raise ValueError("ALL_ELEMENTS set but dimension missing at {path}.".format(path=path)) from error + + if isinstance(value, numpy.matrix): + # One cannot iterate over a matrix segment by segment. You always get back + # a matrix (2D structure) and not an array of rows or columns. By converting + # it to an array such iteration segment by segment works. + value = numpy.array(value) + + if isinstance(value, pandas.DataFrame): + for element_segment, element_metadata_entry in metadata_entry.elements.items(): + try: + # Fetch a row as a list. + element_value = [value.iloc[element_segment, k] for k in range(len(value.columns))] + except Exception as error: + raise ValueError("'{element_segment}' at {path} cannot be resolved.".format(element_segment=element_segment, path=path)) from error + + cls._check_value(element_metadata_entry, element_value, path + [element_segment]) + + else: + for element_segment, element_metadata_entry in metadata_entry.elements.items(): + try: + element_value = value[element_segment] + except Exception as error: + raise ValueError("'{element_segment}' at {path} cannot be resolved.".format(element_segment=element_segment, path=path)) from error + + cls._check_value(element_metadata_entry, element_value, path + [element_segment]) + + def _check_metadata(self, selector: ListSelector) -> None: + metadata = self.query(selector) + + if selector: + DATA_SCHEMA_VALIDATOR.validate(metadata) + else: + CONTAINER_SCHEMA_VALIDATOR.validate(metadata) + + for element in self.get_elements(selector): + self._check_metadata(selector + [element]) + + @classmethod + @deprecate.arguments('for_value', message="argument ignored") + def check_selector(cls, selector: Selector, for_value: typing.Any = None) -> None: + """ + Checks that a given ``selector`` is a valid selector. If ``selector`` is invalid it raises an exception. + + It checks that it is a tuple or a list and currently we require that all segments of a selector + are strings, integers, or a special value ``ALL_ELEMENTS``. + + Parameters + ---------- + selector: + Selector to check. + for_value: + DEPRECATED: argument ignored. 
+ """ + + super().check_selector(selector=selector) + + def get_column_index_from_column_name(self, column_name: str, *, at: Selector = ()) -> int: + column_indices = [] + + for column_index in range(self.query_field(list(at) + [ALL_ELEMENTS], 'dimension')['length']): + try: + if self.query_field(list(at) + [ALL_ELEMENTS, column_index], 'name') == column_name: + column_indices.append(column_index) + except KeyError: + pass + + if len(column_indices) > 1: + raise KeyError( + "Cannot resolve column name '{column_name}' at '{at}' because of duplicate column names".format( + column_name=column_name, + at=at, + ), + ) + elif column_indices: + return column_indices[0] + else: + raise KeyError( + "Cannot resolve column name '{column_name}' at '{at}' because column could not be found.".format( + column_name=column_name, + at=at, + ), + ) + + def select_columns(self: D, columns: typing.Sequence[SimpleSelectorSegment], *, allow_empty_columns: bool = False) -> D: + """ + Returns a new metadata object with metadata only for given ``columns``. + Moreover, columns are renumbered based on the position in ``columns`` list. + Top-level metadata stays unchanged, except for updating the length of the columns dimension to + the number of columns. + + So if the ``columns`` is ``[3, 6, 5]`` then output metadata will have three columns, ``[0, 1, 2]``, + mapping metadata for columns ``3`` to ``0``, ``6`` to ``1`` and ``5`` to ``2``. + + This allows also duplication of columns. + """ + + if not columns and not allow_empty_columns: + raise exceptions.InvalidArgumentValueError("No columns selected.") + + # This makes a copy so that we can modify metadata in-place. + outputs_metadata = self.update( + (ALL_ELEMENTS,), + { + 'dimension': { + 'length': len(columns), + }, + }, + ) + + for element_metadata_entry in itertools.chain( + [outputs_metadata._current_metadata.all_elements], + outputs_metadata._current_metadata.elements.values(), + ): + if element_metadata_entry is None: + continue + + elements = element_metadata_entry.elements + new_elements_evolver = utils.EMPTY_PMAP.evolver() + for i, column_index in enumerate(columns): + if column_index in elements: + # If "column_index" is really numeric, we re-enumerate it. + if isinstance(column_index, int): + new_elements_evolver.set(i, elements[column_index]) + else: + new_elements_evolver.set(column_index, elements[column_index]) + element_metadata_entry.elements = new_elements_evolver.persistent() + element_metadata_entry.is_elements_empty = not element_metadata_entry.elements + element_metadata_entry.update_is_empty() + + # TODO: Update boundary columns and "confidence for" references. + + return outputs_metadata + + def remove_columns(self: D, column_indices: typing.Sequence[int]) -> D: + """ + Removes columns from metadata. + + It moves to the left metadata for columns after removed columns. + If you do not want that, use ``remove_column``. + + It throws an exception if no columns would be left after removing columns. + """ + + columns = list(range(self.query_field((ALL_ELEMENTS,), 'dimension')['length'])) + + if not columns: + raise ValueError("No columns to remove.") + + for column_index in column_indices: + columns.remove(column_index) + + if not columns: + raise ValueError("Removing columns would have removed the last column.") + + # TODO: Update boundary columns and "confidence for" references. 
+ + return self.select_columns(columns) + + def append_columns(self: D, right: D, *, use_right_metadata: bool = False) -> D: + """ + Appends metadata for all columns from ``right`` to the right of this metadata. + + Top-level metadata of ``right`` is ignored, not merged, except if ``use_right_metadata`` + is set, in which case top-level metadata of this metadata is ignored and one from ``right`` is + used instead. + """ + + left_length = self.query_field((ALL_ELEMENTS,), 'dimension')['length'] + right_length = right.query_field((ALL_ELEMENTS,), 'dimension')['length'] + + if not use_right_metadata: + outputs_metadata = self + + for column_index in range(right_length): + # To go over "ALL_ELEMENTS" and all rows. + for element in right.get_elements(()): + outputs_metadata = right.copy_to(outputs_metadata, [element, ALL_ELEMENTS], [element, left_length + column_index], ignore_all_elements=True) + outputs_metadata = right.copy_to(outputs_metadata, [element, column_index], [element, left_length + column_index], ignore_all_elements=True) + + else: + # This makes a copy so that we can modify metadata in-place. + outputs_metadata = right.update( + (ALL_ELEMENTS,), + {}, + ) + + # Move columns and make space for left metadata to be prepended. + # We iterate over a list so that we can change dict while iterating. + for element_metadata_entry in itertools.chain( + [outputs_metadata._current_metadata.all_elements], + outputs_metadata._current_metadata.elements.values(), + ): + if element_metadata_entry is None: + continue + + new_elements_evolver = element_metadata_entry.elements.evolver() + for element, metadata in element_metadata_entry.elements.items(reverse=True): + new_elements_evolver.remove(element) + new_elements_evolver.set(element + left_length, metadata) + element_metadata_entry.elements = new_elements_evolver.persistent() + element_metadata_entry.is_elements_empty = not element_metadata_entry.elements + element_metadata_entry.update_is_empty() + + for column_index in range(left_length): + # To go over "ALL_ELEMENTS" and all rows. + for element in right.get_elements(()): + outputs_metadata = self.copy_to(outputs_metadata, [element, ALL_ELEMENTS], [element, column_index], ignore_all_elements=True) + outputs_metadata = self.copy_to(outputs_metadata, [element, column_index], [element, column_index], ignore_all_elements=True) + + outputs_metadata = outputs_metadata.update((ALL_ELEMENTS,), {'dimension': {'length': left_length + right_length}}) + + # TODO: Update boundary columns and "confidence for" references. + + return outputs_metadata + + def insert_columns(self: D, columns: D, at_column_index: int) -> D: + """ + Inserts metadata for all columns from ``columns`` before ``at_column_index`` column in this metadata, + pushing all existing columns to the right. + + E.g., ``at_column_index == 0`` means inserting ``columns`` at the beginning of this metadata. + + Top-level metadata of ``columns`` is ignored. 
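+
+ For example, inserting a two-column ``columns`` at ``at_column_index == 2`` into metadata with
+ four columns places the new columns at indices ``2`` and ``3`` and shifts the old columns ``2``
+ and ``3`` to indices ``4`` and ``5``.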
+ """ + + columns_length = columns.query_field((ALL_ELEMENTS,), 'dimension')['length'] + + if at_column_index < 0: + raise exceptions.InvalidArgumentValueError("\"at_column_index\" is smaller than 0.") + if at_column_index > columns_length: + raise exceptions.InvalidArgumentValueError("\"at_column_index\" is larger than the range of existing columns.") + + if at_column_index == 0: + return columns.append_columns(self, use_right_metadata=True) + + if at_column_index == columns_length: + return self.append_columns(columns) + + # TODO: This could probably be optimized without all the slicing and joining. + + before = self.select_columns(list(range(0, at_column_index))) + after = self.select_columns(list(range(at_column_index, columns_length))) + + # TODO: Update boundary columns and "confidence for" references. + + return before.append_columns(columns).append_columns(after) + + def _replace_column(self: D, column_index: int, columns: 'DataMetadata', columns_column_index: int) -> D: + outputs_metadata = self.remove_column(column_index) + + # To go over "ALL_ELEMENTS" and all rows. + for element in columns.get_elements(()): + outputs_metadata = columns.copy_to(outputs_metadata, [element, ALL_ELEMENTS], [element, column_index], ignore_all_elements=True) + outputs_metadata = columns.copy_to(outputs_metadata, [element, columns_column_index], [element, column_index], ignore_all_elements=True) + + return outputs_metadata + + def replace_columns(self: D, columns: D, column_indices: typing.Sequence[int]) -> D: + """ + Replaces columns listed in ``column_indices`` with ``columns``, in order, in this metadata. + + ``column_indices`` and ``columns`` do not have to match in number of columns. Columns are first + replaced in order for matching indices and columns. If then there are more ``column_indices`` than + ``columns``, additional ``column_indices`` columns are removed. If there are more ``columns`` than + ``column_indices`` columns, then additional ``columns`` are inserted after the last replaced column. + + If ``column_indices`` is empty, then the behavior is equivalent to calling ``append_columns``. + + Top-level metadata of ``columns`` is ignored. + """ + + # TODO: This could probably be optimized without all the slicing and joining. + + if not column_indices: + return self.append_columns(columns) + + outputs = self + columns_length = columns.query_field((ALL_ELEMENTS,), 'dimension')['length'] + columns_to_remove = [] + i = 0 + + # This loop will run always at least once, so "column_index" will be set. + while i < len(column_indices): + column_index = column_indices[i] + + if i < columns_length: + outputs = outputs._replace_column(column_index, columns, i) + else: + # If there are more column indices than columns in "columns", we + # select additional columns for removal. + columns_to_remove.append(column_index) + + i += 1 + + # When there are less column indices than columns in "columns", we insert the rest after + # the last replaced column. + if i < columns_length: + columns = columns.select_columns(list(range(i, columns_length))) + # "column_index" points to the last place we inserted a column, so "+ 1" points after it. + outputs = outputs.insert_columns(columns, column_index + 1) + + # We remove columns at the end so that we do not break and column index used before. + # When removing columns, column indices shift. + if columns_to_remove: + outputs = outputs.remove_columns(columns_to_remove) + + # TODO: Update boundary columns and "confidence for" references. 
+ + return outputs + + def _check_same_number_of_samples(self, metadata: 'DataMetadata') -> None: + if self.query_field((), 'dimension')['length'] != metadata.query_field((), 'dimension')['length']: + raise ValueError("Data does not match in the number of samples.") + + def get_index_columns(self, *, at: Selector = ()) -> typing.Sequence[int]: + """ + Returns column indices of the primary index columns. + + It makes sure ``d3mIndex`` is always first listed. + """ + + index_columns = self.list_columns_with_semantic_types(('https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'https://metadata.datadrivendiscovery.org/types/PrimaryMultiKey'), at=at) + + def d3m_index_first(index_column: int) -> int: + try: + if self.query_field((ALL_ELEMENTS, index_column), 'name') == 'd3mIndex': + return -1 + except KeyError: + pass + + return 0 + + return sorted(index_columns, key=d3m_index_first) + + def horizontal_concat(self: D, right: D, *, use_index: bool = True, remove_second_index: bool = True, use_right_metadata: bool = False) -> D: + """ + Similar to ``append_columns``, but it respects primary index columns, by default. + + It is required that both inputs have the same number of samples. + """ + + self._check_same_number_of_samples(right) + + left_indices = self.get_index_columns() + right_indices = right.get_index_columns() + + if left_indices and right_indices: + if use_index: + # TODO: Reorder metadata rows as well. + # We cannot really do this without data? + pass + + # Removing second primary key column. + if remove_second_index: + right = right.remove_columns(right_indices) + + # TODO: Update boundary columns and "confidence for" references. + + return self.append_columns(right, use_right_metadata=use_right_metadata) + + def set_table_metadata(self: D, *, at: Selector = ()) -> D: + at = list(at) + + outputs_metadata = self + + try: + dimension = self.query_field(at + [ALL_ELEMENTS], 'dimension') + except KeyError: + dimension = None + + # If input is at least 2D, then we set table metadata. 
+ if dimension is not None: + metadata = outputs_metadata.query(at) + + semantic_types = list(metadata.get('semantic_types', [])) + if 'https://metadata.datadrivendiscovery.org/types/Table' not in semantic_types: + semantic_types.append('https://metadata.datadrivendiscovery.org/types/Table') + + dimension_semantic_types = list(metadata.get('dimension', {}).get('semantic_types', [])) + if 'https://metadata.datadrivendiscovery.org/types/TabularRow' not in dimension_semantic_types: + dimension_semantic_types.append('https://metadata.datadrivendiscovery.org/types/TabularRow') + dimension_semantic_types = [semantic_type for semantic_type in dimension_semantic_types if semantic_type not in {'https://metadata.datadrivendiscovery.org/types/TabularColumn'}] + + outputs_metadata = outputs_metadata.update(at, { + 'dimension': { + 'name': 'rows', + 'semantic_types': dimension_semantic_types, + }, + 'semantic_types': semantic_types, + }) + + metadata = outputs_metadata.query(at + [ALL_ELEMENTS]) + + dimension_semantic_types = list(metadata.get('dimension', {}).get('semantic_types', [])) + if 'https://metadata.datadrivendiscovery.org/types/TabularColumn' not in dimension_semantic_types: + dimension_semantic_types.append('https://metadata.datadrivendiscovery.org/types/TabularColumn') + dimension_semantic_types = [semantic_type for semantic_type in dimension_semantic_types if semantic_type not in {'https://metadata.datadrivendiscovery.org/types/TabularRow'}] + + new_metadata: typing.Dict = { + 'dimension': { + 'name': 'columns', + 'semantic_types': dimension_semantic_types, + }, + } + + if 'semantic_types' in metadata: + new_metadata['semantic_types'] = [semantic_type for semantic_type in metadata['semantic_types'] if semantic_type not in {'https://metadata.datadrivendiscovery.org/types/Table'}] + if not new_metadata['semantic_types']: + new_metadata['semantic_types'] = NO_VALUE + + outputs_metadata = outputs_metadata.update(at + [ALL_ELEMENTS], new_metadata) + + selector: ListSelector = at + [ALL_ELEMENTS, ALL_ELEMENTS] + while True: + try: + dimension = self.query_field(selector, 'dimension') + except KeyError: + break + + metadata = outputs_metadata.query(selector) + + new_metadata = {} + + if 'semantic_types' in metadata: + new_metadata['semantic_types'] = [semantic_type for semantic_type in metadata['semantic_types'] if semantic_type not in {'https://metadata.datadrivendiscovery.org/types/Table'}] + if not new_metadata['semantic_types']: + new_metadata['semantic_types'] = NO_VALUE + + if 'semantic_types' in dimension: + new_metadata['dimension'] = {} + + dimension_semantic_types = list(dimension['semantic_types']) + if 'https://metadata.datadrivendiscovery.org/types/TabularColumn' in dimension_semantic_types and dimension.get('name', None) == 'columns': + new_metadata['dimension']['name'] = NO_VALUE + if 'https://metadata.datadrivendiscovery.org/types/TabularRow' in dimension_semantic_types and dimension.get('name', None) == 'rows': + new_metadata['dimension']['name'] = NO_VALUE + + dimension_semantic_types = [ + semantic_type for semantic_type in dimension_semantic_types + if semantic_type not in {'https://metadata.datadrivendiscovery.org/types/TabularColumn', 'https://metadata.datadrivendiscovery.org/types/TabularRow'} + ] + new_metadata['dimension']['semantic_types'] = dimension_semantic_types + if not new_metadata['dimension']['semantic_types']: + new_metadata['dimension']['semantic_types'] = NO_VALUE + + if new_metadata: + outputs_metadata = outputs_metadata.update(selector, new_metadata) + + 
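+ # Descend one level deeper for the next iteration: the loop visits "at + [ALL_ELEMENTS, ALL_ELEMENTS]",
+ # then "at + [ALL_ELEMENTS, ALL_ELEMENTS, ALL_ELEMENTS]", and so on, until no "dimension" is found,
+ # clearing stale table semantic types and dimension names at deeper levels.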
selector.append(ALL_ELEMENTS) + + return outputs_metadata + + def get_column_references_by_column_index(self, current_resource_id: str, *, at: Selector = ()) -> typing.Dict[str, typing.Dict[ColumnReference, typing.List[ColumnReference]]]: + references: typing.Dict[str, typing.Dict[ColumnReference, typing.List[ColumnReference]]] = { + 'confidence_for': {}, + 'rank_for': {}, + 'boundary_for': {}, + 'foreign_key': {}, + } + + for column_index in range(self.query_field(list(at) + [ALL_ELEMENTS], 'dimension')['length']): + column_metadata = self.query_column(column_index, at=at) + + column_reference = ColumnReference(current_resource_id, column_index) + + if 'confidence_for' in column_metadata and 'column_indices' in column_metadata['confidence_for']: + reference_resource_id = column_metadata['confidence_for'].get('resource_id', current_resource_id) + + references['confidence_for'][column_reference] = [ + ColumnReference(reference_resource_id, reference_column_index) + for reference_column_index in column_metadata['confidence_for']['column_indices'] + ] + + if 'rank_for' in column_metadata and 'column_indices' in column_metadata['rank_for']: + reference_resource_id = column_metadata['rank_for'].get('resource_id', current_resource_id) + + references['rank_for'][column_reference] = [ + ColumnReference(reference_resource_id, reference_column_index) + for reference_column_index in column_metadata['rank_for']['column_indices'] + ] + + if 'boundary_for' in column_metadata and 'column_index' in column_metadata['boundary_for']: + reference_resource_id = column_metadata['boundary_for'].get('resource_id', current_resource_id) + + references['boundary_for'][column_reference] = [ + ColumnReference(reference_resource_id, column_metadata['boundary_for']['column_index']), + ] + + if 'foreign_key' in column_metadata and column_metadata['foreign_key']['type'] == 'COLUMN' and 'column_index' in column_metadata['foreign_key']: + reference_resource_id = column_metadata['foreign_key']['resource_id'] + + references['foreign_key'][column_reference] = [ + ColumnReference(reference_resource_id, column_metadata['foreign_key']['column_index']), + ] + + return references + + +class PrimitiveMetadata(Metadata): + """ + A class for metadata for primitives. + + It checks all updates against primitive schema. Note that as such empty (just created) metadata object + does not validate against the schema. If an instance is set on a primitive class, primitive's metaclass + logic will automatically link metadata object with the primitive class and generate required metadata. + """ + + def __init__(self, metadata: typing.Dict[str, typing.Any] = None) -> None: + super().__init__(metadata=metadata) + + # We do not do validation here because provided metadata on its own is + # probably not sufficient for validation to pass. Validation happens + # inside "contribute_to_class" method instead. + + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import base + + self.primitive: typing.Type[base.PrimitiveBase] = None + + # Not adhering to Liskov substitution principle: we do not have "selector" argument. 
+ @deprecate.arguments('source', 'timestamp', message="argument ignored") + def update(self: P, metadata: typing.Dict[str, typing.Any], *, source: typing.Any = None, timestamp: datetime.datetime = None) -> P: # type: ignore + new_metadata = super().update(selector=(), metadata=metadata) + + self._validate(new_metadata.query()) + + return new_metadata + + @deprecate.function(message="create a PrimitiveMetadata instance explicitly instead") + @deprecate.arguments('source', 'timestamp', message="argument ignored") + def clear(self: P, metadata: typing.Dict[str, typing.Any] = None, *, source: typing.Any = None, timestamp: datetime.datetime = None) -> P: + return super().clear(metadata=metadata) + + # Not adhering to Liskov substitution principle: we do not have "selector" argument. + def query(self) -> frozendict.FrozenOrderedDict: # type: ignore + return super().query(selector=()) + + # "primitive" should be of PrimitiveBase here, but we do not want to introduce a + # cyclic dependency. We validate the type at runtime in the method. + def contribute_to_class(self: P, primitive: typing.Any) -> None: + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import base + + if self.primitive is not None: + raise exceptions.InvalidStateError("Primitive is already set to '{primitive}'.".format(primitive=self.primitive)) + + if not issubclass(primitive, base.PrimitiveBase): + raise exceptions.InvalidArgumentTypeError("Primitive argument is not a subclass of 'PrimitiveBase' class.") + + self.primitive = primitive + + self._generate_and_update() + + @classmethod + def _validate_contact_information(cls, metadata: typing.Dict) -> None: + # See https://gitlab.com/datadrivendiscovery/d3m/issues/178 for motivation for this check. + + # If it is a locally registered/used primitive, we do not validate contact information. + if 'installation' not in metadata: + return + + if 'source' not in metadata: + logger.warning( + "%(python_path)s: No \"source\" field in the primitive metadata. Metadata should contain contact information and bug reporting URI.", + { + 'python_path': metadata['python_path'], + }, + ) + return + + if not metadata['source'].get('contact', None): + logger.warning( + "%(python_path)s: Contact information such as the email address of the author " + "(e.g., \"mailto:author@example.com\") should be specified in primitive metadata in its \"source.contact\" field.", + { + 'python_path': metadata['python_path'], + }, + ) + + # If the list is empty, it is also false. + if not metadata['source'].get('uris', None): + logger.warning( + "%(python_path)s: A bug reporting URI should be specified in primitive metadata in its \"source.uris\" field.", + { + 'python_path': metadata['python_path'], + }, + ) + + # Make sure a primitive provides a description (through docstring). Because we use special metaclass + # which inherits description from a base class, we have to check the description itself. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/167 + @classmethod + def _validate_description(cls, metadata: typing.Dict) -> None: + # Importing here to prevent import cycle. 
+ from d3m.primitive_interfaces import base + + if 'description' not in metadata or not metadata['description'] or metadata['description'].startswith(base.DEFAULT_DESCRIPTION): + logger.warning( + "%(python_path)s: Primitive is not providing a description through its docstring.", + { + 'python_path': metadata['python_path'], + }, + ) + + # Checks that the primitive's Python path complies with namespace requirements. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/3 + @classmethod + def _validate_namespace_compliance(cls, python_path: str, primitive_family: typing.Union[PrimitiveFamily, str]) -> None: # type: ignore + segments = python_path.split('.') + + if len(segments) != 5: + logger.warning( + "%(python_path)s: Primitive's Python path does not adhere to d3m.primitives namespace specification. " + "Reason: must have 5 segments.", + { + 'python_path': python_path, + }, + ) + else: + if segments[0] != 'd3m' or segments[1] != 'primitives': + logger.warning( + "%(python_path)s: Primitive's Python path does not adhere to d3m.primitives namespace specification. " + "Reason: must start with \"d3m.primitives\".", + { + 'python_path': python_path, + }, + ) + + family = segments[2] + name = segments[3] + kind = segments[4] + + # "primitive_family" could also already be a string. + if isinstance(primitive_family, str): + primitive_family_name = primitive_family + else: + primitive_family_name = primitive_family.name + + if family != primitive_family_name.lower(): # type: ignore + logger.warning( + "%(python_path)s: Primitive's Python path does not adhere to d3m.primitives namespace specification. " + "Reason: primitive family segment must match primitive's primitive family.", + { + 'python_path': python_path, + }, + ) + + if name not in primitive_names.PRIMITIVE_NAMES: + logger.warning( + "%(python_path)s: Primitive's Python path does not adhere to d3m.primitives namespace specification. " + "Reason: must have a known primitive name segment.", + { + 'python_path': python_path, + }, + ) + + if not kind[0].isupper(): + logger.warning( + "%(python_path)s: Primitive's Python path does not adhere to d3m.primitives namespace specification. " + "Reason: primitive kind segment must start with upper case.", + { + 'python_path': python_path, + }, + ) + + @classmethod + def _validate(cls, metadata: typing.Dict) -> None: + PRIMITIVE_SCHEMA_VALIDATOR.validate(metadata) + + cls._validate_installation(metadata) + cls._validate_volumes(metadata) + cls._validate_docker_containers(metadata) + cls._validate_hyperparams_to_tune(metadata) + cls._validate_optional_constructor_arguments(metadata) + cls._validate_namespace_compliance(metadata['python_path'], metadata['primitive_family']) + cls._validate_contact_information(metadata) + cls._validate_description(metadata) + + def _generate_and_update(self) -> None: + generated_metadata = self._generate_metadata_for_primitive() + + self._update_in_place((), generated_metadata, self._current_metadata) + + self._validate(self.query()) + + @classmethod + def _validate_installation(cls, metadata: typing.Dict) -> None: + for entry in metadata.get('installation', []): + # We can check simply equality because metadata enumerations are equal to strings as well, + # and "entry['type']" can be both a string or an enumeration instance. + if entry['type'] != PrimitiveInstallationType.PIP: + continue + + if 'package' in entry: + if '/' in entry['package']: + raise exceptions.InvalidMetadataError("Invalid package name '{package_name}'. 
If you want to use an URI pointing to a package, use 'package_uri' instead.".format( + package_name=entry['package'], + )) + + continue + + if 'package_uri' not in entry: + continue + + if entry['package_uri'].startswith('git+git@'): + # "git+git@git.myproject.org:MyProject" format cannot be parsed with urlparse. + raise exceptions.InvalidMetadataError("Only git+http and git+https URI schemes are allowed.") + + parsed_uri = url_parse.urlparse(entry['package_uri']) + + # It is not a git pip URI. For now we then do not validate it. + if not parsed_uri.scheme.startswith('git'): + continue + + if parsed_uri.scheme not in ['git+http', 'git+https']: + raise exceptions.InvalidMetadataError("Only git+http and git+https URI schemes are allowed.") + + if '@' not in parsed_uri.path: + raise exceptions.InvalidMetadataError("Package URI does not include a commit hash: {package_uri}".format(package_uri=entry['package_uri'])) + + path, commit_hash = parsed_uri.path.rsplit('@', 1) + + if not COMMIT_HASH_REGEX.match(commit_hash): + raise exceptions.InvalidMetadataError("Package URI does not include a commit hash: {package_uri}".format(package_uri=entry['package_uri'])) + + if not parsed_uri.fragment: + raise exceptions.InvalidMetadataError("Package URI does not include a '#egg=package_name' URI suffix.") + + parsed_fragment = url_parse.parse_qs(parsed_uri.fragment, strict_parsing=True) + + if 'egg' not in parsed_fragment: + raise exceptions.InvalidMetadataError("Package URI does not include a '#egg=package_name' URI suffix.") + + @classmethod + def _validate_optional_constructor_arguments(cls, metadata: typing.Dict) -> None: + installation = metadata.get('installation', []) + + containers = [entry for entry in installation if entry.get('type', None) == PrimitiveInstallationType.DOCKER] + if containers and 'docker_containers' not in metadata['primitive_code'].get('instance_methods', {})['__init__']['arguments']: + raise exceptions.InvalidPrimitiveCodeError("Primitive defines a Docker container dependency but does not accept 'docker_containers' argument to the constructor.") + + volumes = cls._get_volumes(metadata) + if volumes and 'volumes' not in metadata['primitive_code'].get('instance_methods', {})['__init__']['arguments']: + raise exceptions.InvalidPrimitiveCodeError("Primitive defines a volume dependency but does not accept 'volumes' argument to the constructor.") + + @classmethod + def _validate_hyperparams_to_tune(cls, metadata: typing.Dict) -> None: + hyperparams = metadata['primitive_code'].get('hyperparams', {}) + + for name in metadata.get('hyperparams_to_tune', []): + if name not in hyperparams: + raise exceptions.InvalidMetadataError("Hyper-parameter in 'hyperparams_to_tune' metadata does not exist: {name}".format(name=name)) + + def _generate_metadata_for_primitive(self) -> typing.Dict[str, typing.Any]: + # Importing here to prevent import cycle. 
+ from d3m.primitive_interfaces import base + + type_arguments = self._get_type_arguments() + class_attributes = self._get_class_attributes() + hyperparams_class = typing.cast(typing.Type[hyperparams_module.Hyperparams], type_arguments[base.Hyperparams]) + arguments, instance_methods = self._get_arguments_and_methods(hyperparams_class, type_arguments) + self._validate_constructor(instance_methods) + self._validate_multi_produce(instance_methods) + self._validate_fit_multi_produce(instance_methods) + hyperparams = self._get_hyperparams(hyperparams_class) + class_methods = self._get_class_methods(type_arguments) + instance_attributes = self._get_instance_attributes() + params = self._get_params(type_arguments) + + # Sanity check. + hyperparams_keys = set(hyperparams.keys()) + # We can check simply equality because metadata enumerations are equal to strings as well, + # and "argument['kind']" can be both a string or an enumeration instance. + non_hyperparameter_arguments_keys = {name for name, argument in arguments.items() if argument['kind'] != PrimitiveArgumentKind.HYPERPARAMETER} + overlapping_keys = hyperparams_keys & non_hyperparameter_arguments_keys + if len(overlapping_keys): + raise exceptions.InvalidPrimitiveCodeError("Hyper-paramater names are overlapping with non-hyperparameter argument names: {overlapping_keys}".format(overlapping_keys=overlapping_keys)) + + primitive_code = { + # We have to convert parameters to their names because JSON schema supports only strings for keys. + 'class_type_arguments': {parameter.__name__: argument for parameter, argument in type_arguments.items()}, + 'interfaces_version': d3m.__version__, + 'interfaces': self._get_interfaces(), + 'hyperparams': hyperparams, + 'arguments': arguments, + 'class_methods': class_methods, + 'instance_methods': instance_methods, + 'class_attributes': class_attributes, + 'instance_attributes': instance_attributes, + } + + if params is not None: + primitive_code['params'] = params + + result = { + 'schema': PRIMITIVE_SCHEMA_VERSION, + 'original_python_path': '{module}.{class_name}'.format( + module=self.primitive.__module__, + class_name=self.primitive.__name__, + ), + 'primitive_code': primitive_code, + 'structural_type': self.primitive, + } + + description = inspect.cleandoc(getattr(self.primitive, '__doc__', None) or '') or None + if description is not None: + result['description'] = description + + digest = self._compute_primitive_digest() + if digest is not None: + result['digest'] = digest + + return result + + def _compute_primitive_digest(self) -> typing.Optional[str]: + primitive_metadata = self.query() + + # We use installation metadata for digest because it uniquely identifies the content of the primitive. + # TODO: Some primitives install extra code/data from their setup.py during installation. Could we capture that with digest as well? + installation = primitive_metadata.get('installation', None) + + if not installation: + return None + + # We use "to_json_structure" here and not "to_reversible_json_structure" + # because pickled values might not be deterministic. + to_digest = utils.to_json_structure({ + # We include primitive ID as well, so that different primitives + # from the same package do not have the same digest. + 'id': primitive_metadata['id'], + 'installation': installation, + }) + + return utils.compute_digest(to_digest) + + # Using typing.TypeVar in type signature does not really work, so we are using type instead. 
+ # See: https://github.com/python/typing/issues/520 + def _get_type_arguments(self) -> typing.Dict[type, type]: + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import base + + # This call also catches if type parameter has been overridden with a new type variable. + # This means that we for free get to make sure type parameters from the base class stay + # as they are expected to be. It also fetches them recursively, so one cannot hide a + # type parameter (but can fix it to a fixed type instead of leaving it open for a + # subclass to choose it). + type_arguments = utils.get_type_arguments(self.primitive, unique_names=True) + + for parameter, argument in type_arguments.items(): + # Params type argument is optional and can be set to None. + if parameter == base.Params and issubclass(argument, type(None)): + continue + + if not utils.is_subclass(argument, parameter): + raise exceptions.InvalidPrimitiveCodeError("Type parameter '{name}' has type '{type}' and not an expected type: {expected}".format( + name=parameter.__name__, type=argument, expected=parameter.__bound__, # type: ignore + )) + + return type_arguments + + def _resolve_type(self, obj: type, type_arguments: typing.Dict[type, type]) -> type: + if obj in type_arguments: + return type_arguments[obj] + else: + return obj + + def _get_interfaces(self) -> typing.Tuple[str, ...]: + mro = [parent for parent in inspect.getmro(self.primitive) if parent.__module__.startswith('d3m.primitive_interfaces.')] + + interfaces: typing.List[str] = [] + for parent in mro: + interface = utils.get_full_name(parent) + # Remove package name. + interface = '.'.join(interface.split('.')[2:]) + if interface not in interfaces: + interfaces.append(interface) + + if not len(interfaces): + raise exceptions.InvalidPrimitiveCodeError("The primitive does not implement a standard interface.") + + return tuple(interfaces) + + # Using typing.TypeVar in type signature does not really work, so we are using type instead. + # See: https://github.com/python/typing/issues/520 + def _get_params(self, type_arguments: typing.Dict[type, type]) -> typing.Optional[typing.Dict[str, type]]: + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import base + + params = type_arguments.get(base.Params, type(None)) + + if issubclass(params, type(None)): + return None + + return params.__params_items__ # type: ignore + + def _get_hyperparams(self, hyperparams_class: 'typing.Type[hyperparams_module.Hyperparams]') -> typing.Dict[str, typing.Dict]: + # We check this here and not during hyper-parameter construction itself because + # we want to require this only once it is used with a primitive. Hyper-parameters + # might be used and constructed in other settings as well. 
+ for hyperparameter_name, hyperparameter in hyperparams_class.configuration.items(): + if not set(hyperparameter.semantic_types) & HYPERPARAMETER_REQUIRED_SEMANTIC_TYPES: + raise exceptions.InvalidPrimitiveCodeError( + "Hyper-parameter '{hyperparameter_name}' does not contain any of required semantic types: {required}".format( + hyperparameter_name=hyperparameter_name, + required=sorted(HYPERPARAMETER_REQUIRED_SEMANTIC_TYPES), + ), + ) + + return hyperparams_class.to_simple_structure() + + def _get_class_attributes(self) -> typing.Dict[str, type]: + result = {} + + for attribute_name, attribute in inspect.getmembers(self.primitive): + if attribute_name.startswith('_'): + continue + + if utils.is_class_method_on_class(attribute) or utils.is_instance_method_on_class(attribute): + continue + + result[attribute_name] = type(attribute) + + result_keys = set(result.keys()) + expected_result_keys = set(EXPECTED_CLASS_ATTRIBUTES.keys()) + + missing = expected_result_keys - result_keys + if len(missing): + raise exceptions.InvalidPrimitiveCodeError("Not all expected public class attributes exist: {missing}".format(missing=missing)) + + extra = result_keys - expected_result_keys + if len(extra): + raise exceptions.InvalidPrimitiveCodeError("Additional unexpected public class attributes exist, consider making them private by prefixing them with '_': {extra}".format(extra=extra)) + + for attribute_name, attribute in result.items(): + if not utils.is_subclass(attribute, EXPECTED_CLASS_ATTRIBUTES[attribute_name]): + raise exceptions.InvalidPrimitiveCodeError("Class attribute '{attribute_name}' does not have an expected type.".format(attribute_name=attribute_name)) + + return result + + # Using typing.TypeVar in type signature does not really work, so we are using type instead. + # See: https://github.com/python/typing/issues/520 + def _get_arguments_and_methods( + self, hyperparams_class: 'typing.Type[hyperparams_module.Hyperparams]', type_arguments: typing.Dict[type, type], + ) -> typing.Tuple[typing.Dict[str, typing.Dict], typing.Dict[str, typing.Dict]]: + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import base + from d3m import types as types_module + + arguments: typing.Dict[str, typing.Dict] = {} + methods: typing.Dict[str, typing.Dict] = {} + + for method_name, method in inspect.getmembers(self.primitive): + if method_name.startswith('_') and method_name != '__init__': + continue + + if not utils.is_instance_method_on_class(method): + continue + + # To make get_type_hints find method's module while the primitive's + # module is still being defined (and this method was indirectly called + # from primitive's metaclass). 
+ method.im_class = self.primitive + + type_hints = utils.get_type_hints(method) + + if not type_hints: + raise exceptions.InvalidPrimitiveCodeError("Cannot get types for method '{method_name}'.".format(method_name=method_name)) + + if 'return' not in type_hints: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' is missing a type for the return value.".format(method_name=method_name)) + + if method_name.startswith('produce_') or method_name == 'produce': + method_kind = PrimitiveMethodKind.PRODUCE + + if getattr(method, '__singleton__', False): + singleton_produce_method = True + else: + singleton_produce_method = False + + method_inputs_across_samples = getattr(method, '__inputs_across_samples__', ()) + elif method_name.startswith('produce'): + raise exceptions.InvalidPrimitiveCodeError("Produce method should start with 'produce_' and not be '{method_name}'.".format(method_name=method_name)) + else: + method_kind = PrimitiveMethodKind.OTHER + + singleton_produce_method = None + method_inputs_across_samples = None + + if hasattr(method, '__singleton__'): + raise exceptions.InvalidPrimitiveCodeError("Only produce methods can be set as singleton or not: {method_name}.".format(method_name=method_name)) + if hasattr(method, '__inputs_across_samples__'): + raise exceptions.InvalidPrimitiveCodeError("Only arguments of produce methods can be set to compute accross samples or not: {method_name}.".format(method_name=method_name)) + + method_arguments = [] + + # We skip the first argument (self). + for argument_name, argument in list(inspect.signature(method).parameters.items())[1:]: + if argument.kind != inspect.Parameter.KEYWORD_ONLY: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' has a non-keyword argument '{argument_name}'.".format(method_name=method_name, argument_name=argument_name)) + + has_default = argument.default is not inspect.Parameter.empty + + if argument_name.startswith('_'): + if not has_default: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' has a non-optional private argument '{argument_name}'.".format( + method_name=method_name, argument_name=argument_name, + )) + + continue + + if not ARGUMENT_NAME_REGEX.match(argument_name): + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' has an argument with an invalid name '{argument_name}'.".format( + method_name=method_name, argument_name=argument_name + )) + + if argument_name not in type_hints: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' is missing a type for argument '{argument_name}'.".format(method_name=method_name, argument_name=argument_name)) + + argument_type = self._resolve_type(type_hints[argument_name], type_arguments) + + standard_argument_description = typing.cast( + typing.Dict, + STANDARD_RUNTIME_ARGUMENTS.get(argument_name, None) or STANDARD_PIPELINE_ARGUMENTS.get(argument_name, None), + ) + if standard_argument_description is not None: + try: + expected_type = self._get_argument_type(standard_argument_description, type_arguments) + except KeyError: + raise exceptions.InvalidPrimitiveCodeError( + "Method '{method_name}' has an argument '{argument_name}' for which an expected type cannot be determined. Is a type parameter missing?".format( + method_name=method_name, argument_name=argument_name, + ) + ) + + # Types have to match here exactly. This is what class type arguments are for. 
+ if argument_type != expected_type: + raise exceptions.InvalidPrimitiveCodeError( + "Method '{method_name}' has an argument '{argument_name}' with type '{argument_type}' and not an expected type: {expected_type}".format( + method_name=method_name, argument_name=argument_name, + argument_type=argument_type, expected_type=expected_type, + ) + ) + + if 'default' in standard_argument_description: + if not has_default: + raise exceptions.InvalidPrimitiveCodeError( + "Method '{method_name}' has an argument '{argument_name}' which does not have a default value, but it should.".format( + method_name=method_name, argument_name=argument_name, + ) + ) + + if argument.default != standard_argument_description['default']: + raise exceptions.InvalidPrimitiveCodeError( + "Method '{method_name}' has an argument '{argument_name}' with a different default value: {argument_default} != {expected_default}.".format( + method_name=method_name, argument_name=argument_name, + argument_default=argument.default, expected_default=standard_argument_description['default'], + ) + ) + + else: + if has_default: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' has an argument '{argument_name}' which has a default value, but it should not.".format( + method_name=method_name, argument_name=argument_name, + )) + + if argument_name in STANDARD_RUNTIME_ARGUMENTS: + argument_kind = PrimitiveArgumentKind.RUNTIME + else: + assert argument_name in STANDARD_PIPELINE_ARGUMENTS, "argument_name not in STANDARD_PIPELINE_ARGUMENTS" + argument_kind = PrimitiveArgumentKind.PIPELINE + + # Constructor cannot have additional non-private custom arguments. + elif method_name == '__init__': + raise exceptions.InvalidPrimitiveCodeError( + "Constructor cannot have non-private custom arguments, but it has an argument '{argument_name}'.".format( + argument_name=argument_name, + ) + ) + + elif argument_name in hyperparams_class.configuration: + # Types have to match here exactly. + if argument_type != hyperparams_class.configuration[argument_name].structural_type: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' has an argument '{argument_name}' overriding a hyper-parameter with a different type: {argument_type} != {hyperparameter_type}.".format( # noqa + method_name=method_name, argument_name=argument_name, + argument_type=argument_type, hyperparameter_type=hyperparams_class.configuration[argument_name].structural_type, + )) + + # Arguments overriding a hyper-parameter should not have a default value and caller should pass a value in. + if has_default: + raise exceptions.InvalidPrimitiveCodeError( + "Method '{method_name}' has an argument '{argument_name}' overriding a hyper-parameter which has a default value, but it should not.".format( + method_name=method_name, argument_name=argument_name, + ) + ) + + argument_kind = PrimitiveArgumentKind.HYPERPARAMETER + + else: + # Any other argument should be something the rest of the pipeline can provide: + # a container value, data value, or another primitive. + expected_types: typing.Tuple[type, ...] 
= types_module.Container + types_module.Data + (base.PrimitiveBase,) + + if not utils.is_subclass(argument_type, typing.Union[expected_types]): + raise exceptions.InvalidPrimitiveCodeError( + "Method '{method_name}' has an argument '{argument_name}' with type '{argument_type}' and not an expected type: {expected_types}".format( + method_name=method_name, argument_name=argument_name, + argument_type=argument_type, expected_types=expected_types + ) + ) + + # It should not have a default. Otherwise it is easy to satisfy the argument + # (just never connect anything to it in the pipeline). + if has_default: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' has an argument '{argument_name}' which has a default value, but it should not.".format( + method_name=method_name, argument_name=argument_name, + )) + + argument_kind = PrimitiveArgumentKind.PIPELINE + + method_arguments.append(argument_name) + + if argument_name in arguments: + if argument_type != arguments[argument_name]['type']: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' has an argument '{argument_name}' which does not match a type of a previous argument with the same name: {argument_type} != {previous_type}".format( # noqa + method_name=method_name, argument_name=argument_name, + argument_type=argument_type, previous_type=arguments[argument_name]['type'], + )) + + # This should hold because it depends only on the argument name. + assert argument_kind == arguments[argument_name]['kind'], "argument_kind mismatch" + + if has_default: + if 'default' not in arguments[argument_name]: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' has an argument '{argument_name}' which has a default value, but a previous argument with the same name did not have a default value.".format( # noqa + method_name=method_name, argument_name=argument_name, + )) + elif argument.default != arguments[argument_name]['default']: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' has an argument '{argument_name}' which does not have the same default value as a previous argument with the same name: {argument_default} != {previous_default}".format( # noqa + method_name=method_name, argument_name=argument_name, + argument_default=argument.default, + previous_default=arguments[argument_name]['default'], + )) + else: + if 'default' in arguments[argument_name]: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' has an argument '{argument_name}' which does not have a default value, but a previous argument with the same name had a default value.".format( # noqa + method_name=method_name, argument_name=argument_name, + )) + + else: + arguments[argument_name] = { + 'type': argument_type, + 'kind': argument_kind, + } + + if has_default: + arguments[argument_name]['default'] = argument.default + + methods[method_name] = { + 'kind': method_kind, + 'arguments': method_arguments, + 'returns': self._resolve_type(type_hints['return'], type_arguments), + } + + if singleton_produce_method is not None: + methods[method_name]['singleton'] = singleton_produce_method + + if method_inputs_across_samples is not None: + for method_input in method_inputs_across_samples: + if method_input not in method_arguments: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' has an argument '{method_input}' set as computing across samples, but it does not exist.".format( + method_name=method_name, method_input=method_input, + )) + + if arguments[method_input]['kind'] != 
PrimitiveArgumentKind.PIPELINE: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' has an argument '{method_input}' set as computing across samples, but it is not a PIPELINE argument.".format( + method_name=method_name, method_input=method_input, + )) + + methods[method_name]['inputs_across_samples'] = method_inputs_across_samples + + description = inspect.cleandoc(getattr(method, '__doc__', None) or '') or None + if description is not None: + methods[method_name]['description'] = description + + return arguments, methods + + # Using typing.TypeVar in type signature does not really work, so we are using type instead. + # See: https://github.com/python/typing/issues/520 + def _get_argument_type(self, argument_description: typing.Dict[str, typing.Any], type_arguments: typing.Dict[type, type]) -> type: + if 'get_type' in argument_description: + return argument_description['get_type'](type_arguments) + else: + return argument_description['type'] + + # Using typing.TypeVar in type signature does not really work, so we are using type instead. + # See: https://github.com/python/typing/issues/520 + def _get_class_methods(self, type_arguments: typing.Dict[type, type]) -> typing.Dict[str, typing.Dict]: + methods: typing.Dict[str, typing.Dict] = {} + + for method_name, method in inspect.getmembers(self.primitive): + if method_name.startswith('_'): + continue + + if not utils.is_class_method_on_class(method): + continue + + type_hints = utils.get_type_hints(method) + + if not type_hints: + raise exceptions.InvalidPrimitiveCodeError("Cannot get types for method '{method_name}'.".format(method_name=method_name)) + + if 'return' not in type_hints: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' is missing a type for the return value.".format(method_name=method_name)) + + method_arguments = {} + + for argument_name, argument in inspect.signature(method).parameters.items(): + if argument.kind != inspect.Parameter.KEYWORD_ONLY: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' has a non-keyword argument '{argument_name}'.".format(method_name=method_name, argument_name=argument_name)) + + has_default = argument.default is not inspect.Parameter.empty + + if argument_name.startswith('_'): + if not has_default: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' has a non-optional private argument '{argument_name}'.".format( + method_name=method_name, argument_name=argument_name, + )) + + continue + + if argument_name not in type_hints: + raise exceptions.InvalidPrimitiveCodeError("Method '{method_name}' is missing a type for argument '{argument_name}'.".format(method_name=method_name, argument_name=argument_name)) + + argument_type = self._resolve_type(type_hints[argument_name], type_arguments) + + argument_description = { + 'type': argument_type, + } + + if has_default: + argument_description['default'] = argument.default + + method_arguments[argument_name] = argument_description + + methods[method_name] = { + 'arguments': method_arguments, + 'returns': self._resolve_type(type_hints['return'], type_arguments), + } + + description = inspect.cleandoc(getattr(method, '__doc__', None) or '') or None + if description is not None: + methods[method_name]['description'] = description + + return methods + + @classmethod + def _validate_docker_containers(cls, metadata: typing.Dict) -> None: + installation = metadata.get('installation', []) + + containers: typing.List[str] = [] + + for entry in installation: + # We can check simply equality 
because metadata enumerations are equal to strings as well, + # and "entry['type']" can be both a string or an enumeration instance. + if entry.get('type', None) != PrimitiveInstallationType.DOCKER: + continue + + key = entry.get('key', None) + if key: + containers.append(key) + + containers_set = set(containers) + if len(containers_set) != len(containers): + for key in containers_set: + containers.remove(key) + raise exceptions.InvalidMetadataError("Same Docker image key reused across multiple installation entries: {extra_keys}".format(extra_keys=containers)) + + @classmethod + def _validate_volumes(cls, metadata: typing.Dict) -> None: + volumes: typing.List[str] = [] + + for entry in cls._get_volumes(metadata): + volumes.append(entry['key']) + + volumes_set = set(volumes) + if len(volumes_set) != len(volumes): + for key in volumes_set: + volumes.remove(key) + raise exceptions.InvalidMetadataError("Same volume key reused across multiple installation entries: {extra_keys}".format(extra_keys=volumes)) + + def _validate_constructor(self, instance_methods: typing.Dict[str, typing.Dict]) -> None: + if '__init__' not in instance_methods: + raise exceptions.InvalidPrimitiveCodeError("Constructor is missing.") + + if 'hyperparams' not in instance_methods['__init__']['arguments']: + raise exceptions.InvalidPrimitiveCodeError("Constructor's argument 'hyperparams' is required.") + + def _validate_multi_produce(self, instance_methods: typing.Dict[str, typing.Dict]) -> None: + if 'produce' not in instance_methods: + raise exceptions.InvalidPrimitiveCodeError("'produce' method is missing.") + + if 'multi_produce' not in instance_methods: + raise exceptions.InvalidPrimitiveCodeError("'multi_produce' method is missing.") + + # Initialize with runtime arguments. + expected_arguments = {'produce_methods', 'timeout', 'iterations'} + for method_name, method in instance_methods.items(): + if method['kind'] != PrimitiveMethodKind.PRODUCE: + continue + + if 'produce_methods' in method['arguments']: + raise exceptions.InvalidPrimitiveCodeError("Produce method cannot use 'produce_methods' argument: {method_name}".format(method_name=method_name)) + + expected_arguments.update(method['arguments']) + + arguments = set(instance_methods['multi_produce']['arguments']) + + missing = expected_arguments - arguments + if len(missing): + raise exceptions.InvalidPrimitiveCodeError( + "'multi_produce' method arguments have to be an union of all arguments of all produce methods, but it does not accept all expected arguments: {missing}".format( + missing=missing, + ) + ) + + extra = arguments - expected_arguments + if len(extra): + raise exceptions.InvalidPrimitiveCodeError( + "'multi_produce' method arguments have to be an union of all arguments of all produce methods, but it accepts unexpected arguments: {extra}".format( + extra=extra, + ) + ) + + def _validate_fit_multi_produce(self, instance_methods: typing.Dict[str, typing.Dict]) -> None: + if 'set_training_data' not in instance_methods: + raise exceptions.InvalidPrimitiveCodeError("'set_training_data' method is missing.") + + if 'produce' not in instance_methods: + raise exceptions.InvalidPrimitiveCodeError("'produce' method is missing.") + + if 'fit_multi_produce' not in instance_methods: + raise exceptions.InvalidPrimitiveCodeError("'fit_multi_produce' method is missing.") + + # Initialize with runtime arguments. 
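+        # 'fit_multi_produce' has to accept the union of 'set_training_data' arguments, all produce
+        # method arguments, and the runtime-only arguments below; the checks that follow enforce
+        # this in both directions (no missing and no extra arguments).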
+        expected_arguments = {'produce_methods', 'timeout', 'iterations'}
+        for method_name, method in instance_methods.items():
+            if method['kind'] == PrimitiveMethodKind.PRODUCE:
+                if 'produce_methods' in method['arguments']:
+                    raise exceptions.InvalidPrimitiveCodeError("Produce method cannot use 'produce_methods' argument: {method_name}".format(method_name=method_name))
+
+                expected_arguments.update(method['arguments'])
+
+            elif method_name == 'set_training_data':
+                if 'produce_methods' in method['arguments']:
+                    raise exceptions.InvalidPrimitiveCodeError("'set_training_data' method cannot use 'produce_methods' argument: {method_name}".format(method_name=method_name))
+
+                expected_arguments.update(method['arguments'])
+
+        arguments = set(instance_methods['fit_multi_produce']['arguments'])
+
+        missing = expected_arguments - arguments
+        if len(missing):
+            raise exceptions.InvalidPrimitiveCodeError(
+                "'fit_multi_produce' method arguments have to be a union of all arguments of 'set_training_data' method and all produce methods, "
+                "but it does not accept all expected arguments: {missing}".format(
+                    missing=missing,
+                )
+            )
+
+        extra = arguments - expected_arguments
+        if len(extra):
+            raise exceptions.InvalidPrimitiveCodeError(
+                "'fit_multi_produce' method arguments have to be a union of all arguments of 'set_training_data' method and all produce methods, but it accepts unexpected arguments: {extra}".format(
+                    extra=extra,
+                )
+            )
+
+    # In the past we validated instance attributes by creating an instance of the primitive and observing
+    # which instance attributes were created in the constructor. This was potentially resource intensive because
+    # primitives use the constructor to initialize resources they use. Moreover, it did not detect attributes
+    # added outside the constructor (even if such practice is bad, it does happen). We could maybe do some
+    # static analysis instead, but it could also miss attributes, or have false positives. So, instead, we
+    # just document the standard instance attributes and leave it at that.
+    # See: https://gitlab.com/datadrivendiscovery/d3m/issues/158
+    def _get_instance_attributes(self) -> typing.Dict[str, type]:
+        # Importing here to prevent import cycle.
+        from d3m.primitive_interfaces import base
+
+        # Primitive instance attributes are standardized and fixed.
+        return {
+            'hyperparams': hyperparams_module.Hyperparams,
+            'random_seed': int,
+            'docker_containers': typing.Dict[str, base.DockerContainer],
+            'volumes': typing.Dict[str, str],
+            'temporary_directory': typing.Optional[str],
+        }
+
+    def get_hyperparams(self) -> 'hyperparams_module.Hyperparams':
+        return self.query()['primitive_code']['class_type_arguments']['Hyperparams']
+
+    def get_volumes(self) -> typing.Sequence[typing.Dict]:
+        return self._get_volumes(self.query())
+
+    @classmethod
+    def _get_volumes(cls, metadata: typing.Dict) -> typing.Sequence[typing.Dict]:
+        # We can simply check equality because metadata enumerations are equal to strings as well,
+        # and "entry['type']" can be either a string or an enumeration instance.
+        return [
+            entry for entry in metadata.get('installation', [])
+            if entry.get('key', None) and entry.get('file_digest', None) and entry.get('type', None) in [PrimitiveInstallationType.FILE, PrimitiveInstallationType.TGZ]
+        ]
+
+    # Not adhering to Liskov substitution principle: we are not returning a list.
+ def to_internal_json_structure(self) -> typing.Dict: # type: ignore + return utils.to_reversible_json_structure(self.to_internal_simple_structure()) + + # Not adhering to Liskov substitution principle: we are not returning a list. + def to_internal_simple_structure(self) -> typing.Dict: # type: ignore + return super().to_internal_simple_structure()[0]['metadata'] + + # Not adhering to Liskov substitution principle: we are not returning a list. + def to_json_structure(self) -> typing.Dict: # type: ignore + return utils.to_json_structure(self.to_simple_structure()) + + # Not adhering to Liskov substitution principle: we are not returning a list. + def to_simple_structure(self) -> typing.Dict: # type: ignore + return super().to_simple_structure()[0]['metadata'] + + +EXPECTED_CLASS_ATTRIBUTES = { + 'metadata': PrimitiveMetadata, + 'logger': logging.Logger, +} + + +def _get_inputs(type_arguments: typing.Dict[type, type]) -> type: + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import base + + return type_arguments[base.Inputs] + + +def _get_outputs(type_arguments: typing.Dict[type, type]) -> type: + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import base + + return type_arguments[base.Outputs] + + +def _get_input_labels(type_arguments: typing.Dict[type, type]) -> type: + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import distance + + return type_arguments[distance.InputLabels] + + +# Arguments which can be fulfilled by other primitives in a pipeline. +STANDARD_PIPELINE_ARGUMENTS = { + 'inputs': { + 'get_type': _get_inputs, + }, + 'outputs': { + 'get_type': _get_outputs, + }, + 'input_labels': { + 'get_type': _get_input_labels, + }, +} + + +def _get_hyperparams(type_arguments: typing.Dict[type, type]) -> type: + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import base + + return type_arguments[base.Hyperparams] + + +def _get_docker_containers(type_arguments: typing.Dict[type, type]) -> type: + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import base + + return typing.Optional[typing.Dict[str, base.DockerContainer]] + + +def _get_params(type_arguments: typing.Dict[type, type]) -> type: + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import base + + return type_arguments[base.Params] + + +def _get_gradient_outputs(type_arguments: typing.Dict[type, type]) -> type: + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import base + + return base.Gradients[type_arguments[base.Outputs]] # type: ignore + + +def _get_module(type_arguments: typing.Dict[type, type]) -> type: + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import base + + return type_arguments[base.Module] + + +# Arguments which are meaningful only for a runtime executing a pipeline. 
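+# For example, 'timeout' and 'iterations' are passed to produce (and fit) methods by the runtime,
+# while 'random_seed', 'docker_containers', 'volumes', and 'temporary_directory' are provided to the
+# constructor; none of them are satisfied through pipeline connections between steps.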
+STANDARD_RUNTIME_ARGUMENTS = { + 'hyperparams': { + 'get_type': _get_hyperparams, + }, + 'random_seed': { + 'type': int, + 'default': 0, + }, + 'docker_containers': { + 'get_type': _get_docker_containers, + 'default': None, + }, + 'volumes': { + 'type': typing.Optional[typing.Dict[str, str]], + 'default': None, + }, + 'temporary_directory': { + 'type': typing.Optional[str], + 'default': None, + }, + 'timeout': { + 'type': typing.Optional[float], + 'default': None, + }, + 'iterations': { + 'type': typing.Optional[int], + 'default': None, + }, + 'produce_methods': { + 'type': typing.Sequence[str], + }, + 'params': { + 'get_type': _get_params, + }, + 'num_samples': { + 'type': int, + 'default': 1, + }, + 'gradient_outputs': { + 'get_type': _get_gradient_outputs, + }, + 'fine_tune': { + 'type': bool, + 'default': False, + }, + 'fine_tune_learning_rate': { + 'type': float, + 'default': 0.00001, + }, + 'fine_tune_weight_decay': { + 'type': float, + 'default': 0.00001, + }, + 'temperature': { + 'type': float, + 'default': 0, + }, + 'input_module': { + 'get_type': _get_module, + }, + 'module': { + 'get_type': _get_module, + }, +} + + +def metadata_serializer(obj: Metadata) -> dict: + data = { + 'metadata': pickle.dumps(obj), + } + + return data + + +def metadata_deserializer(data: dict) -> Metadata: + metadata = pickle.loads(data['metadata']) + + return metadata + + +if pyarrow_lib is not None: + pyarrow_lib._default_serialization_context.register_type( + Metadata, 'd3m.metadata', + custom_serializer=metadata_serializer, + custom_deserializer=metadata_deserializer, + ) diff --git a/d3m/d3m/metadata/hyperparams.py b/d3m/d3m/metadata/hyperparams.py new file mode 100644 index 0000000..afe5391 --- /dev/null +++ b/d3m/d3m/metadata/hyperparams.py @@ -0,0 +1,3370 @@ +import abc +import base64 +import collections +import copy +import functools +import importlib +import inspect +import logging +import numbers +import operator +import pickle +import re +import types +import typing + +import frozendict # type: ignore +import numpy # type: ignore +import typing_inspect # type: ignore +from pytypes import type_util # type: ignore +from scipy import special as scipy_special # type: ignore +from sklearn.utils import validation as sklearn_validation # type: ignore + +from . import base +from d3m import deprecate, exceptions, utils + +__all__ = ( + 'Hyperparameter', 'Primitive', 'Constant', 'Bounded', 'Enumeration', 'UniformBool', 'UniformInt', + 'Uniform', 'LogUniform', 'Normal', 'LogNormal', 'Union', 'Choice', 'Set', 'SortedSet', 'List', + 'SortedList', 'Hyperparams', +) + +logger = logging.getLogger(__name__) + +RandomState = typing.Union[numbers.Integral, numpy.integer, numpy.random.RandomState] + +T = typing.TypeVar('T') +S = typing.TypeVar('S', bound=typing.Sequence) + +# We want to make sure we do not support dots because they are used to delimit nested hyper-parameters. 
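+# For example, 'learning_rate' and 'n_estimators2' are valid hyper-parameter names, while
+# 'learning.rate', '_hidden', and '2nd_rate' are not matched by this regex.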
+HYPERPARAMETER_NAME_REGEX = re.compile(r'^[A-Za-z][A-Za-z_0-9]*$') + + +def _get_structural_type_argument(obj: typing.Any, type_var: typing.Any) -> type: + cls = typing_inspect.get_generic_type(obj) + + return utils.get_type_arguments(cls)[type_var] + + +def check_sample_size(obj: 'typing.Union[Hyperparameter, Hyperparams]', min_samples: int, max_samples: typing.Optional[int], with_replacement: bool) -> typing.Tuple[int, int]: + if with_replacement: + all_max_samples = None + else: + all_max_samples = obj.get_max_samples() + + if not isinstance(min_samples, int): + raise exceptions.InvalidArgumentTypeError("'min_samples' argument is not an int.") + if min_samples < 0: + raise exceptions.InvalidArgumentValueError("'min_samples' cannot be smaller than 0.") + if max_samples is not None: + if not isinstance(max_samples, int): + raise exceptions.InvalidArgumentTypeError("'max_samples' argument is not an int.") + if min_samples > max_samples: + raise exceptions.InvalidArgumentValueError("'min_samples' cannot be larger than 'max_samples'.") + if all_max_samples is not None and max_samples > all_max_samples: + raise exceptions.InvalidArgumentValueError("'max_samples' cannot be larger than {max_samples}.".format(max_samples=all_max_samples)) + else: + if all_max_samples is not None: + max_samples = all_max_samples + else: + raise exceptions.InvalidArgumentValueError("'max_samples' argument is required.") + + return min_samples, max_samples + + +# A special Python method which is stored efficiently +# when pickled. See PEP 307 for more details. +def __newobj__(cls: type, *args: typing.Any) -> typing.Any: + return cls.__new__(cls, *args) + + +def _is_defined_at_global_scope(cls: type) -> bool: + class_name = getattr(cls, '__name__', None) + class_module = inspect.getmodule(cls) + return class_name is not None and class_module is not None and getattr(class_module, class_name, None) is cls + + +def _recreate_hyperparams_class(base_cls: 'typing.Type[Hyperparams]', define_args_list: typing.Sequence[typing.Dict[str, typing.Any]]) -> typing.Any: + # We first have to recreate the class from the base class. + cls = base_cls + for args in define_args_list: + cls = cls.define(**args) + # And then we create a new instance of the object. + return cls.__new__(cls) + + +def _encode_generic_type(structural_type: type) -> typing.Union[type, typing.Dict]: + args = typing_inspect.get_last_args(structural_type) + + if not args: + return structural_type + + return { + 'origin': typing_inspect.get_origin(structural_type), + 'args': [_encode_generic_type(arg) for arg in args] + } + + +def _decode_generic_type(description: typing.Union[type, typing.Dict]) -> type: + if not isinstance(description, dict): + return description + + return description['origin'][tuple(_decode_generic_type(arg) for arg in description['args'])] + + +class HyperparameterMeta(utils.AbstractMetaclass, typing.GenericMeta): + pass + + +class Hyperparameter(typing.Generic[T], metaclass=HyperparameterMeta): + """ + A base class for hyper-parameter descriptions. + + A base hyper-parameter does not give any information about the space of the hyper-parameter, + besides a default value. + + Type variable ``T`` is optional and if not provided an attempt to automatically infer + it from ``default`` will be made. Attribute ``structural_type`` exposes this type. + + There is a special case when values are primitives. 
In this case type variable ``T`` and
+    ``structural_type`` should always be a primitive base class, but valid values used in
+    hyper-parameters can be both primitive instances (of that base class or its subclasses)
+    and primitive classes (that base class itself or its subclasses). Primitive instances
+    allow one to specify a primitive much more precisely: values of their hyper-parameters,
+    or even an already fitted primitive.
+
+    This means that TA2 should take care and check if values it is planning to use for
+    this hyper-parameter are a primitive class or a primitive instance. It should make sure
+    that it always passes only a primitive instance to the primitive which has a hyper-parameter
+    expecting primitive(s). Even if the value is already a primitive instance, it must not
+    pass it directly, but should make a copy of the primitive instance with the same hyper-parameters
+    and params. Primitive instances which are part of hyper-parameter definitions should be seen
+    as immutable and as a template for primitives to pass on, not to use directly.
+
+    TA2 is in the best position to create such instances during a pipeline run as it has all
+    necessary information to construct primitive instances (and can control a random seed,
+    for example). Moreover, it is also more reasonable for TA2 to handle the life-cycle of
+    a primitive and do any additional processing of primitives. TA2 can create such a primitive
+    outside of the pipeline, or as part of the pipeline and pass it as a hyper-parameter
+    value to the primitive. The latter approach allows the pipeline to describe how the primitive
+    is fitted and to use data from the pipeline itself for fitting, before the primitive is passed on
+    as a hyper-parameter value to another primitive.
+
+    Attributes
+    ----------
+    name:
+        A name of this hyper-parameter in the configuration of all hyper-parameters.
+    structural_type:
+        A Python type of this hyper-parameter. All values of the hyper-parameter, including the default value,
+        should be of this type.
+    semantic_types:
+        A list of URIs providing semantic meaning of the hyper-parameter. This can help express how
+        the hyper-parameter is being used, e.g., as a learning rate or as a kernel parameter.
+    description:
+        An optional natural language description of the hyper-parameter.
+    """
+
+    name: str
+    structural_type: typing.Type
+    semantic_types: typing.Sequence[str]
+    description: str
+
+    def __init__(self, default: T, *, semantic_types: typing.Sequence[str] = None, description: str = None) -> None:
+        if semantic_types is None:
+            semantic_types = ()
+
+        self.name: str = None
+        self.semantic_types = semantic_types
+        self.description = description
+
+        self._default = default
+
+        # If subclass has not already set it.
+        if not hasattr(self, 'structural_type'):
+            structural_type = _get_structural_type_argument(self, T)  # type: ignore
+
+            if structural_type == typing.Any:
+                structural_type = self.infer_type(self._default)
+
+            self.structural_type = structural_type
+
+        self.validate_default()
+
+    def contribute_to_class(self, name: str) -> None:
+        if self.name is not None and self.name != name:
+            raise exceptions.InvalidStateError("Name is already set to '{name}', cannot set to '{new_name}'.".format(name=self.name, new_name=name))
+
+        self.name = name
+
+    def get_default(self, path: str = None) -> typing.Any:
+        """
+        Returns a default value of a hyper-parameter.
+
+        Remember to never modify it in-place if it is a mutable value.
Moreover, if it is + an instance of a primitive, also copy the instance before you use it to not + change its internal state. + + Parameters + ---------- + path: + An optional path to get defaults for nested hyper-parameters, if a hyper-parameter + has nested hyper-parameters. It can contain ``.`` to represent a path through + nested hyper-parameters. + + Returns + ------- + A default value. + """ + + if path is not None: + raise KeyError("Invalid path '{path}'.".format(path=path)) + + return self._default + + def check_type(self, value: typing.Any, cls: type) -> bool: + """ + Check that the type of ``value`` matches given ``cls``. + + There is a special case if ``value`` is a primitive class, in that case it is checked + that ``value`` is a subclass of ``cls``. + + Parameters + ---------- + value: + Value to check type for. + cls: + Type to check type against. + + Returns + ------- + ``True`` if ``value`` is an instance of ``cls``, or if ``value`` is a primitive + class, if it is a subclass of ``cls``. + """ + + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import base as primitive_interfaces_base + + def get_type(obj: typing.Any) -> type: + if utils.is_type(obj) and issubclass(obj, primitive_interfaces_base.PrimitiveBase): + return obj + else: + return type(obj) + + value_type = type_util.deep_type(value, get_type=get_type) + + return utils.is_subclass(value_type, cls) + + def infer_type(self, value: typing.Any) -> type: + """ + Infers a structural type of ``value``. + + There is a special case if ``value`` is a primitive class, in that case it is returned + as is. + + Parameters + ---------- + value: + Value to infer a type for. + + Returns + ------- + Type of ``value``, or ``value`` itself if ``value`` is a primitive class. + """ + + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import base as primitive_interfaces_base + + if utils.is_type(value) and issubclass(value, primitive_interfaces_base.PrimitiveBase): + return value + else: + return utils.get_type(value) + + def validate(self, value: T) -> None: + """ + Validates that a given ``value`` belongs to the space of the hyper-parameter. + + If not, it throws an exception. + + Parameters + ---------- + value: + Value to validate. + """ + + if not self.check_type(value, self.structural_type): + raise exceptions.InvalidArgumentTypeError("Value '{value}' {for_name}is not an instance of the structural type: {structural_type}".format( + value=value, for_name=self._for_name(), structural_type=self.structural_type, + )) + + def validate_default(self) -> None: + """ + Validates that a default value belongs to the space of the hyper-parameter. + + If not, it throws an exception. + """ + + self.validate(self._default) + + def _validate_finite_float(self, value: typing.Any) -> None: + """ + If ``value`` is a floating-point value, it validates that it is + a finite number (no infinity, no ``NaN``). + + If not, it throws an exception. + + Parameters + ---------- + value: + Value to validate. + """ + + if utils.is_float(type(value)) and not numpy.isfinite(value): + raise exceptions.InvalidArgumentValueError("A floating-point value {for_name}must be finite.".format(for_name=self._for_name())) + + def _for_name(self) -> str: + if getattr(self, 'name', None) is None: + return "" + else: + return "for hyper-parameter '{name}' ".format(name=self.name) + + def sample(self, random_state: RandomState = None) -> T: + """ + Samples a random value from the hyper-parameter search space. 
+ + For the base class it always returns a ``default`` value because the space + is unknown. + + Parameters + ---------- + random_state: + A random seed or state to be used when sampling. + + Returns + ------- + A sampled value. + """ + + sklearn_validation.check_random_state(random_state) + + utils.log_once(logger, logging.WARNING, "Sampling a hyper-parameter '%(name)s' without known space. Using a default value.", {'name': self.name}, stack_info=True) + + return self.get_default() + + # Should not be called at the module importing time because it can trigger loading + # of all primitives in the "Primitive" hyper-parameter, which can lead to an import cycle. + def get_max_samples(self) -> typing.Optional[int]: + """ + Returns a maximum number of samples that can be returned at once using `sample_multiple`, + when ``with_replacement`` is ``False``. + + Returns + ------- + A maximum number of samples that can be returned at once. Or ``None`` if there is no limit. + """ + + return 1 + + def _check_sample_size(self, min_samples: int, max_samples: typing.Optional[int], with_replacement: bool) -> typing.Tuple[int, int]: + return check_sample_size(self, min_samples, max_samples, with_replacement) + + def sample_multiple(self, min_samples: int = 0, max_samples: int = None, random_state: RandomState = None, *, with_replacement: bool = False) -> typing.Sequence[T]: + """ + Samples multiple random values from the hyper-parameter search space. At least ``min_samples`` + of them, and at most ``max_samples``. + + For the base class it always returns only a ``default`` value because the space + is unknown. + + Parameters + ---------- + min_samples: + A minimum number of samples to return. + max_samples: + A maximum number of samples to return. + random_state: + A random seed or state to be used when sampling. + with_replacement: + Are we sampling with replacement or without? + + Returns + ------- + A set (represented as a tuple) of multiple sampled values. + """ + + min_samples, max_samples = self._check_sample_size(min_samples, max_samples, with_replacement) + + random_state = sklearn_validation.check_random_state(random_state) + + utils.log_once(logger, logging.WARNING, "Sampling a hyper-parameter '%(name)s' without known space. Using a default value.", {'name': self.name}, stack_info=True) + + if with_replacement: + size = random_state.randint(min_samples, max_samples + 1) + + return (self.get_default(),) * size + + else: + if min_samples > 0: + assert min_samples == 1, min_samples + assert max_samples == 1, max_samples + return (self.get_default(),) + elif max_samples < 1: + assert min_samples == 0, min_samples + assert max_samples == 0, max_samples + return () + else: + assert min_samples == 0, min_samples + assert max_samples == 1, max_samples + return typing.cast(typing.Sequence[T], () if random_state.rand() >= 0.5 else (self.get_default(),)) + + def __repr__(self) -> str: + return '{class_name}(default={default})'.format( + class_name=type(self).__name__, + default=self.get_default(), + ) + + def to_simple_structure(self) -> typing.Dict: + """ + Converts the hyper-parameter to a simple structure, similar to JSON, but with values + left as Python values. + + Returns + ------- + A dict. 
+ """ + + structure = { + 'type': type(self), + 'default': self.get_default(), + 'structural_type': self.structural_type, + 'semantic_types': list(self.semantic_types), + } + + if self.description is not None: + structure['description'] = self.description + + return structure + + @deprecate.function(message="use value_to_json_structure method instead") + def value_to_json(self, value: T) -> typing.Any: + return self.value_to_json_structure(value) + + def value_to_json_structure(self, value: T) -> typing.Any: + """ + Converts a value of this hyper-parameter to a JSON-compatible value. + + Parameters + ---------- + value: + Value to convert. + + Returns + ------- + A JSON-compatible value. + """ + + self.validate(value) + + if utils.is_subclass(self.structural_type, typing.Union[str, int, float, bool, type(None)]): + if utils.is_float(type(value)) and not numpy.isfinite(value): + return { + 'encoding': 'pickle', + 'value': base64.b64encode(pickle.dumps(value)).decode('utf8'), + } + else: + return value + elif utils.is_subclass(self.structural_type, numpy.bool_): + return bool(value) + elif utils.is_subclass(self.structural_type, numpy.integer): + return int(value) + elif utils.is_subclass(self.structural_type, typing.Union[numpy.float32, numpy.float64]): + value = float(value) + if not numpy.isfinite(value): + return { + 'encoding': 'pickle', + 'value': base64.b64encode(pickle.dumps(value)).decode('utf8'), + } + else: + return value + else: + return { + 'encoding': 'pickle', + 'value': base64.b64encode(pickle.dumps(value)).decode('utf8'), + } + + @deprecate.function(message="use value_from_json_structure method instead") + def value_from_json(self, json: typing.Any) -> T: + return self.value_from_json_structure(json) + + def value_from_json_structure(self, json: typing.Any) -> T: + """ + Converts a JSON-compatible value to a value of this hyper-parameter. + + Parameters + ---------- + json: + A JSON-compatible value. + + Returns + ------- + Converted value. + """ + + if isinstance(json, dict): + if json.get('encoding', None) != 'pickle': + raise exceptions.NotSupportedError(f"Not supported hyper-parameter value encoding: {json.get('encoding', None)}") + if 'value' not in json: + raise exceptions.MissingValueError(f"'value' field is missing in encoded hyper-parameter value.") + + # TODO: Limit the types of values being able to load to prevent arbitrary code execution by a malicious pickle. + value = pickle.loads(base64.b64decode(json['value'].encode('utf8'))) + elif utils.is_subclass(self.structural_type, typing.Union[str, int, bool, type(None)]): + # Handle a special case when value was parsed from JSON as float, but we expect an int. + # If "json" is not really an integer then we set "value" to a float and leave + # to "validate" to raise an exception. + if isinstance(json, float) and json.is_integer(): + value = int(json) + else: + value = json + elif utils.is_subclass(self.structural_type, typing.Union[str, float, bool, type(None)]): + # Handle a special case when value was parsed from JSON as int, but we expect a float. + if isinstance(json, int): + value = float(json) + else: + value = json + elif utils.is_subclass(self.structural_type, typing.Union[str, int, float, bool, type(None)]): + # If both int and float are accepted we assume the user of the value knows how to + # differentiate between values or that precise numerical type does not matter. + value = json + else: + # Backwards compatibility. A string representing a pickle. 
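+            # (Illustrative: such legacy values were stored directly as base64-encoded pickle strings,
+            # instead of the {'encoding': 'pickle', 'value': ...} structure produced above.)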
+ logger.warning("Converting hyper-parameter '%(name)s' from a deprecated JSON structure.", {'name': self.name}) + + # TODO: Limit the types of values being able to load to prevent arbitrary code execution by a malicious pickle. + value = pickle.loads(base64.b64decode(json.encode('utf8'))) + + self.validate(value) + + return value + + def traverse(self) -> 'typing.Iterator[Hyperparameter]': + """ + Traverse over all child hyper-parameters of this hyper-parameter. + + Yields + ------ + Hyperparamater + The next child hyper-parameter of this hyper-parameter. + """ + + # Empty generator by default. + yield from () # type: ignore + + def transform_value(self, value: T, transform: typing.Callable, index: int = 0) -> T: + """ + Transforms the value belonging to this hyper-parameter to a new value by + calling ``transform`` on it. If the hyper-parameter has child + hyper-parameters, it deconstructs the value, calls ``transform_value`` + recursively, and constructs the new value back. + + Parameters + ---------- + value: + A value to transform. + transform: + A function which receives as arguments: a hyper-parameter instance, + the value, and a sequence index of iterating over a structure, and + should return a new transformed value. It is called only for leaf + hyper-parameters (those without child hyper-parameters). + index: + A sequence index which should be passed to ``transform``. + Used when iterating over a structure by the parent. + It should be deterministic. + + Returns + ------- + A transformed value. + """ + + return transform(self, value, index) + + def can_accept_value_type(self, structural_type: typing.Union[type, typing.List[type]]) -> bool: + """ + Returns ``True`` if a hyper-parameter can accept a value of type ``structural_type``. + + Parameters + ---------- + structural_type: + A structural type. Can be a type or a list of types. + + Returns + ------- + If value of given type can be accepted by this hyper-parameter. + """ + + if structural_type is typing.Any: + return True + elif isinstance(structural_type, typing.List): + # Default implementation does not support a list of types. This is used for "Set" hyper-parameter. + return False + else: + return utils.is_subclass(structural_type, self.structural_type) + + # TODO: Remove once using Python 3.7 exclusively. + def __getstate__(self) -> dict: + state = dict(self.__dict__) + # Subclasses of generic classes cannot be pickled in Python 3.6, but instances of + # them can, because during runtime information about generic classes is removed. + # Pickling of hyper-parameter instances thus generally work without problems + # but if they are an instance of the a subclass of a generic class, a reference + # to that class is stored into "__orig_class__" which cannot be pickled. + # Because we do not really need it after we extracted "structural_type", + # we remove it here when pickling. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/155 + if '__orig_class__' in state: + del state['__orig_class__'] + + if 'structural_type' in state: + # A workaround for structural type being a generic class. + state['structural_type'] = _encode_generic_type(state['structural_type']) + + return state + + def __setstate__(self, state: dict) -> None: + if 'structural_type' in state: + state['structural_type'] = _decode_generic_type(state['structural_type']) + + self.__dict__ = state + + +class Primitive(Hyperparameter[T]): + """ + A hyper-parameter describing a primitive or primitives. 
+
+    Matching primitives are determined based on their structural type (a matching primitive
+    has to be an instance or a subclass of the structural type), their primitive family
+    (a matching primitive's family has to be among those listed in the hyper-parameter),
+    their algorithm types (a matching primitive has to implement at least one of those
+    listed in the hyper-parameter), and produce methods provided (a matching primitive
+    has to provide all of those listed in the hyper-parameter).
+
+    Remember that valid values of a hyper-parameter which has primitive values are both
+    primitive instances and primitive classes, but the structural type is always just a
+    primitive base class. Hyper-parameter values being passed to a primitive which has
+    a hyper-parameter expecting primitive(s) should always be primitive instances.
+
+    The default sampling method always returns classes (or a default value, which can be a
+    primitive instance), but alternative implementations could sample across instances
+    (and, for example, also across primitives' hyper-parameters).
+
+    Attributes
+    ----------
+    primitive_families:
+        A list of primitive families a matching primitive should be part of.
+    algorithm_types:
+        A list of algorithm types of which a matching primitive should implement at least one.
+    produce_methods:
+        A list of produce methods, all of which a matching primitive should provide.
+    """
+
+    primitive_families: 'typing.Sequence[base.PrimitiveFamily]'
+    algorithm_types: 'typing.Sequence[base.PrimitiveAlgorithmType]'
+    produce_methods: typing.Sequence[str]
+
+    def __init__(self, default: typing.Type[T], primitive_families: 'typing.Sequence[base.PrimitiveFamily]' = None,  # type: ignore
+                 algorithm_types: 'typing.Sequence[base.PrimitiveAlgorithmType]' = None, produce_methods: typing.Sequence[str] = None, *,  # type: ignore
+                 semantic_types: typing.Sequence[str] = None, description: str = None) -> None:
+        if primitive_families is None:
+            primitive_families = ()
+        if algorithm_types is None:
+            algorithm_types = ()
+        if produce_methods is None:
+            produce_methods = ()
+
+        # Convert any strings to enums.
+        self.primitive_families: typing.Tuple[base.PrimitiveFamily, ...] = tuple(base.PrimitiveFamily[primitive_family] for primitive_family in primitive_families)  # type: ignore
+        self.algorithm_types: typing.Tuple[base.PrimitiveAlgorithmType, ...] = tuple(base.PrimitiveAlgorithmType[algorithm_type] for algorithm_type in algorithm_types)  # type: ignore
+        self.produce_methods = tuple(produce_methods)
+
+        for primitive_family in self.primitive_families:  # type: ignore
+            if primitive_family not in list(base.PrimitiveFamily):
+                raise exceptions.InvalidArgumentValueError("Unknown primitive family '{primitive_family}'.".format(primitive_family=primitive_family))
+        for algorithm_type in self.algorithm_types:  # type: ignore
+            if algorithm_type not in list(base.PrimitiveAlgorithmType):
+                raise exceptions.InvalidArgumentValueError("Unknown algorithm type '{algorithm_type}'.".format(algorithm_type=algorithm_type))
+        for produce_method in self.produce_methods:
+            if produce_method != 'produce' and not produce_method.startswith('produce_'):
+                raise exceptions.InvalidArgumentValueError("Invalid produce method name '{produce_method}'.".format(produce_method=produce_method))
+
+        self.matching_primitives: typing.Sequence[typing.Union[T, typing.Type[T]]] = None
+
+        # Used for sampling.
+        # See: https://github.com/numpy/numpy/issues/15935
+        self._choices: numpy.ndarray = None
+
+        # Default value is checked by parent class calling "validate".
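+        # Illustrative usage (with a hypothetical primitive class), showing how such a
+        # hyper-parameter might be declared:
+        #
+        #     learner = Primitive[PrimitiveBase](
+        #         default=MyClassifierPrimitive,
+        #         primitive_families=['CLASSIFICATION'],
+        #         produce_methods=['produce'],
+        #     )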
+ + super().__init__(default, semantic_types=semantic_types, description=description) # type: ignore + + # "all_primitives" is not "Sequence[Type[PrimitiveBase]]" to not introduce an import cycle. + def populate_primitives(self, all_primitives: typing.Sequence[type] = None) -> None: + """ + Populate a list of matching primitives. + + Called automatically when needed using `d3m.index` primitives. If this is not desired, + this method should be called using a list of primitive classes to find matching + primitives among. + + Parameters + ---------- + all_primitives: + An alternative list of all primitive classes to find matching primitives among. + """ + + if all_primitives is None: + # Importing here to prevent import cycle. + from d3m import index + + index.load_all() + all_primitives = index.get_loaded_primitives() # type: ignore + + matching_primitives = [] + for primitive in all_primitives: + try: + self.validate(primitive) + matching_primitives.append(primitive) + except (exceptions.InvalidArgumentTypeError, exceptions.InvalidArgumentValueError): + pass + + default = self.get_default() + + if utils.is_type(default): + if default not in matching_primitives: + matching_primitives.append(default) # type: ignore + else: + if type(default) not in matching_primitives: + matching_primitives.append(default) # type: ignore + else: + matching_primitives[matching_primitives.index(type(default))] = default # type: ignore + + self.matching_primitives = matching_primitives + self._choices = numpy.array(matching_primitives, dtype=object) + + def validate(self, value: typing.Union[T, typing.Type[T]]) -> None: + # Importing here to prevent import cycle. + from d3m.primitive_interfaces import base as primitive_interfaces_base + + super().validate(typing.cast(T, value)) + + if utils.is_type(value): + primitive_class = typing.cast(typing.Type[primitive_interfaces_base.PrimitiveBase], value) + + # Additional check that we really have a primitive. + if not utils.is_subclass(primitive_class, primitive_interfaces_base.PrimitiveBase): + raise exceptions.InvalidArgumentTypeError("Value '{value}' {for_name}is not a subclass of 'PrimitiveBase' class.".format( + value=value, for_name=self._for_name(), + )) + else: + primitive_class = typing.cast(typing.Type[primitive_interfaces_base.PrimitiveBase], type(value)) + + # Additional check that we really have a primitive. 
+ if not utils.is_subclass(primitive_class, primitive_interfaces_base.PrimitiveBase): + raise exceptions.InvalidArgumentTypeError("Value '{value}' {for_name}is not an instance of 'PrimitiveBase' class.".format( + value=value, for_name=self._for_name(), + )) + + primitive_family = primitive_class.metadata.query()['primitive_family'] + if self.primitive_families and primitive_family not in self.primitive_families: + raise exceptions.InvalidArgumentValueError( + "Primitive '{value}' {for_name}has primitive family '{primitive_family}' and not any of: {primitive_families}".format( + value=value, for_name=self._for_name(), + primitive_family=primitive_family, primitive_families=self.primitive_families, + ) + ) + + algorithm_types = primitive_class.metadata.query()['algorithm_types'] + if self.algorithm_types and set(algorithm_types).isdisjoint(set(self.algorithm_types)): + raise exceptions.InvalidArgumentValueError( + "Primitive '{value}' {for_name}has algorithm types '{primitive_algorithm_types}' and not any of: {algorithm_types}".format( + value=value, for_name=self._for_name(), + primitive_algorithm_types=algorithm_types, algorithm_types=self.algorithm_types, + ) + ) + + produce_methods = { + method_name for method_name, method_description + in primitive_class.metadata.query()['primitive_code']['instance_methods'].items() + if method_description['kind'] == base.PrimitiveMethodKind.PRODUCE + } + if not set(self.produce_methods) <= produce_methods: + raise exceptions.InvalidArgumentValueError( + "Primitive '{value}' {for_name}has produce methods '{primitive_produce_methods}' and not all of: {produce_methods}".format( + value=value, for_name=self._for_name(), + primitive_produce_methods=produce_methods, produce_methods=self.produce_methods, + ) + ) + + def sample(self, random_state: RandomState = None) -> typing.Union[T, typing.Type[T]]: # type: ignore + """ + Samples a random value from the hyper-parameter search space. + + Returns a random primitive from primitives available through `d3m.index`, by default, + or those given to a manual call of `populate_primitives`. + + Parameters + ---------- + random_state: + A random seed or state to be used when sampling. + + Returns + ------- + A sampled value. + """ + + random_state = sklearn_validation.check_random_state(random_state) + + if self.matching_primitives is None: + self.populate_primitives() + + return random_state.choice(self._choices) + + def get_max_samples(self) -> typing.Optional[int]: + if self.matching_primitives is None: + self.populate_primitives() + + return len(self.matching_primitives) + + def sample_multiple( # type: ignore + self, min_samples: int = 0, max_samples: int = None, random_state: RandomState = None, *, with_replacement: bool = False, + ) -> typing.Sequence[typing.Union[T, typing.Type[T]]]: + """ + Samples multiple random values from the hyper-parameter search space. At least ``min_samples`` + of them, and at most ``max_samples``. + + It samples primitives available through `d3m.index`, by default, + or those given to a manual call of `populate_primitives`. + + Parameters + ---------- + min_samples: + A minimum number of samples to return. + max_samples: + A maximum number of samples to return. + random_state: + A random seed or state to be used when sampling. + with_replacement: + Are we sampling with replacement or without? + + Returns + ------- + A set (represented as a tuple) of multiple sampled values. 
+ """ + + min_samples, max_samples = self._check_sample_size(min_samples, max_samples, with_replacement) + + random_state = sklearn_validation.check_random_state(random_state) + + if self.matching_primitives is None: + self.populate_primitives() + + size = random_state.randint(min_samples, max_samples + 1) + + return tuple(random_state.choice(self._choices, size, replace=with_replacement)) + + def __repr__(self) -> str: + return '{class_name}(default={default}, primitive_families={primitive_families}, algorithm_types={algorithm_types})'.format( + class_name=type(self).__name__, + default=self.get_default(), + primitive_families=[primitive_family.name for primitive_family in self.primitive_families], # type: ignore + algorithm_types=[algorithm_type.name for algorithm_type in self.algorithm_types], # type: ignore + produce_methods=list(self.produce_methods), + ) + + @functools.lru_cache() + def to_simple_structure(self) -> typing.Dict: # type: ignore + structure = super().to_simple_structure() + structure.update({ + 'primitive_families': list(self.primitive_families), + 'algorithm_types': list(self.algorithm_types), + 'produce_methods': list(self.produce_methods), + }) + return structure + + @deprecate.function(message="use value_to_json_structure method instead") + def value_to_json(self, value: typing.Union[T, typing.Type[T]]) -> typing.Any: + return self.value_to_json_structure(value) + + def value_to_json_structure(self, value: typing.Union[T, typing.Type[T]]) -> typing.Any: + self.validate(value) + + if utils.is_type(value): + return {'class': value.metadata.query()['python_path']} # type: ignore + else: + return {'instance': base64.b64encode(pickle.dumps(value)).decode('utf8')} + + @deprecate.function(message="use value_from_json_structure method instead") + def value_from_json(self, json: typing.Any) -> typing.Union[T, typing.Type[T]]: # type: ignore + return self.value_from_json_structure(json) + + def value_from_json_structure(self, json: typing.Any) -> typing.Union[T, typing.Type[T]]: # type: ignore + if 'class' in json: + module_path, name = json['class'].rsplit('.', 1) + module = importlib.import_module(module_path) + value = getattr(module, name) + else: + # TODO: Limit the types of values being able to load to prevent arbitrary code execution by a malicious pickle. + value = pickle.loads(base64.b64decode(json['instance'].encode('utf8'))) + + self.validate(value) + + return value + + def can_accept_value_type(self, structural_type: typing.Union[type, typing.List[type]]) -> bool: + if structural_type is typing.Any: + return True + elif not super().can_accept_value_type(structural_type): + return False + + try: + # We now know that it is a primitive class and we can check other constraints. + self.validate(typing.cast(typing.Type[T], structural_type)) + return True + except Exception: + return False + + +class Constant(Hyperparameter[T]): + """ + A constant hyper-parameter that represents a constant default value. + + Type variable ``T`` is optional and if not provided an attempt to + automatically infer it from ``default`` will be made. + """ + + def validate(self, value: T) -> None: + super().validate(value) + + default = self.get_default() + if value != default: + raise exceptions.InvalidArgumentValueError("Value '{value}' {for_name}is not the constant default value '{default}'.".format(value=value, for_name=self._for_name(), default=default)) + + def sample(self, random_state: RandomState = None) -> T: + """ + Samples a random value from the hyper-parameter search space. 
+ + Parameters + ---------- + random_state: + A random seed or state to be used when sampling. + + Returns + ------- + A sampled value. + """ + + sklearn_validation.check_random_state(random_state) + + return self.get_default() + + def sample_multiple(self, min_samples: int = 0, max_samples: int = None, random_state: RandomState = None, *, with_replacement: bool = False) -> typing.Sequence[T]: + """ + Samples multiple random values from the hyper-parameter search space. At least ``min_samples`` + of them, and at most ``max_samples``. + + For the base class it always returns only a ``default`` value because the space + is unknown. + + Parameters + ---------- + min_samples: + A minimum number of samples to return. + max_samples: + A maximum number of samples to return. + random_state: + A random seed or state to be used when sampling. + with_replacement: + Are we sampling with replacement or without? + + Returns + ------- + A set (represented as a tuple) of multiple sampled values. + """ + + min_samples, max_samples = self._check_sample_size(min_samples, max_samples, with_replacement) + + random_state = sklearn_validation.check_random_state(random_state) + + if with_replacement: + size = random_state.randint(min_samples, max_samples + 1) + + return (self.get_default(),) * size + + else: + if min_samples > 0: + assert min_samples == 1, min_samples + assert max_samples == 1, max_samples + return (self.get_default(),) + elif max_samples < 1: + assert min_samples == 0, min_samples + assert max_samples == 0, max_samples + return () + else: + assert min_samples == 0, min_samples + assert max_samples == 1, max_samples + return typing.cast(typing.Sequence[T], () if random_state.rand() >= 0.5 else (self.get_default(),)) + + +class Bounded(Hyperparameter[T]): + """ + A bounded hyper-parameter with lower and upper bounds, but no other + information about the distribution of the space of the hyper-parameter, + besides a default value. + + Both lower and upper bounds are inclusive by default. Each bound can be + also ``None`` to signal that the hyper-parameter is unbounded for that bound. + Both bounds cannot be ``None`` because then this is the same as + ``Hyperparameter`` class, so you can use that one directly. + + Type variable ``T`` is optional and if not provided an attempt to + automatically infer it from bounds and ``default`` will be made. + + Attributes + ---------- + lower: + A lower bound. + lower_inclusive: + Is the lower bound inclusive? + upper: + An upper bound. + upper_inclusive: + Is the upper bound inclusive? + """ + + lower: typing.Any + lower_inclusive: bool + upper: typing.Any + upper_inclusive: bool + + def __init__(self, lower: T, upper: T, default: T, *, lower_inclusive: bool = True, upper_inclusive: bool = True, semantic_types: typing.Sequence[str] = None, description: str = None) -> None: + self.lower = lower + self.upper = upper + self.lower_inclusive = lower_inclusive + self.upper_inclusive = upper_inclusive + + if self.lower is None and self.upper is None: + raise exceptions.InvalidArgumentValueError("Lower and upper bounds cannot both be None.") + + self._validate_finite_float(self.lower) + self._validate_finite_float(self.upper) + + if self.lower is None: + self.lower_inclusive = False + if self.upper is None: + self.upper_inclusive = False + + self._lower_compare, self._upper_compare, self._lower_interval, self._upper_interval = self._get_operators(self.lower_inclusive, self.upper_inclusive) + + # If subclass has not already set it. 
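+        # (Illustrative: for Bounded(lower=0.0, upper=1.0, default=0.5) with no explicit type variable,
+        # the structural type is inferred as float from the bounds and the default.)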
+ if not hasattr(self, 'structural_type'): + structural_type = _get_structural_type_argument(self, T) # type: ignore + + if structural_type == typing.Any: + structural_types = list(self.infer_type(value) for value in [self.lower, self.upper, default] if value is not None) + type_util.simplify_for_Union(structural_types) + structural_type = typing.Union[tuple(structural_types)] # type: ignore + + self.structural_type = structural_type + + if self.lower is None or self.upper is None: + maybe_optional_structural_type = typing.cast(type, typing.Optional[self.structural_type]) # type: ignore + else: + maybe_optional_structural_type = self.structural_type + + if not self.check_type(self.lower, maybe_optional_structural_type): + raise exceptions.InvalidArgumentTypeError( + "Lower bound '{lower}' is not an instance of the structural type: {structural_type}".format( + lower=self.lower, structural_type=self.structural_type, + ) + ) + + if not self.check_type(self.upper, maybe_optional_structural_type): + raise exceptions.InvalidArgumentTypeError( + "Upper bound '{upper}' is not an instance of the structural type: {structural_type}".format( + upper=self.upper, structural_type=self.structural_type, + )) + + if self.lower is not None and self.upper is not None: + if not (self._lower_compare(self.lower, self.upper) and self._upper_compare(self.lower, self.upper)): + raise exceptions.InvalidArgumentValueError( + "Lower bound '{lower}' is not smaller than upper bound '{upper}'.".format( + lower=self.lower, upper=self.upper, + ) + ) + + self._initialize_effective_bounds() + + # Default value is checked to be inside bounds by parent class calling "validate". + + super().__init__(default, semantic_types=semantic_types, description=description) + + @classmethod + def _get_operators(cls, lower_inclusive: bool, upper_inclusive: bool) -> typing.Tuple[typing.Callable, typing.Callable, str, str]: + if lower_inclusive: + lower_compare = operator.le + lower_interval = '[' + else: + lower_compare = operator.lt + lower_interval = '(' + + if upper_inclusive: + upper_compare = operator.le + upper_interval = ']' + else: + upper_compare = operator.lt + upper_interval = ')' + + return lower_compare, upper_compare, lower_interval, upper_interval + + def _initialize_effective_bounds_float(self) -> None: + if self.lower_inclusive: + self._effective_lower = self.lower + else: + self._effective_lower = numpy.nextafter(self.lower, self.lower + 1) + + if self.upper_inclusive: + self._effective_upper = numpy.nextafter(self.upper, self.upper + 1) + else: + self._effective_upper = self.upper + + def _initialize_effective_bounds_int(self) -> None: + if self.lower_inclusive: + self._effective_lower = self.lower + else: + self._effective_lower = self.lower + 1 + + if self.upper_inclusive: + self._effective_upper = self.upper + 1 + else: + self._effective_upper = self.upper + + def _initialize_effective_bounds(self) -> None: + # If subclass has not already set it. 
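+        # (Illustrative: an exclusive integer lower bound of 0 gives an effective lower bound of 1,
+        # while an inclusive float upper bound is nudged just past the bound with numpy.nextafter so
+        # that sampling from the half-open interval can still return the bound itself.)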
+ if getattr(self, '_effective_lower', None) is None or getattr(self, '_effective_upper', None) is None: + if self.lower is None or self.upper is None: + self._effective_lower = None + self._effective_upper = None + self._is_int = False + self._is_float = False + elif utils.is_int(type(self.lower)) and utils.is_int(type(self.upper)): + self._initialize_effective_bounds_int() + self._is_int = True + self._is_float = False + elif utils.is_float(type(self.lower)) and utils.is_float(type(self.upper)): + self._initialize_effective_bounds_float() + self._is_int = False + self._is_float = True + else: + self._effective_lower = None + self._effective_upper = None + self._is_int = False + self._is_float = False + + if self._effective_lower is not None and self._effective_upper is not None and not (self._effective_lower < self._effective_upper): + raise exceptions.InvalidArgumentValueError( + "Effective lower bound '{lower}' is not smaller than upper bound '{upper}'.".format( + lower=self.lower, upper=self.upper, + ) + ) + + def validate(self, value: T) -> None: + super().validate(value) + + # This my throw an exception if value is not comparable, but this is on purpose. + if self.lower is None: + if not (value is None or self._upper_compare(value, self.upper)): # type: ignore + raise exceptions.InvalidArgumentValueError( + "Value '{value}' {for_name}is outside of range {lower_interval}{lower}, {upper}{upper_interval}.".format( + value=value, for_name=self._for_name(), lower_interval=self._lower_interval, + lower=self.lower, upper=self.upper, upper_interval=self._upper_interval, + ), + ) + elif self.upper is None: + if not (value is None or self._lower_compare(self.lower, value)): # type: ignore + raise exceptions.InvalidArgumentValueError( + "Value '{value}' {for_name}is outside of range {lower_interval}{lower}, {upper}{upper_interval}.".format( + value=value, for_name=self._for_name(), lower_interval=self._lower_interval, + lower=self.lower, upper=self.upper, upper_interval=self._upper_interval, + ), + ) + else: + if not (self._lower_compare(self.lower, value) and self._upper_compare(value, self.upper)): # type: ignore + raise exceptions.InvalidArgumentValueError( + "Value '{value}' {for_name}is outside of range {lower_interval}{lower}, {upper}{upper_interval}.".format( + value=value, for_name=self._for_name(), lower_interval=self._lower_interval, + lower=self.lower, upper=self.upper, upper_interval=self._upper_interval, + ), + ) + + def validate_default(self) -> None: + if self.lower is None or self.upper is None: + maybe_optional_structural_type = typing.cast(type, typing.Optional[self.structural_type]) # type: ignore + else: + maybe_optional_structural_type = self.structural_type + + structural_type = self.structural_type + try: + self.structural_type = maybe_optional_structural_type + super().validate_default() + finally: + self.structural_type = structural_type + + def sample(self, random_state: RandomState = None) -> T: + """ + Samples a random value from the hyper-parameter search space. + + If it is bounded on both sides, it tries to sample from uniform distribution, + otherwise returns a ``default`` value. + + Parameters + ---------- + random_state: + A random seed or state to be used when sampling. + + Returns + ------- + A sampled value. 
+ """ + + random_state = sklearn_validation.check_random_state(random_state) + + if getattr(self, '_is_int', False) or getattr(self, '_is_float', False): + utils.log_once( + logger, logging.WARNING, + "Sampling a bounded hyper-parameter '%(name)s' without known distribution. Sampling from a uniform distribution.", + {'name': self.name}, + stack_info=True, + ) + + if getattr(self, '_is_int', False): + return self.structural_type(random_state.randint(self._effective_lower, self._effective_upper)) + else: + return self.structural_type(random_state.uniform(self._effective_lower, self._effective_upper)) + + elif self.lower is not None and self.upper is not None: + utils.log_once( + logger, + logging.WARNING, + "Sampling a bounded hyper-parameter '%(name)s' with unsupported bounds. Using a default value.", + {'name': self.name}, + stack_info=True, + ) + + return self.get_default() + + else: + utils.log_once( + logger, + logging.WARNING, + "Sampling a semi-bounded hyper-parameter '%(name)s'. Using a default value.", + {'name': self.name}, stack_info=True, + ) + + return self.get_default() + + def get_max_samples(self) -> typing.Optional[int]: + if getattr(self, '_is_int', False): + return self._effective_upper - self._effective_lower + + elif getattr(self, '_is_float', False): + return None + + else: + return 1 + + def sample_multiple(self, min_samples: int = 0, max_samples: int = None, random_state: RandomState = None, *, with_replacement: bool = False) -> typing.Sequence[T]: + """ + Samples multiple random values from the hyper-parameter search space. At least ``min_samples`` + of them, and at most ``max_samples``. + + Parameters + ---------- + min_samples: + A minimum number of samples to return. + max_samples: + A maximum number of samples to return. + random_state: + A random seed or state to be used when sampling. + with_replacement: + Are we sampling with replacement or without? + + Returns + ------- + A set (represented as a tuple) of multiple sampled values. + """ + + min_samples, max_samples = self._check_sample_size(min_samples, max_samples, with_replacement) + + random_state = sklearn_validation.check_random_state(random_state) + + size = random_state.randint(min_samples, max_samples + 1) + + if with_replacement: + sample_list: list = [self.sample(random_state) for i in range(size)] + else: + sample_set: set = set() + sample_list = [] + while len(sample_list) != size: + value = self.sample(random_state) + if value not in sample_set: + sample_set.add(value) + sample_list.append(value) + + return tuple(sample_list) + + def __repr__(self) -> str: + return '{class_name}(lower={lower}, upper={upper}, default={default}, lower_inclusive={lower_inclusive}, upper_inclusive={upper_inclusive})'.format( + class_name=type(self).__name__, + lower=self.lower, + upper=self.upper, + default=self.get_default(), + lower_inclusive=self.lower_inclusive, + upper_inclusive=self.upper_inclusive, + ) + + def to_simple_structure(self) -> typing.Dict: + structure = super().to_simple_structure() + structure.update({ + 'lower': self.lower, + 'upper': self.upper, + 'lower_inclusive': self.lower_inclusive, + 'upper_inclusive': self.upper_inclusive, + }) + return structure + + +class Enumeration(Hyperparameter[T]): + """ + An enumeration hyper-parameter with a value drawn uniformly from a list of values. + + If ``None`` is a valid choice, it should be listed among ``values``. + + Type variable ``T`` is optional and if not provided an attempt to + automatically infer it from ``values`` will be made. 
+
+ Attributes
+ ----------
+ values:
+ A list of choice values.
+ """
+
+ values: typing.Sequence[typing.Any]
+
+ def __init__(self, values: typing.Sequence[T], default: T, *, semantic_types: typing.Sequence[str] = None, description: str = None) -> None:
+ self.values = values
+
+ # Used for sampling.
+ # See: https://github.com/numpy/numpy/issues/15935
+ self._choices = numpy.array(list(self.values), dtype=object)
+
+ # If subclass has not already set it.
+ if not hasattr(self, 'structural_type'):
+ structural_type = _get_structural_type_argument(self, T) # type: ignore
+
+ if structural_type == typing.Any:
+ structural_types = list(self.infer_type(value) for value in self.values)
+ type_util.simplify_for_Union(structural_types)
+ structural_type = typing.Union[tuple(structural_types)] # type: ignore
+
+ self.structural_type = structural_type
+
+ for value in self.values:
+ if not self.check_type(value, self.structural_type):
+ raise exceptions.InvalidArgumentTypeError("Value '{value}' is not an instance of the structural type: {structural_type}".format(value=value, structural_type=self.structural_type))
+
+ # This also raises an exception if there is a "1.0" and "1" value in the list, so a float and
+ # an int of equal value. This is important because when storing as JSON floats can be converted
+ # to ints if they are integers. So we could not know which enumeration value it represents.
+ if utils.has_duplicates(self.values):
+ raise exceptions.InvalidArgumentValueError("Values '{values}' contain duplicates.".format(values=self.values))
+
+ self._has_nan = any(utils.is_float(type(value)) and numpy.isnan(value) for value in self.values)
+
+ # Default value is checked to be among values by parent class calling "validate".
+
+ super().__init__(default, semantic_types=semantic_types, description=description)
+
+ def validate(self, value: T) -> None:
+ # We have to specially handle NaN because it is not equal to any value.
+ if value not in self.values and not (self._has_nan and utils.is_float(type(value)) and numpy.isnan(value)):
+ raise exceptions.InvalidArgumentValueError("Value '{value}' {for_name}is not among values.".format(value=value, for_name=self._for_name()))
+
+ def sample(self, random_state: RandomState = None) -> T:
+ """
+ Samples a random value from the hyper-parameter search space.
+
+ It samples a value from ``values``.
+
+ Parameters
+ ----------
+ random_state:
+ A random seed or state to be used when sampling.
+
+ Returns
+ -------
+ A sampled value.
+ """
+
+ random_state = sklearn_validation.check_random_state(random_state)
+
+ return random_state.choice(self._choices)
+
+ def get_max_samples(self) -> typing.Optional[int]:
+ return len(self.values)
+
+ def sample_multiple(self, min_samples: int = 0, max_samples: int = None, random_state: RandomState = None, *, with_replacement: bool = False) -> typing.Sequence[T]:
+ """
+ Samples multiple random values from the hyper-parameter search space. At least ``min_samples``
+ of them, and at most ``max_samples``.
+
+ It samples values from ``values``.
+
+ Parameters
+ ----------
+ min_samples:
+ A minimum number of samples to return.
+ max_samples:
+ A maximum number of samples to return.
+ random_state:
+ A random seed or state to be used when sampling.
+ with_replacement:
+ Are we sampling with replacement or without?
+
+ Returns
+ -------
+ A set (represented as a tuple) of multiple sampled values.
+ """ + + min_samples, max_samples = self._check_sample_size(min_samples, max_samples, with_replacement) + + random_state = sklearn_validation.check_random_state(random_state) + + size = random_state.randint(min_samples, max_samples + 1) + + return tuple(random_state.choice(self._choices, size, replace=with_replacement)) + + def __repr__(self) -> str: + return '{class_name}(values={values}, default={default})'.format( + class_name=type(self).__name__, + values=self.values, + default=self.get_default(), + ) + + def to_simple_structure(self) -> typing.Dict: + structure = super().to_simple_structure() + structure.update({ + 'values': list(self.values), + }) + return structure + + +class UniformBool(Enumeration[bool]): + """ + A bool hyper-parameter with a value drawn uniformly from ``{True, False}``. + """ + + def __init__(self, default: bool, *, semantic_types: typing.Sequence[str] = None, description: str = None) -> None: + super().__init__([True, False], default, semantic_types=semantic_types, description=description) + + def __repr__(self) -> str: + return '{class_name}(default={default})'.format( + class_name=type(self).__name__, + default=self.get_default(), + ) + + def to_simple_structure(self) -> typing.Dict: + structure = super().to_simple_structure() + del structure['values'] + return structure + + +class UniformInt(Bounded[int]): + """ + An int hyper-parameter with a value drawn uniformly from ``[lower, upper)``, + by default. + + Attributes + ---------- + lower: + A lower bound. + lower_inclusive: + Is the lower bound inclusive? + upper: + An upper bound. + upper_inclusive: + Is the upper bound inclusive? + """ + + lower: int + lower_inclusive: bool + upper: int + upper_inclusive: bool + + def __init__( + self, lower: int, upper: int, default: int, *, lower_inclusive: bool = True, upper_inclusive: bool = False, + semantic_types: typing.Sequence[str] = None, description: str = None, + ) -> None: + # Just to make sure because parent class allow None values. + if lower is None or upper is None: + raise exceptions.InvalidArgumentValueError("Bounds cannot be None.") + + # Default value is checked to be inside bounds by parent class calling "validate". + + super().__init__(lower, upper, default, lower_inclusive=lower_inclusive, upper_inclusive=upper_inclusive, semantic_types=semantic_types, description=description) + + def _initialize_effective_bounds(self) -> None: + self._initialize_effective_bounds_int() + + super()._initialize_effective_bounds() + + def sample(self, random_state: RandomState = None) -> int: + """ + Samples a random value from the hyper-parameter search space. + + Parameters + ---------- + random_state: + A random seed or state to be used when sampling. + + Returns + ------- + A sampled value. + """ + + random_state = sklearn_validation.check_random_state(random_state) + + return self.structural_type(random_state.randint(self._effective_lower, self._effective_upper)) + + def get_max_samples(self) -> typing.Optional[int]: + return self._effective_upper - self._effective_lower + + +class Uniform(Bounded[float]): + """ + A float hyper-parameter with a value drawn uniformly from ``[lower, upper)``, + by default. + + If ``q`` is provided, then the value is drawn according to ``round(uniform(lower, upper) / q) * q``. + + Attributes + ---------- + lower: + A lower bound. + upper: + An upper bound. + q: + An optional quantization factor. + lower_inclusive: + Is the lower bound inclusive? + upper_inclusive: + Is the upper bound inclusive? 
+ """ + + lower: float + upper: float + q: float + lower_inclusive: bool + upper_inclusive: bool + + def __init__( + self, lower: float, upper: float, default: float, q: float = None, *, lower_inclusive: bool = True, upper_inclusive: bool = False, + semantic_types: typing.Sequence[str] = None, description: str = None, + ) -> None: + # Just to make sure because parent class allow None values. + if lower is None or upper is None: + raise exceptions.InvalidArgumentValueError("Bounds cannot be None.") + + self.q = q + + # Default value is checked to be inside bounds by parent class calling "validate". + + super().__init__(lower, upper, default, lower_inclusive=lower_inclusive, upper_inclusive=upper_inclusive, semantic_types=semantic_types, description=description) + + def _initialize_effective_bounds(self) -> None: + self._initialize_effective_bounds_float() + + super()._initialize_effective_bounds() + + def sample(self, random_state: RandomState = None) -> float: + """ + Samples a random value from the hyper-parameter search space. + + Parameters + ---------- + random_state: + A random seed or state to be used when sampling. + + Returns + ------- + A sampled value. + """ + + random_state = sklearn_validation.check_random_state(random_state) + + value = random_state.uniform(self._effective_lower, self._effective_upper) + + if self.q is None: + return self.structural_type(value) + else: + return self.structural_type(numpy.round(value / self.q) * self.q) + + def get_max_samples(self) -> typing.Optional[int]: + return None + + def __repr__(self) -> str: + return '{class_name}(lower={lower}, upper={upper}, q={q}, default={default}, lower_inclusive={lower_inclusive}, upper_inclusive={upper_inclusive})'.format( + class_name=type(self).__name__, + lower=self.lower, + upper=self.upper, + q=self.q, + default=self.get_default(), + lower_inclusive=self.lower_inclusive, + upper_inclusive=self.upper_inclusive, + ) + + def to_simple_structure(self) -> typing.Dict: + structure = super().to_simple_structure() + + structure.update({ + 'lower': self.lower, + 'upper': self.upper, + 'lower_inclusive': self.lower_inclusive, + 'upper_inclusive': self.upper_inclusive, + }) + + if self.q is not None: + structure['q'] = self.q + + return structure + + +class LogUniform(Bounded[float]): + """ + A float hyper-parameter with a value drawn from ``[lower, upper)``, by default, + according to ``exp(uniform(log(lower), log(upper)))`` + so that the logarithm of the value is uniformly distributed. + + If ``q`` is provided, then the value is drawn according to ``round(exp(uniform(log(lower), log(upper))) / q) * q``. + + Attributes + ---------- + lower: + A lower bound. + upper: + An upper bound. + q: + An optional quantization factor. + lower_inclusive: + Is the lower bound inclusive? + upper_inclusive: + Is the upper bound inclusive? + """ + + lower: float + upper: float + q: float + lower_inclusive: bool + upper_inclusive: bool + + def __init__( + self, lower: float, upper: float, default: float, q: float = None, *, lower_inclusive: bool = True, upper_inclusive: bool = False, + semantic_types: typing.Sequence[str] = None, description: str = None, + ) -> None: + # Just to make sure because parent class allow None values. + if lower is None or upper is None: + raise exceptions.InvalidArgumentValueError("Bounds cannot be None.") + + self.q = q + + # Default value is checked to be inside bounds by parent class calling "validate". 
+ + super().__init__(lower, upper, default, lower_inclusive=lower_inclusive, upper_inclusive=upper_inclusive, semantic_types=semantic_types, description=description) + + def _initialize_effective_bounds(self) -> None: + self._initialize_effective_bounds_float() + + super()._initialize_effective_bounds() + + def sample(self, random_state: RandomState = None) -> float: + """ + Samples a random value from the hyper-parameter search space. + + Parameters + ---------- + random_state: + A random seed or state to be used when sampling. + + Returns + ------- + A sampled value. + """ + + random_state = sklearn_validation.check_random_state(random_state) + + value = numpy.exp(random_state.uniform(numpy.log(self._effective_lower), numpy.log(self._effective_upper))) + + if self.q is None: + return self.structural_type(value) + else: + return self.structural_type(numpy.round(value / self.q) * self.q) + + def get_max_samples(self) -> typing.Optional[int]: + return None + + def __repr__(self) -> str: + return '{class_name}(lower={lower}, upper={upper}, q={q}, default={default}, lower_inclusive={lower_inclusive}, upper_inclusive={upper_inclusive})'.format( + class_name=type(self).__name__, + lower=self.lower, + upper=self.upper, + q=self.q, + default=self.get_default(), + lower_inclusive=self.lower_inclusive, + upper_inclusive=self.upper_inclusive, + ) + + def to_simple_structure(self) -> typing.Dict: + structure = super().to_simple_structure() + + structure.update({ + 'lower': self.lower, + 'upper': self.upper, + 'lower_inclusive': self.lower_inclusive, + 'upper_inclusive': self.upper_inclusive, + }) + + if self.q is not None: + structure['q'] = self.q + + return structure + + +class Normal(Hyperparameter[float]): + """ + A float hyper-parameter with a value drawn normally distributed according to ``mu`` and ``sigma``. + + If ``q`` is provided, then the value is drawn according to ``round(normal(mu, sigma) / q) * q``. + + Attributes + ---------- + mu: + A mean of normal distribution. + sigma: + A standard deviation of normal distribution. + q: + An optional quantization factor. + """ + + mu: float + sigma: float + q: float + + def __init__(self, mu: float, sigma: float, default: float, q: float = None, *, semantic_types: typing.Sequence[str] = None, description: str = None) -> None: + self.mu = mu + self.sigma = sigma + self.q = q + + self._validate_finite_float(self.mu) + self._validate_finite_float(self.sigma) + self._validate_finite_float(self.q) + + super().__init__(default, semantic_types=semantic_types, description=description) + + def sample(self, random_state: RandomState = None) -> float: + """ + Samples a random value from the hyper-parameter search space. + + Parameters + ---------- + random_state: + A random seed or state to be used when sampling. + + Returns + ------- + A sampled value. + """ + + random_state = sklearn_validation.check_random_state(random_state) + + value = random_state.normal(self.mu, self.sigma) + + if self.q is None: + return self.structural_type(value) + else: + return self.structural_type(numpy.round(value / self.q) * self.q) + + def get_max_samples(self) -> typing.Optional[int]: + return None + + def sample_multiple(self, min_samples: int = 0, max_samples: int = None, random_state: RandomState = None, *, with_replacement: bool = False) -> typing.Sequence[T]: + """ + Samples multiple random values from the hyper-parameter search space. At least ``min_samples`` + of them, and at most ``max_samples``. 
+ + Parameters + ---------- + min_samples: + A minimum number of samples to return. + max_samples: + A maximum number of samples to return. + random_state: + A random seed or state to be used when sampling. + + Returns + ------- + A set (represented as a tuple) of multiple sampled values. + """ + + min_samples, max_samples = self._check_sample_size(min_samples, max_samples, with_replacement) + + random_state = sklearn_validation.check_random_state(random_state) + + size = random_state.randint(min_samples, max_samples + 1) + + if with_replacement: + sample_list: list = [self.sample(random_state) for i in range(size)] + else: + sample_set: set = set() + sample_list = [] + while len(sample_list) != size: + value = self.sample(random_state) + if value not in sample_set: + sample_set.add(value) + sample_list.append(value) + + return tuple(sample_list) + + def __repr__(self) -> str: + return '{class_name}(mu={mu}, sigma={sigma}, q={q}, default={default})'.format( + class_name=type(self).__name__, + mu=self.mu, + sigma=self.sigma, + q=self.q, + default=self.get_default(), + ) + + def to_simple_structure(self) -> typing.Dict: + structure = super().to_simple_structure() + + structure.update({ + 'mu': self.mu, + 'sigma': self.sigma, + }) + + if self.q is not None: + structure['q'] = self.q + + return structure + + +class LogNormal(Hyperparameter[float]): + """ + A float hyper-parameter with a value drawn according to ``exp(normal(mu, sigma))`` so that the logarithm of the value is + normally distributed. + + If ``q`` is provided, then the value is drawn according to ``round(exp(normal(mu, sigma)) / q) * q``. + + Attributes + ---------- + mu: + A mean of normal distribution. + sigma: + A standard deviation of normal distribution. + q: + An optional quantization factor. + """ + + mu: float + sigma: float + q: float + + def __init__(self, mu: float, sigma: float, default: float, q: float = None, *, semantic_types: typing.Sequence[str] = None, description: str = None) -> None: + self.mu = mu + self.sigma = sigma + self.q = q + + self._validate_finite_float(self.mu) + self._validate_finite_float(self.sigma) + self._validate_finite_float(self.q) + + super().__init__(default, semantic_types=semantic_types, description=description) + + def sample(self, random_state: RandomState = None) -> float: + """ + Samples a random value from the hyper-parameter search space. + + Parameters + ---------- + random_state: + A random seed or state to be used when sampling. + + Returns + ------- + A sampled value. + """ + + random_state = sklearn_validation.check_random_state(random_state) + + value = numpy.exp(random_state.normal(self.mu, self.sigma)) + + if self.q is None: + return self.structural_type(value) + else: + return self.structural_type(numpy.round(value / self.q) * self.q) + + def get_max_samples(self) -> typing.Optional[int]: + return None + + def sample_multiple(self, min_samples: int = 0, max_samples: int = None, random_state: RandomState = None, *, with_replacement: bool = False) -> typing.Sequence[T]: + """ + Samples multiple random values from the hyper-parameter search space. At least ``min_samples`` + of them, and at most ``max_samples``. + + Parameters + ---------- + min_samples: + A minimum number of samples to return. + max_samples: + A maximum number of samples to return. + random_state: + A random seed or state to be used when sampling. + with_replacement: + Are we sampling with replacement or without? + + Returns + ------- + A set (represented as a tuple) of multiple sampled values. 
+ """ + + min_samples, max_samples = self._check_sample_size(min_samples, max_samples, with_replacement) + + random_state = sklearn_validation.check_random_state(random_state) + + size = random_state.randint(min_samples, max_samples + 1) + + if with_replacement: + sample_list: list = [self.sample(random_state) for i in range(size)] + else: + sample_set: set = set() + sample_list = [] + while len(sample_list) != size: + value = self.sample(random_state) + if value not in sample_set: + sample_set.add(value) + sample_list.append(value) + + return tuple(sample_list) + + def __repr__(self) -> str: + return '{class_name}(mu={mu}, sigma={sigma}, q={q}, default={default})'.format( + class_name=type(self).__name__, + mu=self.mu, + sigma=self.sigma, + q=self.q, + default=self.get_default(), + ) + + def to_simple_structure(self) -> typing.Dict: + structure = super().to_simple_structure() + + structure.update({ + 'mu': self.mu, + 'sigma': self.sigma, + }) + + if self.q is not None: + structure['q'] = self.q + + return structure + + +class Union(Hyperparameter[T]): + """ + A union hyper-parameter which combines multiple other hyper-parameters. + + This is useful when a hyper-parameter has multiple modalities and each modality + can be described with a different hyper-parameter. + + No relation or probability distribution between modalities is prescribed, but + default sampling implementation assumes uniform distribution of modalities. + + Type variable ``T`` does not have to be specified because the structural type + can be automatically inferred as a union of all hyper-parameters in configuration. + + This is similar to `Choice` hyper-parameter that it combines hyper-parameters, but + `Union` combines individual hyper-parameters, while `Choice` combines configurations + of multiple hyper-parameters. + + Attributes + ---------- + configuration: + A configuration of hyper-parameters to combine into one. It is important + that configuration uses an ordered dict so that order is reproducible + (default dict has unspecified order). + """ + + configuration: frozendict.FrozenOrderedDict + + def __init__(self, configuration: 'collections.OrderedDict[str, Hyperparameter]', default: str, *, semantic_types: typing.Sequence[str] = None, + description: str = None) -> None: + if default not in configuration: + raise exceptions.InvalidArgumentValueError("Default value '{default}' is not in configuration.".format(default=default)) + + self.default_hyperparameter = configuration[default] + self.configuration = frozendict.FrozenOrderedDict(configuration) + + # Used for sampling. + # See: https://github.com/numpy/numpy/issues/15935 + self._choices = numpy.array(list(self.configuration.values()), dtype=object) + + for name, hyperparameter in self.configuration.items(): + if not isinstance(name, str): + raise exceptions.InvalidArgumentTypeError("Hyper-parameter name is not a string: {name}".format(name=name)) + if not isinstance(hyperparameter, Hyperparameter): + raise exceptions.InvalidArgumentTypeError("Hyper-parameter description is not an instance of the Hyperparameter class: {name}".format(name=name)) + + # If subclass has not already set it. 
+ if not hasattr(self, 'structural_type'):
+ structural_type = _get_structural_type_argument(self, T) # type: ignore
+
+ if structural_type == typing.Any:
+ structural_type = typing.Union[tuple(hyperparameter.structural_type for hyperparameter in self.configuration.values())] # type: ignore
+
+ self.structural_type = structural_type
+
+ for name, hyperparameter in self.configuration.items():
+ if not utils.is_subclass(hyperparameter.structural_type, self.structural_type):
+ raise exceptions.InvalidArgumentTypeError(
+ "Hyper-parameter '{name}' is not a subclass of the structural type: {structural_type}".format(
+ name=name, structural_type=self.structural_type,
+ )
+ )
+
+ super().__init__(self.configuration[default].get_default(), semantic_types=semantic_types, description=description)
+
+ def contribute_to_class(self, name: str) -> None:
+ super().contribute_to_class(name)
+
+ for hyperparameter_name, hyperparameter in self.configuration.items():
+ hyperparameter.contribute_to_class('{name}.{hyperparameter_name}'.format(name=self.name, hyperparameter_name=hyperparameter_name))
+
+ def validate(self, value: T) -> None:
+ # Check that value belongs to the structural type.
+ super().validate(value)
+
+ for name, hyperparameter in self.configuration.items():
+ try:
+ hyperparameter.validate(value)
+ # Value validated with at least one hyper-parameter, we can return.
+ return
+ except Exception:
+ pass
+
+ raise exceptions.InvalidArgumentValueError("Value '{value}' {for_name}has not validated with any of configured hyper-parameters.".format(value=value, for_name=self._for_name()))
+
+ def value_to_json_structure(self, value: T) -> typing.Any:
+ # We could first call "self.validate" and then once more traverse configuration,
+ # but we instead re-implement validation like it is implemented in "self.validate",
+ # but also convert the value once we find configuration which passes validation.
+
+ # Check that value belongs to the structural type.
+ super().validate(value)
+
+ for name, hyperparameter in self.configuration.items():
+ try:
+ hyperparameter.validate(value)
+ # Value validated with this hyper-parameter.
+ return {
+ 'case': name,
+ 'value': hyperparameter.value_to_json_structure(value),
+ }
+ except Exception:
+ pass
+
+ raise exceptions.InvalidArgumentValueError("Value '{value}' {for_name}has not validated with any of configured hyper-parameters.".format(value=value, for_name=self._for_name()))
+
+ def value_from_json_structure(self, json: typing.Any) -> T:
+ if isinstance(json, dict):
+ value = self.configuration[json['case']].value_from_json_structure(json['value'])
+
+ # No need to traverse configuration again, configuration's
+ # "value_from_json_structure" already validated the value.
+ # We just check that value belongs to the structural type.
+ super().validate(value)
+
+ else:
+ # Backwards compatibility. We just take value as-is and hope JSON encoding has
+ # not changed the type from float to int in a way that it breaks the primitive.
+ logger.warning("Converting union hyper-parameter '%(name)s' from a deprecated JSON structure. It might be converted badly.", {'name': self.name})
+
+ value = super().value_from_json_structure(json)
+
+ return value
+
+ def sample(self, random_state: RandomState = None) -> T:
+ """
+ Samples a random value from the hyper-parameter search space.
+
+ It first chooses a hyper-parameter from its configuration and then
+ samples it.
+
+ Parameters
+ ----------
+ random_state:
+ A random seed or state to be used when sampling.
+ + Returns + ------- + A sampled value. + """ + + random_state = sklearn_validation.check_random_state(random_state) + + hyperparameter = random_state.choice(self._choices) + + return hyperparameter.sample(random_state) + + @functools.lru_cache() + def get_max_samples(self) -> typing.Optional[int]: # type: ignore + all_max_samples = 0 + for hyperparameter in self.configuration.values(): + hyperparameter_max_samples = hyperparameter.get_max_samples() + if hyperparameter_max_samples is None: + return None + else: + # TODO: Assumption here is that values between hyper-parameters are independent. What when they are not? + # For example, union of UniformInt(0, 10) and UniformInt(5, 15) does not have 20 samples, but only 15 possible. + all_max_samples += hyperparameter_max_samples + + return all_max_samples + + def sample_multiple(self, min_samples: int = 0, max_samples: int = None, random_state: RandomState = None, *, with_replacement: bool = False) -> typing.Sequence[T]: + """ + Samples multiple random values from the hyper-parameter search space. At least ``min_samples`` + of them, and at most ``max_samples``. + + Parameters + ---------- + min_samples: + A minimum number of samples to return. + max_samples: + A maximum number of samples to return. + random_state: + A random seed or state to be used when sampling. + with_replacement: + Are we sampling with replacement or without? + + Returns + ------- + A set (represented as a tuple) of multiple sampled values. + """ + + min_samples, max_samples = self._check_sample_size(min_samples, max_samples, with_replacement) + + random_state = sklearn_validation.check_random_state(random_state) + + size = random_state.randint(min_samples, max_samples + 1) + + if with_replacement: + sample_list: list = [self.sample(random_state) for i in range(size)] + else: + sample_set: set = set() + sample_list = [] + while len(sample_list) != size: + value = self.sample(random_state) + if value not in sample_set: + sample_set.add(value) + sample_list.append(value) + + return tuple(sample_list) + + @functools.lru_cache() + def __repr__(self) -> str: # type: ignore + return '{class_name}(configuration={{{configuration}}}, default={default})'.format( + class_name=type(self).__name__, + configuration=', '.join('{name}: {hyperparameter}'.format(name=name, hyperparameter=hyperparameter) for name, hyperparameter in self.configuration.items()), + default=self.get_default(), + ) + + @functools.lru_cache() + def to_simple_structure(self) -> typing.Dict: # type: ignore + structure = super().to_simple_structure() + structure.update({ + 'configuration': {name: hyperparameter.to_simple_structure() for name, hyperparameter in self.configuration.items()} + }) + return structure + + def traverse(self) -> 'typing.Iterator[Hyperparameter]': + yield from super().traverse() + + for hyperparameter in self.configuration.values(): + yield hyperparameter + yield from hyperparameter.traverse() + + +class Choice(Hyperparameter[typing.Dict]): + """ + A hyper-parameter which combines multiple hyper-parameter configurations into one + hyper-parameter. + + This is useful when a combination of hyper-parameters should exists together. + Then such combinations can be made each into one choice. + + No relation or probability distribution between choices is prescribed. + + This is similar to `Union` hyper-parameter that it combines hyper-parameters, but + `Choice` combines configurations of multiple hyper-parameters, while `Union` combines + individual hyper-parameters. 
+ + Attributes + ---------- + choices: + A map between choices and their classes defining their hyper-parameters configuration. + """ + + choices: frozendict.frozendict + + def __init__(self, choices: 'typing.Dict[str, typing.Type[Hyperparams]]', default: str, *, semantic_types: typing.Sequence[str] = None, + description: str = None) -> None: + if default not in choices: + raise exceptions.InvalidArgumentValueError("Default value '{default}' is not among choices.".format(default=default)) + + choices = copy.copy(choices) + + for choice, hyperparams in choices.items(): + if not isinstance(choice, str): + raise exceptions.InvalidArgumentTypeError("Choice is not a string: {choice}".format(choice=choice)) + if not issubclass(hyperparams, Hyperparams): + raise exceptions.InvalidArgumentTypeError("Hyper-parameters space is not a subclass of 'Hyperparams' class: {choice}".format(choice=choice)) + if 'choice' in hyperparams.configuration: + raise ValueError("Hyper-parameters space contains a reserved hyper-paramater name 'choice': {choice}".format(choice=choice)) + + configuration = collections.OrderedDict(hyperparams.configuration) + configuration['choice'] = Hyperparameter[str](choice, semantic_types=['https://metadata.datadrivendiscovery.org/types/ChoiceParameter']) + + # We make a copy/subclass adding "choice" hyper-parameter. We add a name suffix to differentiate it from the parent class. + choices[choice] = hyperparams.define(configuration, class_name='{name}WithChoice'.format(name=hyperparams.__name__), module_name=hyperparams.__module__) + + self.default_hyperparams = choices[default] + self.choices = frozendict.frozendict(choices) + + # Used for sampling. + # See: https://github.com/numpy/numpy/issues/15935 + self._choices = numpy.array(list(self.choices.keys()), dtype=object) + + # Copy defaults and add "choice". + defaults = self.choices[default](self.choices[default].defaults(), choice=default) + + # If subclass has not already set it. + if not hasattr(self, 'structural_type'): + # Choices do not really have a free type argument, so this is probably the same as "dict". + self.structural_type = _get_structural_type_argument(self, T) + + super().__init__(defaults, semantic_types=semantic_types, description=description) + + # We go over all hyper-parameter configurations and set their names. This means that names should not already + # be set. This is by default so if "Hyperparams.define" is used, but if one defines a custom class, + # you have to define it like "class MyHyperparams(Hyperparams, set_names=False): ..." + def contribute_to_class(self, name: str) -> None: + super().contribute_to_class(name) + + for choice, hyperparams in self.choices.items(): + for hyperparameter_name, hyperparameter in hyperparams.configuration.items(): + hyperparameter.contribute_to_class('{name}.{choice}.{hyperparameter_name}'.format(name=self.name, choice=choice, hyperparameter_name=hyperparameter_name)) + + def get_default(self, path: str = None) -> typing.Any: + if path is None: + return super().get_default(path) + + if '.' not in path: + return self.choices[path].defaults() + else: + segment, rest = path.split('.', 1) + return self.choices[segment].defaults(rest) + + def validate(self, value: dict) -> None: + # Check that value belongs to the structural type, a dict. 
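+ # A value is a dict such as, hypothetically, {'choice': 'ball_tree', 'leaf_size': 30}; the 'choice' key
+ # selects the configuration which then validates the remaining hyper-parameter values.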
+ super().validate(value) + + if 'choice' not in value: + raise exceptions.InvalidArgumentValueError("'choice' is missing in '{value}' {for_name}.".format(value=value, for_name=self._for_name())) + + self.choices[value['choice']].validate(value) + + def sample(self, random_state: RandomState = None) -> dict: + """ + Samples a random value from the hyper-parameter search space. + + It first chooses a hyper-parameters configuration from available choices and then + samples it. + + Parameters + ---------- + random_state: + A random seed or state to be used when sampling. + + Returns + ------- + A sampled value. + """ + + random_state = sklearn_validation.check_random_state(random_state) + + choice = random_state.choice(self._choices) + + sample = self.choices[choice].sample(random_state) + + # The "choice" hyper-parameter should be sampled to its choice value. + assert choice == sample['choice'], sample + + return sample + + @functools.lru_cache() + def get_max_samples(self) -> typing.Optional[int]: # type: ignore + all_max_samples = 0 + for hyperparams in self.choices.values(): + hyperparams_max_samples = hyperparams.get_max_samples() + if hyperparams_max_samples is None: + return None + else: + all_max_samples += hyperparams_max_samples + return all_max_samples + + def sample_multiple(self, min_samples: int = 0, max_samples: int = None, random_state: RandomState = None, *, with_replacement: bool = False) -> typing.Sequence[T]: + """ + Samples multiple random values from the hyper-parameter search space. At least ``min_samples`` + of them, and at most ``max_samples``. + + Parameters + ---------- + min_samples: + A minimum number of samples to return. + max_samples: + A maximum number of samples to return. + random_state: + A random seed or state to be used when sampling. + with_replacement: + Are we sampling with replacement or without? + + Returns + ------- + A set (represented as a tuple) of multiple sampled values. 
+ """ + + min_samples, max_samples = self._check_sample_size(min_samples, max_samples, with_replacement) + + random_state = sklearn_validation.check_random_state(random_state) + + size = random_state.randint(min_samples, max_samples + 1) + + if with_replacement: + sample_list: list = [self.sample(random_state) for i in range(size)] + else: + sample_set: set = set() + sample_list = [] + while len(sample_list) != size: + value = self.sample(random_state) + if value not in sample_set: + sample_set.add(value) + sample_list.append(value) + + return tuple(sample_list) + + @functools.lru_cache() + def __repr__(self) -> str: # type: ignore + return '{class_name}(choices={{{choices}}}, default={default})'.format( + class_name=type(self).__name__, + choices=', '.join('{choice}: {hyperparams}'.format(choice=choice, hyperparams=hyperparams) for choice, hyperparams in self.choices.items()), + default=self.get_default(), + ) + + @functools.lru_cache() + def to_simple_structure(self) -> typing.Dict: # type: ignore + structure = super().to_simple_structure() + structure.update({ + 'choices': {choice: hyperparams.to_simple_structure() for choice, hyperparams in self.choices.items()} + }) + return structure + + @deprecate.function(message="use value_to_json_structure method instead") + def value_to_json(self, value: dict) -> typing.Any: + return self.value_to_json_structure(value) + + def value_to_json_structure(self, value: dict) -> typing.Any: + self.validate(value) + + return self.choices[value['choice']](value).values_to_json_structure() + + @deprecate.function(message="use value_from_json_structure method instead") + def value_from_json(self, json: typing.Any) -> dict: + return self.value_from_json_structure(json) + + def value_from_json_structure(self, json: typing.Any) -> dict: + value = self.choices[json['choice']].values_from_json_structure(json) + + self.validate(value) + + return value + + def traverse(self) -> 'typing.Iterator[Hyperparameter]': + yield from super().traverse() + + for hyperparams in self.choices.values(): + yield from hyperparams.traverse() + + def transform_value(self, value: dict, transform: typing.Callable, index: int = 0) -> dict: + if 'choice' not in value: + raise exceptions.InvalidArgumentValueError("'choice' is missing in '{value}' {for_name}.".format(value=value, for_name=self._for_name())) + + return self.choices[value['choice']].transform_value(value, transform, index + sorted(self.choices.keys()).index(value['choice'])) + + +# TODO: "elements" hyper-parameter still needs a default. Can we get rid of that somehow? It is not used. +# Maybe we should require that just top-level hyper-parameter instances need defaults, but not all. +class _Sequence(Hyperparameter[S]): + """ + Abstract class. Do not use directly. + + Attributes + ---------- + elements: + A hyper-parameter or hyper-parameters configuration of set elements. + min_size: + A minimal number of elements in the set. + max_size: + A maximal number of elements in the set. Can be ``None`` for no limit. + is_configuration: + Is ``elements`` a hyper-parameter or hyper-parameters configuration? 
+ """ + + elements: 'typing.Union[Hyperparameter, typing.Type[Hyperparams]]' + min_size: int + max_size: int + is_configuration: bool + + def __init__( + self, elements: 'typing.Union[Hyperparameter, typing.Type[Hyperparams]]', default: S, min_size: int = 0, max_size: int = None, *, + semantic_types: typing.Sequence[str] = None, description: str = None, + ) -> None: + self.elements = elements + self.min_size = min_size + self.max_size = max_size + self.is_configuration = utils.is_type(self.elements) and issubclass(typing.cast(type, self.elements), Hyperparams) + + if not isinstance(self.elements, Hyperparameter) and not self.is_configuration: + raise exceptions.InvalidArgumentTypeError("'elements' argument is not an instance of the Hyperparameter class or a subclass of the Hyperparams class.") + + if not isinstance(self.min_size, int): + raise exceptions.InvalidArgumentTypeError("'min_size' argument is not an int.") + if self.min_size < 0: + raise exceptions.InvalidArgumentValueError("'min_size' cannot be smaller than 0.") + if self.max_size is not None: + if not isinstance(self.max_size, int): + raise exceptions.InvalidArgumentTypeError("'max_size' argument is not an int.") + if self.min_size > self.max_size: + raise exceptions.InvalidArgumentValueError("'min_size' cannot be larger than 'max_size'.") + + # If subclass has not already set it. + if not hasattr(self, 'structural_type'): + structural_type = _get_structural_type_argument(self, S) # type: ignore + + if structural_type == typing.Any: + if self.is_configuration: + structural_type = typing.Sequence[self.elements] # type: ignore + else: + structural_type = typing.Sequence[elements.structural_type] # type: ignore + + self.structural_type = structural_type + + if not utils.is_subclass(self.structural_type, typing.Sequence): + raise exceptions.InvalidArgumentTypeError("Structural type is not a subclass of a sequence.") + + elements_type = utils.get_type_arguments(self.structural_type)[typing.T_co] # type: ignore + if self.is_configuration: + if elements_type is not self.elements: + raise exceptions.InvalidArgumentTypeError("Structural type does not match hyper-parameters configuration type.") + else: + if elements_type is not elements.structural_type: + raise exceptions.InvalidArgumentTypeError("Structural type does not match elements hyper-parameter's structural type.") + + # Default value is checked by parent class calling "validate". + + super().__init__(default, semantic_types=semantic_types, description=description) + + # We go over the hyper-parameters configuration and set their names. This means that names should not already + # be set. This is by default so if "Hyperparams.define" is used, but if one defines a custom class, + # you have to define it like "class MyHyperparams(Hyperparams, set_names=False): ..." + def contribute_to_class(self, name: str) -> None: + super().contribute_to_class(name) + + if self.is_configuration: + for hyperparameter_name, hyperparameter in typing.cast(typing.Type[Hyperparams], self.elements).configuration.items(): + hyperparameter.contribute_to_class('{name}.{hyperparameter_name}'.format(name=self.name, hyperparameter_name=hyperparameter_name)) + else: + self.elements.contribute_to_class('{name}.elements'.format(name=self.name)) + + def get_default(self, path: str = None) -> typing.Any: + # If "path" is "None" we want to return what was set as a default for this hyper-parameter + # which might be different than hyper-parameters configuration defaults. 
+ if path is None or not self.is_configuration: + return super().get_default(path) + else: + return typing.cast(Hyperparams, self.elements).defaults(path) + + def validate(self, value: S) -> None: + # Check that value belongs to the structural type. + super().validate(value) + + cast_value = typing.cast(typing.Sequence, value) + + for v in cast_value: + self.elements.validate(v) + + if not self.min_size <= len(cast_value): + raise exceptions.InvalidArgumentValueError("Value '{value}' {for_name}has less than {min_size} elements.".format(value=value, for_name=self._for_name(), min_size=self.min_size)) + if self.max_size is not None and not len(cast_value) <= self.max_size: + raise exceptions.InvalidArgumentValueError("Value '{value}' {for_name}has more than {max_size} elements.".format(value=value, for_name=self._for_name(), max_size=self.max_size)) + + @abc.abstractmethod + def sample(self, random_state: RandomState = None) -> S: + pass + + @abc.abstractmethod + def get_max_samples(self) -> typing.Optional[int]: + pass + + @abc.abstractmethod + def sample_multiple(self, min_samples: int = 0, max_samples: int = None, random_state: RandomState = None, *, with_replacement: bool = False) -> typing.Sequence[S]: + pass + + def __repr__(self) -> str: + return '{class_name}(elements={elements}, default={default}, min_size={min_size}, max_size={max_size})'.format( + class_name=type(self).__name__, + elements=self.elements, + default=self.get_default(), + min_size=self.min_size, + max_size=self.max_size, + ) + + @functools.lru_cache() + def to_simple_structure(self) -> typing.Dict: # type: ignore + structure = super().to_simple_structure() + structure.update({ + 'elements': self.elements.to_simple_structure(), + 'is_configuration': self.is_configuration, + 'min_size': self.min_size, + }) + + if self.max_size is not None: + structure['max_size'] = self.max_size + + return structure + + @deprecate.function(message="use value_to_json_structure method instead") + def value_to_json(self, value: S) -> typing.Any: + return self.value_to_json_structure(value) + + def value_to_json_structure(self, value: S) -> typing.Any: + self.validate(value) + + if self.is_configuration: + return [typing.cast(typing.Type[Hyperparams], self.elements)(v).values_to_json_structure() for v in typing.cast(typing.Sequence, value)] + else: + return [self.elements.value_to_json_structure(v) for v in typing.cast(typing.Sequence, value)] + + @deprecate.function(message="use value_from_json_structure method instead") + def value_from_json(self, json: typing.Any) -> S: + return self.value_from_json_structure(json) + + def value_from_json_structure(self, json: typing.Any) -> S: + if self.is_configuration: + value = typing.cast(S, tuple(typing.cast(typing.Type[Hyperparams], self.elements).values_from_json_structure(j) for j in json)) + else: + value = typing.cast(S, tuple(self.elements.value_from_json_structure(j) for j in json)) + + self.validate(value) + + return value + + def traverse(self) -> 'typing.Iterator[Hyperparameter]': + yield from super().traverse() + + if self.is_configuration: + yield from self.elements.traverse() + else: + yield self.elements + + def transform_value(self, value: S, transform: typing.Callable, index: int = 0) -> S: + cast_value = typing.cast(typing.Sequence, value) + + # We assume here that we can make a new instance of the sequence-type used + # for "value" by providing an iterator of new values to its constructor. + # This works for tuples which we are using by default to represent a set. 
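+ # Each element is transformed with its own index offset so that nested hyper-parameters receive distinct indices.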
+ return type(value)(self.elements.transform_value(v, transform, index + i) for i, v in enumerate(cast_value)) # type: ignore + + def can_accept_value_type(self, structural_type: typing.Union[type, typing.List[type]]) -> bool: + if not isinstance(structural_type, typing.List): + # For parent method to return "False" because for "Set" hyper-parameter it has to be a list of types. + return super().can_accept_value_type(structural_type) + + if not self.min_size <= len(structural_type): + return False + if self.max_size is not None and not len(structural_type) <= self.max_size: + return False + + for st in structural_type: + if not self.elements.can_accept_value_type(st): + return False + + return True + + +class Set(_Sequence[S]): + """ + A set hyper-parameter which samples without replacement multiple times another hyper-parameter or hyper-parameters configuration. + + This is useful when a primitive is interested in more than one value of a hyper-parameter or hyper-parameters configuration. + + Values are represented as tuples of unique elements. The order of elements does not matter (two different orders of same + elements represent the same value), but order is meaningful and preserved to assure reproducibility. + + Type variable ``S`` does not have to be specified because the structural type + is a set from provided elements. + """ + + def validate(self, value: S) -> None: + super().validate(value) + + cast_value = typing.cast(typing.Sequence, value) + + if utils.has_duplicates(cast_value): + raise exceptions.InvalidArgumentValueError("Value '{value}' {for_name}has duplicate elements.".format(value=value, for_name=self._for_name())) + + def sample(self, random_state: RandomState = None) -> S: + """ + Samples a random value from the hyper-parameter search space. + + It first randomly chooses the size of the resulting sampled set + and then samples this number of unique elements. + + Parameters + ---------- + random_state: + A random seed or state to be used when sampling. + + Returns + ------- + A sampled value. + """ + + elements_max_samples = self.elements.get_max_samples() + if elements_max_samples is not None and elements_max_samples < self.min_size: + utils.log_once( + logger, + logging.WARNING, + "Elements hyper-parameter for hyper-parameter '%(name)s' cannot provide enough samples " + "(maximum %(elements_max_samples)s) to sample a set of at least %(min_size)s elements. Using a default value.", + {'name': self.name, 'elements_max_samples': elements_max_samples, 'min_size': self.min_size}, + stack_info=True, + ) + + return self.get_default() + + return self.elements.sample_multiple(min_samples=self.min_size, max_samples=self.max_size, random_state=random_state, with_replacement=False) # type: ignore + + @functools.lru_cache() + def get_max_samples(self) -> typing.Optional[int]: # type: ignore + max_samples = self.elements.get_max_samples() + if max_samples is None: + return None + elif max_samples < self.min_size: + # Theoretically this would be 0, but we sample with default value in this case. 
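+ # There is thus exactly one possible sample: the default value.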
+ return 1 + elif self.max_size is None: + return 2 ** max_samples - sum(scipy_special.comb(max_samples, j, exact=True) for j in range(self.min_size)) + else: + return sum(scipy_special.comb(max_samples, k, exact=True) for k in range(self.min_size, self.max_size + 1)) + + def sample_multiple(self, min_samples: int = 0, max_samples: int = None, random_state: RandomState = None, *, with_replacement: bool = False) -> typing.Sequence[S]: + """ + Samples multiple random values from the hyper-parameter search space. At least ``min_samples`` + of them, and at most ``max_samples``. + + Parameters + ---------- + min_samples: + A minimum number of samples to return. + max_samples: + A maximum number of samples to return. + random_state: + A random seed or state to be used when sampling. + with_replacement: + Are we sampling with replacement or without? + + Returns + ------- + A set (represented as a tuple) of multiple sampled values. + """ + + min_samples, max_samples = self._check_sample_size(min_samples, max_samples, with_replacement) + + random_state = sklearn_validation.check_random_state(random_state) + + size = random_state.randint(min_samples, max_samples + 1) + + if with_replacement: + sample_list: list = [self.sample(random_state) for i in range(size)] + else: + sample_set: set = set() + sample_list = [] + while len(sample_list) != size: + value = self.sample(random_state) + value_set: frozenset = frozenset(value) + if value_set not in sample_set: + sample_set.add(value_set) + sample_list.append(value) + + return tuple(sample_list) + + +class SortedSet(Set[S]): + """ + Similar to `Set` hyper-parameter, but elements of values are required to be sorted from smallest to largest, by default. + + Hyper-parameters configuration as elements is not supported. + + Attributes + ---------- + ascending: + Are values required to be sorted from smallest to largest (``True``) or the opposite (``False``). + """ + + ascending: bool + + def __init__( + self, elements: Hyperparameter, default: S, min_size: int = 0, max_size: int = None, *, + ascending: bool = True, semantic_types: typing.Sequence[str] = None, description: str = None, + ) -> None: + self.ascending = ascending + + if self.ascending: + self._compare = operator.lt + else: + self._compare = operator.gt + + super().__init__(elements, default, min_size, max_size, semantic_types=semantic_types, description=description) + + if self.is_configuration: + raise exceptions.NotSupportedError("Hyper-parameters configuration as elements is not supported.") + + def validate(self, value: S) -> None: + super().validate(value) + + if not all(self._compare(a, b) for a, b in zip(value, value[1:])): # type: ignore + raise exceptions.InvalidArgumentValueError("Value '{value}' {for_name}is not sorted.".format(value=value, for_name=self._for_name())) + + def sample(self, random_state: RandomState = None) -> S: + values = super().sample(random_state) + return type(values)(sorted(values, reverse=not self.ascending)) + + def to_simple_structure(self) -> typing.Dict: # type: ignore + structure = super().to_simple_structure() + structure['ascending'] = self.ascending + del structure['is_configuration'] + return structure + + +class List(_Sequence[S]): + """ + A list hyper-parameter which samples with replacement multiple times another hyper-parameter or hyper-parameters configuration. + + This is useful when a primitive is interested in more than one value of a hyper-parameter or hyper-parameters configuration. + + Values are represented as tuples of elements. 
The order of elements matters and is preserved but is not prescribed.
+
+ Type variable ``S`` does not have to be specified because the structural type
+ is a sequence from provided elements.
+ """
+
+ def sample(self, random_state: RandomState = None) -> S:
+ """
+ Samples a random value from the hyper-parameter search space.
+
+ It first randomly chooses the size of the resulting sampled list
+ and then samples this number of elements.
+
+ Parameters
+ ----------
+ random_state:
+ A random seed or state to be used when sampling.
+
+ Returns
+ -------
+ A sampled value.
+ """
+
+ if self.max_size is None:
+ utils.log_once(
+ logger,
+ logging.WARNING,
+ "Sampling an unlimited list hyper-parameter '%(name)s'. Using a default value.",
+ {'name': self.name},
+ stack_info=True,
+ )
+
+ return self.get_default()
+
+ return self.elements.sample_multiple(min_samples=self.min_size, max_samples=self.max_size, random_state=random_state, with_replacement=True) # type: ignore
+
+ @functools.lru_cache()
+ def get_max_samples(self) -> typing.Optional[int]: # type: ignore
+ max_samples = self.elements.get_max_samples()
+ if max_samples is None:
+ return None
+ elif self.max_size is None:
+ # Theoretically this would be "None", but we sample with default value in this case.
+ return 1
+ # Equal to: sum(max_samples ** k for k in range(self.min_size, self.max_size + 1))
+ else:
+ if max_samples == 0:
+ return 0
+ elif max_samples == 1:
+ return self.max_size - self.min_size + 1
+ else:
+ return (max_samples ** self.min_size) * (max_samples ** (self.max_size - self.min_size + 1) - 1) // (max_samples - 1)
+
+ def sample_multiple(self, min_samples: int = 0, max_samples: int = None, random_state: RandomState = None, *, with_replacement: bool = False) -> typing.Sequence[S]:
+ """
+ Samples multiple random values from the hyper-parameter search space. At least ``min_samples``
+ of them, and at most ``max_samples``.
+
+ Parameters
+ ----------
+ min_samples:
+ A minimum number of samples to return.
+ max_samples:
+ A maximum number of samples to return.
+ random_state:
+ A random seed or state to be used when sampling.
+ with_replacement:
+ Are we sampling with replacement or without?
+
+ Returns
+ -------
+ A list (represented as a tuple) of multiple sampled values.
+ """
+
+ min_samples, max_samples = self._check_sample_size(min_samples, max_samples, with_replacement)
+
+ random_state = sklearn_validation.check_random_state(random_state)
+
+ size = random_state.randint(min_samples, max_samples + 1)
+
+ if with_replacement:
+ sample_list: list = [self.sample(random_state) for i in range(size)]
+ else:
+ sample_set: set = set()
+ sample_list = []
+ while len(sample_list) != size:
+ value = self.sample(random_state)
+ if value not in sample_set:
+ sample_set.add(value)
+ sample_list.append(value)
+
+ return tuple(sample_list)
+
+
+class SortedList(List[S]):
+ """
+ Similar to `List` hyper-parameter, but elements of values are required to be sorted from smallest to largest, by default.
+
+ Hyper-parameters configuration as elements is not supported.
+
+ Attributes
+ ----------
+ ascending:
+ Are values required to be sorted from smallest to largest (``True``) or the opposite (``False``).
+ """ + + ascending: bool + + def __init__( + self, elements: Hyperparameter, default: S, min_size: int = 0, max_size: int = None, *, + ascending: bool = True, semantic_types: typing.Sequence[str] = None, description: str = None, + ) -> None: + self.ascending = ascending + + if self.ascending: + self._compare = operator.le + else: + self._compare = operator.ge + + super().__init__(elements, default, min_size, max_size, semantic_types=semantic_types, description=description) + + if self.is_configuration: + raise exceptions.NotSupportedError("Hyper-parameters configuration as elements is not supported.") + + def validate(self, value: S) -> None: + super().validate(value) + + if not all(self._compare(a, b) for a, b in zip(value, value[1:])): # type: ignore + raise exceptions.InvalidArgumentValueError("Value '{value}' {for_name}is not sorted.".format(value=value, for_name=self._for_name())) + + def sample(self, random_state: RandomState = None) -> S: + values = super().sample(random_state) + return type(values)(sorted(values, reverse=not self.ascending)) + + @functools.lru_cache() + def get_max_samples(self) -> typing.Optional[int]: # type: ignore + max_samples = self.elements.get_max_samples() + if max_samples is None: + return None + elif self.max_size is None: + return None + else: + return sum(scipy_special.comb(max_samples + k - 1, k, exact=True) for k in range(self.min_size, self.max_size + 1)) + + def to_simple_structure(self) -> typing.Dict: # type: ignore + structure = super().to_simple_structure() + structure['ascending'] = self.ascending + del structure['is_configuration'] + return structure + + +class HyperparamsMeta(utils.AbstractMetaclass): + """ + A metaclass which provides the hyper-parameter description its name. + """ + + def __new__(mcls, class_name, bases, namespace, set_names=True, **kwargs): # type: ignore + # This should run only on subclasses of the "Hyperparams" class. + if bases != (dict,): + # Hyper-parameters configuration should be deterministic, so order matters. + configuration = collections.OrderedDict() + + # Create a (mutable) copy and don't modify the input argument. + namespace = collections.OrderedDict(namespace) + + # We traverse parent classes in order to keep hyper-parameters configuration deterministic. + for parent_class in bases: + # Using "isinstance" and not "issubclass" because we are comparing against a metaclass. + if isinstance(parent_class, mcls): + configuration.update(parent_class.configuration) + + for name, value in namespace.items(): + if name.startswith('_'): + continue + + if isinstance(value, Hyperparameter): + if name in base.STANDARD_PIPELINE_ARGUMENTS or name in base.STANDARD_RUNTIME_ARGUMENTS: + raise ValueError("Hyper-parameter name '{name}' is reserved because it is used as an argument in primitive interfaces.".format( + name=name, + )) + + if not HYPERPARAMETER_NAME_REGEX.match(name): + raise ValueError("Hyper-parameter name '{name}' contains invalid characters.".format( + name=name, + )) + + if set_names: + value.contribute_to_class(name) + + configuration[name] = value + + if isinstance(value, tuple) and len(value) == 1 and isinstance(value[0], Hyperparameter): + logger.warning("Probably invalid definition of a hyper-parameter. Hyper-parameter should be defined as class attribute without a trailing comma.", stack_info=True) + + for name in configuration.keys(): + # "name" might came from a parent class, but if not, then remove it + # from the namespace of the class we are creating. 
+                if name in namespace:
+                    del namespace[name]
+
+            namespace['configuration'] = frozendict.FrozenOrderedDict(configuration)
+
+        return super().__new__(mcls, class_name, bases, namespace, **kwargs)
+
+    def __repr__(self):  # type: ignore
+        return '<class \'{module}.{class_name}\' configuration: {{{configuration}}}>'.format(
+            module=self.__module__,
+            class_name=self.__name__,
+            configuration=', '.join('{name}: {hyperparameter}'.format(name=name, hyperparameter=hyperparameter) for name, hyperparameter in self.configuration.items()),
+        )
+
+    def __setattr__(self, key, value):  # type: ignore
+        if key == 'configuration':
+            raise AttributeError("Hyper-parameters configuration is immutable.")
+
+        super().__setattr__(key, value)
+
+
+H = typing.TypeVar('H', bound='Hyperparams')
+
+
+class Hyperparams(dict, metaclass=HyperparamsMeta):
+    """
+    A base class to be subclassed and used as a type for ``Hyperparams``
+    type argument in primitive interfaces. An instance of this subclass
+    is passed as a ``hyperparams`` argument to primitive's constructor.
+
+    You should subclass the class and configure class attributes to
+    hyper-parameters you want. They will be extracted out and put into
+    the ``configuration`` attribute. They have to be an instance of the
+    `Hyperparameter` class for this to happen.
+
+    You can define additional methods and attributes on the class.
+    Prefix them with `_` to not conflict with future standard ones.
+
+    When creating an instance of the class, all hyper-parameters have
+    to be provided. Default values have to be explicitly passed.
+
+    Attributes
+    ----------
+    configuration:
+        A hyper-parameters configuration.
+    """
+
+    configuration: typing.ClassVar[frozendict.FrozenOrderedDict] = frozendict.FrozenOrderedDict()
+
+    def __init__(self, *args: typing.Any, **kwargs: typing.Any) -> None:
+        values = dict(*args, **kwargs)
+
+        self.validate(values)
+
+        super().__init__(values)
+
+        self._hash: int = None
+
+    @classmethod
+    def sample(cls: typing.Type[H], random_state: RandomState = None) -> H:
+        """
+        Returns a hyper-parameters sample with all values sampled from their hyper-parameter configurations.
+
+        Parameters
+        ----------
+        random_state:
+            A random seed or state to be used when sampling.
+
+        Returns
+        -------
+        An instance of hyper-parameters.
+        """
+        random_state = sklearn_validation.check_random_state(random_state)
+
+        values = {}
+
+        for name, hyperparameter in cls.configuration.items():
+            values[name] = hyperparameter.sample(random_state)
+
+        return cls(values)
+
+    @classmethod
+    def get_max_samples(cls) -> typing.Optional[int]:
+        hyperparams_max_samples = 1
+        for hyperparameter in cls.configuration.values():
+            hyperparameter_max_samples = hyperparameter.get_max_samples()
+            if hyperparameter_max_samples is None:
+                return None
+            else:
+                # TODO: Assumption here is that hyper-parameters are independent. What about when we support dependencies?
+ # See: https://gitlab.com/datadrivendiscovery/d3m/issues/46 + hyperparams_max_samples *= hyperparameter_max_samples + return hyperparams_max_samples + + @classmethod + def _check_sample_size(cls, min_samples: int, max_samples: typing.Optional[int], with_replacement: bool) -> typing.Tuple[int, int]: + return check_sample_size(cls, min_samples, max_samples, with_replacement) + + @classmethod + def sample_multiple(cls: typing.Type[H], min_samples: int = 0, max_samples: int = None, random_state: RandomState = None, *, with_replacement: bool = False) -> typing.Sequence[H]: + min_samples, max_samples = cls._check_sample_size(min_samples, max_samples, with_replacement) + + random_state = sklearn_validation.check_random_state(random_state) + + size = random_state.randint(min_samples, max_samples + 1) + + if with_replacement: + sample_list: list = [cls.sample(random_state) for i in range(size)] + else: + sample_set: set = set() + sample_list = [] + while len(sample_list) != size: + value = cls.sample(random_state) + if value not in sample_set: + sample_set.add(value) + sample_list.append(value) + + return tuple(sample_list) + + @classmethod + def defaults(cls: typing.Type[H], path: str = None) -> typing.Any: + """ + Returns a hyper-parameters sample with all values set to defaults. + + Parameters + ---------- + path: + An optional path to get defaults for. It can contain ``.`` to represent + a path through nested hyper-parameters. + + Returns + ------- + An instance of hyper-parameters or a default value of a hyper-parameter under ``path``. + """ + + if path is None: + values = {} + + for name, hyperparameter in cls.configuration.items(): + values[name] = hyperparameter.get_default() + + return cls(values) + + else: + if '.' not in path: + return cls.configuration[path].get_default() + else: + segment, rest = path.split('.', 1) + return cls.configuration[segment].get_default(rest) + + @classmethod + def validate(cls, values: dict) -> None: + configuration_keys = set(cls.configuration.keys()) + values_keys = set(values.keys()) + + missing = configuration_keys - values_keys + if len(missing): + raise exceptions.InvalidArgumentValueError("Not all hyper-parameters are specified: {missing}".format(missing=missing)) + + extra = values_keys - configuration_keys + if len(extra): + raise exceptions.InvalidArgumentValueError("Additional hyper-parameters are specified: {extra}".format(extra=extra)) + + for name, value in values.items(): + cls.configuration[name].validate(value) + + @classmethod + @functools.lru_cache() + def to_simple_structure(cls) -> typing.Dict: + """ + Converts the hyper-parameters configuration to a simple structure, similar to JSON, but with values + left as Python values. + + Returns + ------- + A dict. + """ + + return {name: hyperparameter.to_simple_structure() for name, hyperparameter in cls.configuration.items()} + + @classmethod + def define(cls: typing.Type[H], configuration: 'collections.OrderedDict[str, Hyperparameter]', *, + class_name: str = None, module_name: str = None, set_names: bool = False) -> typing.Type[H]: + """ + Define dynamically a subclass of this class using ``configuration`` and optional + ``class_name`` and ``module_name``. + + This is equivalent of defining a class statically in Python. ``configuration`` is what + you would otherwise provide through class attributes. + + Parameters + ---------- + configuration: + A hyper-parameters configuration. + class_name: + Class name of the subclass. + module_name: + Module name of the subclass. 
+ set_names: + Should all hyper-parameters defined have their names set. By default ``False``. + This is different from when defining a static subclass, where the default is ``True`` + and names are set by the default. + + Returns + ------- + A subclass itself. + """ + + # Create a (mutable) copy and don't modify the input argument. + namespace: typing.Dict[str, typing.Any] = collections.OrderedDict(configuration) + + if class_name is None: + # We want automatically generated class names to be unique. + class_name = '{name}{id}'.format(name=cls.__name__, id=id(configuration)) + + if module_name is None: + frame = inspect.currentframe() + if frame is not None and frame.f_back is not None: + module_name = frame.f_back.f_globals['__name__'] + + if module_name is not None: + namespace['__module__'] = module_name + + return types.new_class(class_name, (cls,), {'set_names': set_names}, lambda ns: ns.update(namespace)) + + def values_to_json_structure(self) -> typing.Dict[str, typing.Dict]: + """ + Converts hyper-parameter values to a JSON-compatible structure. + + Returns + ------- + A JSON-compatible dict. + """ + + return {name: self.configuration[name].value_to_json_structure(value) for name, value in self.items()} + + @classmethod + def values_from_json_structure(cls: typing.Type[H], json: typing.Dict[str, typing.Dict]) -> H: + """ + Converts given JSON-compatible structure to an instance of this class with values + from the structure. + + Parameters + ---------- + json: + A JSON-compatible dict. + + Returns + ------- + An instance of this class with values from ``json`` argument. + """ + + return cls({name: cls.configuration[name].value_from_json_structure(value) for name, value in json.items()}) + + @classmethod + def traverse(cls) -> typing.Iterator[Hyperparameter]: + """ + Traverse over all hyper-parameters used in this hyper-parameters configuration. + + Yields + ------ + Hyperparamater + The next hyper-parameter used in this hyper-parameters configuration. + """ + + for hyperparameter in cls.configuration.values(): + yield hyperparameter + yield from hyperparameter.traverse() + + @classmethod + def transform_value(cls: typing.Type[H], values: dict, transform: typing.Callable, index: int = 0) -> H: + transformed_values = {} + for i, name in enumerate(sorted(values.keys())): + transformed_values[name] = cls.configuration[name].transform_value(values[name], transform, index + i) + + return cls(transformed_values) + + @classmethod + def can_accept_value_type(cls, structural_type: typing.Union[type, typing.List[type]]) -> bool: + if structural_type is typing.Any: + return True + elif isinstance(structural_type, typing.List): + # We do not support a list of types. This is used for "Set" hyper-parameter. + return False + else: + return utils.is_subclass(structural_type, cls) + + def replace(self: H, values: typing.Dict[str, typing.Any]) -> H: + """ + Creates a copy of hyper-parameters with values replaced with values from ``values``. + + This is equivalent of doing ``Hyperparams(hyperparams, **values)``. + + Parameters + ---------- + values: + Map between keys and values to replace. + + Returns + ------- + A copy of the object with replaced values. 
+ """ + + return type(self)(self, **values) + + def __setitem__(self, key, value): # type: ignore + raise TypeError("Hyper-parameters are immutable.") + + def __delitem__(self, key): # type: ignore + raise TypeError("Hyper-parameters are immutable.") + + def clear(self): # type: ignore + raise TypeError("Hyper-parameters are immutable.") + + def pop(self, key, default=None): # type: ignore + raise TypeError("Hyper-parameters are immutable.") + + def popitem(self): # type: ignore + raise TypeError("Hyper-parameters are immutable.") + + def setdefault(self, key, default=None): # type: ignore + raise TypeError("Hyper-parameters are immutable.") + + def update(self, *args, **kwargs): # type: ignore + raise TypeError("Hyper-parameters are immutable.") + + def __repr__(self) -> str: + return '{class_name}({super})'.format(class_name=type(self).__name__, super=super().__repr__()) + + def __getstate__(self) -> dict: + return dict(self) + + def __setstate__(self, state: dict) -> None: + self.__init__(state) # type: ignore + + # In the past, we had to implement our own __reduce__ method because dict is otherwise pickled + # using a built-in implementation which does not call "__getstate__". But now we use it also + # to handle the case of classes defined using "define". + def __reduce__(self) -> typing.Tuple[typing.Callable, typing.Tuple, dict]: + # If class has been defined at the global scope of a module, we can use regular pickling approach. + if _is_defined_at_global_scope(self.__class__): + return __newobj__, (self.__class__,), self.__getstate__() + + base_cls = None + define_args_list: typing.List[typing.Dict[str, typing.Any]] = [] + for cls in inspect.getmro(self.__class__): + if _is_defined_at_global_scope(cls): + base_cls = cls + break + + if not issubclass(cls, Hyperparams): + raise pickle.PickleError("Class is not a subclass of \"Hyperparams\" class.") + + if set(cls.__dict__.keys()) - DEFAULT_HYPERPARAMS_CLASS_ATTRIBUTES: + raise pickle.PickleError("A class with custom attributes not defined at a global scope.") + + cls = typing.cast(typing.Type[Hyperparams], cls) + + define_args_list.insert(0, { + 'configuration': cls.configuration, + 'class_name': getattr(cls, '__name__', None), + 'module_name': getattr(cls, '__module__', None), + }) + + if base_cls is None: + raise pickle.PickleError("Cannot find a base class defined at a global scope.") + + if not issubclass(base_cls, Hyperparams): + raise pickle.PickleError("Found base class is not a subclass of \"Hyperparams\" class.") + + return _recreate_hyperparams_class, (base_cls, define_args_list), self.__getstate__() + + # It is immutable, so hash can be defined. + def __hash__(self) -> int: + if self._hash is None: + h = 0 + for key, value in self.items(): + h ^= hash((key, value)) + self._hash = h + return self._hash + + +# This is defined here so that we compute it only once. 
+DEFAULT_HYPERPARAMS_CLASS_ATTRIBUTES = set(Hyperparams.define(collections.OrderedDict()).__dict__.keys()) diff --git a/d3m/d3m/metadata/params.py b/d3m/d3m/metadata/params.py new file mode 100644 index 0000000..5af2bce --- /dev/null +++ b/d3m/d3m/metadata/params.py @@ -0,0 +1,138 @@ +import typing + +from d3m import exceptions, utils + + +class ParamsMeta(utils.AbstractMetaclass): + def __new__(mcls, class_name, bases, namespace, **kwargs): # type: ignore + for name, value in namespace.items(): + if name.startswith('_'): + continue + + if utils.is_class_method_on_class(value) or utils.is_instance_method_on_class(value): + continue + + raise TypeError("Only methods and attribute type annotations can be defined on Params class, not '{name}'.".format(name=name)) + + class_params_items = {} + class_annotations = namespace.get('__annotations__', {}) + + for name, value in class_annotations.items(): + value = typing._type_check(value, "Each annotation must be a type.") + + if name in namespace: + # Just update the annotation. + class_annotations[name] = value + else: + # Extract annotation out. + class_params_items[name] = value + + for name in class_params_items.keys(): + del class_annotations[name] + + # Set back updated annotations. + namespace['__annotations__'] = class_annotations + + params_items = {} + + for base in reversed(bases): + params_items.update(base.__dict__.get('__params_items__', {})) + + params_items.update(class_params_items) + + namespace['__params_items__'] = params_items + + return super().__new__(mcls, class_name, bases, namespace, **kwargs) + + +class Params(dict, metaclass=ParamsMeta): + """ + A base class to be subclassed and used as a type for ``Params`` type + argument in primitive interfaces. An instance of this subclass should + be returned from primitive's ``get_params`` method, and accepted in + ``set_params``. + + You should subclass the class and set type annotations on class attributes + for params available in the class. + + When creating an instance of the class, all parameters have to be provided. 
+ """ + + def __init__(self, other: typing.Dict[str, typing.Any] = None, **values: typing.Any) -> None: + if other is None: + other = {} + + values = dict(other, **values) + + params_keys = set(self.__params_items__.keys()) # type: ignore + values_keys = set(values.keys()) + + missing = params_keys - values_keys + if len(missing): + raise exceptions.InvalidArgumentValueError("Not all parameters are specified: {missing}".format(missing=missing)) + + extra = values_keys - params_keys + if len(extra): + raise exceptions.InvalidArgumentValueError("Additional parameters are specified: {extra}".format(extra=extra)) + + for name, value in values.items(): + value_type = self.__params_items__[name] # type: ignore + if not utils.is_instance(value, value_type): + raise exceptions.InvalidArgumentTypeError("Value '{value}' for parameter '{name}' is not an instance of the type: {value_type}".format(value=value, name=name, value_type=value_type)) + + super().__init__(values) + + def __setitem__(self, key, value): # type: ignore + if key not in self.__params_items__: + raise ValueError("Additional parameter is specified: {key}".format(key=key)) + + value_type = self.__params_items__[key] + if not utils.is_instance(value, value_type): + raise TypeError("Value '{value}' for parameter '{name}' is not an instance of the type: {value_type}".format(value=value, name=key, value_type=value_type)) + + return super().__setitem__(key, value) + + def __delitem__(self, key): # type: ignore + raise AttributeError("You cannot delete parameters.") + + def clear(self): # type: ignore + raise AttributeError("You cannot delete parameters.") + + def pop(self, key, default=None): # type: ignore + raise AttributeError("You cannot delete parameters.") + + def popitem(self): # type: ignore + raise AttributeError("You cannot delete parameters.") + + def setdefault(self, key, default=None): # type: ignore + if key not in self.__params_items__: + raise ValueError("Additional parameter is specified: {key}".format(key=key)) + + default_type = self.__params_items__[key] + if not utils.is_instance(default, default_type): + raise TypeError("Value '{value}' for parameter '{name}' is not an instance of the type: {value_type}".format(value=default, name=key, value_type=default_type)) + + return super().setdefault(key, default) + + def update(self, other: typing.Dict[str, typing.Any] = None, **values: typing.Any) -> None: # type: ignore + if other is None: + other = {} + + values = dict(other, **values) + + params_keys = set(self.__params_items__.keys()) # type: ignore + values_keys = set(values.keys()) + + extra = values_keys - params_keys + if len(extra): + raise ValueError("Additional parameters are specified: {extra}".format(extra=extra)) + + for name, value in values.items(): + value_type = self.__params_items__[name] # type: ignore + if not utils.is_instance(value, value_type): + raise TypeError("Value '{value}' for parameter '{name}' is not an instance of the type: {value_type}".format(value=value, name=name, value_type=value_type)) + + super().update(values) + + def __repr__(self) -> str: + return '{class_name}({super})'.format(class_name=type(self).__name__, super=super().__repr__()) diff --git a/d3m/d3m/metadata/pipeline.py b/d3m/d3m/metadata/pipeline.py new file mode 100644 index 0000000..78cc286 --- /dev/null +++ b/d3m/d3m/metadata/pipeline.py @@ -0,0 +1,2970 @@ +import abc +import argparse +import collections +import copy +import datetime +import json +import logging +import os +import os.path +import pprint +import sys +import 
traceback
+import typing
+import uuid
+
+import dateparser  # type: ignore
+
+from d3m import container, deprecate, environment_variables, exceptions, index, utils
+from d3m.primitive_interfaces import base
+from . import base as metadata_base, hyperparams as hyperparams_module
+
+# See: https://gitlab.com/datadrivendiscovery/d3m/issues/66
+try:
+    from pyarrow import lib as pyarrow_lib  # type: ignore
+except ModuleNotFoundError:
+    pyarrow_lib = None
+
+__all__ = (
+    'Pipeline', 'Resolver', 'NoResolver', 'PrimitiveStep', 'SubpipelineStep', 'PlaceholderStep',
+)
+
+logger = logging.getLogger(__name__)
+
+# Comma because we unpack the list of validators returned from "load_schema_validators".
+PIPELINE_SCHEMA_VALIDATOR, = utils.load_schema_validators(metadata_base.SCHEMAS, ('pipeline.json',))
+
+PIPELINE_SCHEMA_VERSION = 'https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json'
+
+CONTROL_HYPERPARAMETER_SEMANTIC_TYPE = 'https://metadata.datadrivendiscovery.org/types/ControlParameter'
+
+
+class TypeInfo(typing.NamedTuple):
+    structural_type: type
+    singleton: typing.Optional[bool]
+
+
+class Resolver:
+    """
+    A resolver to resolve primitives and pipelines.
+
+    It resolves primitives from available primitives on the system,
+    and resolves pipelines from files in pipeline search paths.
+
+    Attributes
+    ----------
+    strict_resolving:
+        If resolved pipeline or primitive does not fully match specified primitive reference, raise an exception?
+    strict_digest:
+        When loading pipelines or primitives, if computed digest does not match the one provided in metadata, raise an exception?
+    pipeline_search_paths:
+        A list of paths to directories with pipelines to resolve from.
+        Their files should be named ``<pipeline id>.json``, ``<pipeline id>.yml``, or ``<pipeline id>.yaml``.
+
+    Parameters
+    ----------
+    strict_resolving:
+        If resolved pipeline or primitive does not fully match specified primitive reference, raise an exception?
+    strict_digest:
+        When loading pipelines or primitives, if computed digest does not match the one provided in metadata, raise an exception?
+    pipeline_search_paths:
+        A list of paths to directories with pipelines to resolve from.
+        Their files should be named ``<pipeline id>.json``, ``<pipeline id>.yml``, or ``<pipeline id>.yaml``.
+    respect_environment_variable:
+        Use also (colon separated) pipeline search paths from ``PIPELINES_PATH`` environment variable?
+    load_all_primitives:
+        Load all primitives before attempting to resolve them. If ``False`` any primitive used in a
+        pipeline has to be loaded before calling the resolver.
+    primitives_blocklist:
+        A collection of primitive path prefixes to not (try to) load.
+ """ + + strict_resolving: bool + strict_digest: bool + pipeline_search_paths: typing.Sequence[str] + + def __init__(self, *, strict_resolving: bool = False, strict_digest: bool = False, + pipeline_search_paths: typing.Sequence[str] = None, + respect_environment_variable: bool = True, load_all_primitives: bool = True, + primitives_blocklist: typing.Collection[str] = None) -> None: + self.strict_resolving = strict_resolving + self.strict_digest = strict_digest + self.primitives_blocklist = primitives_blocklist + + if pipeline_search_paths is None: + self.pipeline_search_paths: typing.List[str] = [] + else: + self.pipeline_search_paths = typing.cast(typing.List[str], pipeline_search_paths) + + if respect_environment_variable: + self.pipeline_search_paths += [path for path in os.environ.get(environment_variables.PIPELINES_PATH, '').split(':') if path] + + self._load_all_primitives = load_all_primitives + self._primitives_loaded = False + self._get_primitive_failed: typing.Set[str] = set() + + def get_primitive(self, primitive_description: typing.Dict) -> typing.Optional[typing.Type[base.PrimitiveBase]]: + primitive = self._get_primitive(primitive_description) + + # This class always resolves a primitive, or throws an exception, but subclasses might return "None". + if primitive is not None: + self._check_primitive(primitive_description, primitive) + + return primitive + + @classmethod + def get_pipeline_class(cls) -> 'typing.Type[Pipeline]': + return Pipeline + + def get_pipeline(self, pipeline_description: typing.Dict) -> 'typing.Optional[Pipeline]': + pipeline = self._get_pipeline(pipeline_description) + + # This class always resolves a pipeline, or throws an exception, but subclasses might return "None". + if pipeline is not None: + self._check_pipeline(pipeline_description, pipeline) + + return pipeline + + def _get_pipeline(self, pipeline_description: typing.Dict) -> 'typing.Optional[Pipeline]': + # If more than just "id" and "digest" is in the pipeline description, + # then we assume it is a full pipeline description. Digest is optional. 
+ if set(pipeline_description.keys()) - {'digest'} > {'id'}: + return self._from_structure(pipeline_description) + else: + return self._from_file(pipeline_description) + + def _from_structure(self, pipeline_description: typing.Dict) -> 'typing.Optional[Pipeline]': + return self.get_pipeline_class().from_json_structure(pipeline_description, resolver=self, strict_digest=self.strict_digest) + + def _from_file(self, pipeline_description: typing.Dict) -> 'typing.Optional[Pipeline]': + for path in self.pipeline_search_paths: + for extension in ['json', 'json.gz']: + pipeline_path = os.path.join(path, '{pipeline_id}.{extension}'.format(pipeline_id=pipeline_description['id'], extension=extension)) + try: + with utils.open(pipeline_path, 'r', encoding='utf8') as pipeline_file: + return self.get_pipeline_class().from_json(pipeline_file, resolver=self, strict_digest=self.strict_digest) + except FileNotFoundError: + pass + + for extension in ['yml', 'yaml', 'yml.gz', 'yaml.gz']: + pipeline_path = os.path.join(path, '{pipeline_id}.{extension}'.format(pipeline_id=pipeline_description['id'], extension=extension)) + try: + with utils.open(pipeline_path, 'r', encoding='utf8') as pipeline_file: + return self.get_pipeline_class().from_yaml(pipeline_file, resolver=self, strict_digest=self.strict_digest) + except FileNotFoundError: + pass + + raise exceptions.InvalidArgumentValueError("Unable to get pipeline '{pipeline_id}'.".format(pipeline_id=pipeline_description['id'])) + + def _get_primitive_by_path(self, primitive_description: typing.Dict) -> typing.Optional[typing.Type[base.PrimitiveBase]]: + if primitive_description['python_path'] in self._get_primitive_failed: + return None + + try: + # We first try to directly load the primitive using its Python path. + primitive = index.get_primitive(primitive_description['python_path']) + except Exception: + # We make sure we attempt to directly load the primitive only once. Otherwise error messages + # during loading could be printed out again and again, every time we try to get this primitive. + self._get_primitive_failed.add(primitive_description['python_path']) + primitive = None + + # Then we check that the loaded primitive matches the requested primitive ID. + # This way we can load primitive's without having to load all primitives in + # the common case, when the Python path of the primitive has not changed. + if primitive is not None and primitive.metadata.query()['id'] == primitive_description['id']: + return primitive + + return None + + def _load_primitives(self) -> None: + if not self._load_all_primitives: + return + + if self._primitives_loaded: + return + self._primitives_loaded = True + + # We attempt to load all primitives only once. Otherwise error messages for failed primitives + # during loading could be printed out again and again. 
+ index.load_all(blocklist=self.primitives_blocklist) + + def _get_primitive(self, primitive_description: typing.Dict) -> typing.Optional[typing.Type[base.PrimitiveBase]]: + if not self._primitives_loaded: + primitive = self._get_primitive_by_path(primitive_description) + + if primitive is not None: + return primitive + + self._load_primitives() + + return index.get_primitive_by_id(primitive_description['id']) + + def _check_primitive(self, primitive_description: typing.Dict, primitive: typing.Type[base.PrimitiveBase]) -> None: + primitive_metadata = primitive.metadata.query() + + if primitive_metadata['version'] != primitive_description['version']: + if self.strict_resolving: + raise exceptions.MismatchError( + "Version for primitive '{primitive_id}' does not match the one specified in the primitive description. " + "Primitive description version: '{primitive_version}'. Resolved primitive version: '{resolved_primitive_version}'.".format( + primitive_id=primitive_metadata['id'], + primitive_version=primitive_description['version'], + resolved_primitive_version=primitive_metadata['version'], + ) + ) + else: + logger.warning( + "Version for primitive '%(primitive_id)s' does not match the one specified in the primitive description. " + "Primitive description version: '%(primitive_version)s'. Resolved primitive version: '%(resolved_primitive_version)s'.", + { + 'primitive_id': primitive_metadata['id'], + 'primitive_version': primitive_description['version'], + 'resolved_primitive_version': primitive_metadata['version'], + }, + ) + + if primitive_metadata['python_path'] != primitive_description['python_path']: + if self.strict_resolving: + raise exceptions.MismatchError( + "Python path for primitive '{primitive_id}' does not match the one specified in the primitive description. " + "Primitive description Python path: '{primitive_python_path}'. Resolved primitive Python path: '{resolved_primitive_python_path}'.".format( + primitive_id=primitive_metadata['id'], + primitive_python_path=primitive_description['python_path'], + resolved_primitive_python_path=primitive_metadata['python_path'], + ) + ) + else: + logger.warning( + "Python path for primitive '%(primitive_id)s' does not match the one specified in the primitive description. " + "Primitive description Python path: '%(primitive_python_path)s'. Resolved primitive Python path: '%(resolved_primitive_python_path)s'.", + { + 'primitive_id': primitive_metadata['id'], + 'primitive_python_path': primitive_description['python_path'], + 'resolved_primitive_python_path': primitive_metadata['python_path'], + }, + ) + + if primitive_metadata['name'] != primitive_description['name']: + if self.strict_resolving: + raise exceptions.MismatchError( + "Name for primitive '{primitive_id}' does not match the one specified in the primitive description. " + "Primitive description name: '{primitive_name}'. Resolved primitive name: '{resolved_primitive_name}'.".format( + primitive_id=primitive_metadata['id'], + primitive_name=primitive_description['name'], + resolved_primitive_name=primitive_metadata['name'], + ) + ) + else: + logger.warning( + "Name for primitive '%(primitive_id)s' does not match the one specified in the primitive description. " + "Primitive description name: '%(primitive_name)s'. 
Resolved primitive name: '%(resolved_primitive_name)s'.", + { + 'primitive_id': primitive_metadata['id'], + 'primitive_name': primitive_description['name'], + 'resolved_primitive_name': primitive_metadata['name'], + }, + ) + + if 'digest' in primitive_description: + assert primitive_description['digest'] is not None + + if primitive_metadata.get('digest', None) != primitive_description['digest']: + if self.strict_digest: + raise exceptions.DigestMismatchError( + "Digest for primitive '{primitive_id}' does not match the one specified in the primitive description. " + "Primitive description digest: {primitive_digest}. Resolved primitive digest: {resolved_primitive_digest}.".format( + primitive_id=primitive_metadata['id'], + primitive_digest=primitive_description['digest'], + resolved_primitive_digest=primitive_metadata.get('digest', None), + ) + ) + else: + logger.warning( + "Digest for primitive '%(primitive_id)s' does not match the one specified in the primitive description. " + "Primitive description digest: %(primitive_digest)s. Resolved primitive digest: %(resolved_primitive_digest)s.", + { + 'primitive_id': primitive_metadata['id'], + 'primitive_digest': primitive_description['digest'], + 'resolved_primitive_digest': primitive_metadata.get('digest', None), + }, + ) + + def _check_pipeline(self, pipeline_description: typing.Dict, pipeline: 'Pipeline') -> None: + # This can happen if the file has a filename for one pipeline ID but the contents have another pipeline ID. + if pipeline.id != pipeline_description['id']: + if self.strict_resolving: + raise exceptions.MismatchError( + "ID of pipeline '{resolved_pipeline_id}' does not match the one specified in the pipeline description. " + "Pipeline description ID: '{pipeline_id}'. Resolved pipeline ID: '{resolved_pipeline_id}'.".format( + pipeline_id=pipeline_description['id'], + resolved_pipeline_id=pipeline.id, + ) + ) + else: + logger.warning( + "ID of pipeline '%(resolved_pipeline_id)s' does not match the one specified in the pipeline description. " + "Pipeline description ID: '%(pipeline_id)s'. Resolved pipeline ID: '%(resolved_pipeline_id)s'.", + { + 'pipeline_id': pipeline_description['id'], + 'resolved_pipeline_id': pipeline.id, + }, + ) + + if 'digest' in pipeline_description: + assert pipeline_description['digest'] is not None + + pipeline_digest = pipeline.get_digest() + + if pipeline_digest != pipeline_description['digest']: + if self.strict_digest: + raise exceptions.DigestMismatchError( + "Digest for pipeline '{pipeline_id}' does not match the one specified in the pipeline description. " + "Pipeline description digest: {pipeline_digest}. Resolved pipeline digest: {resolved_pipeline_digest}.".format( + pipeline_id=pipeline.id, + pipeline_digest=pipeline_description['digest'], + resolved_pipeline_digest=pipeline_digest, + ) + ) + else: + logger.warning( + "Digest for pipeline '%(pipeline_id)s' does not match the one specified in the pipeline description. " + "Pipeline description digest: %(pipeline_digest)s. Resolved pipeline digest: %(resolved_pipeline_digest)s.", + { + 'pipeline_id': pipeline.id, + 'pipeline_digest': pipeline_description['digest'], + 'resolved_pipeline_digest': pipeline_digest, + }, + ) + + +class NoResolver(Resolver): + """ + A resolver which never resolves anything. 
+ """ + + def _get_primitive(self, primitive_description: typing.Dict) -> typing.Optional[typing.Type[base.PrimitiveBase]]: + return None + + def _get_pipeline(self, pipeline_description: typing.Dict) -> 'typing.Optional[Pipeline]': + return None + + +S = typing.TypeVar('S', bound='StepBase') + + +class StepBase(metaclass=utils.AbstractMetaclass): + """ + Class representing one step in pipeline's execution. + + Attributes + ---------- + index: + An index of the step among steps in the pipeline. + resolver: + Resolver to use. + + Parameters + ---------- + resolver: + Resolver to use. + """ + + index: int + resolver: Resolver + + def __init__(self, *, resolver: typing.Optional[Resolver] = None) -> None: + self.resolver = self.get_resolver(resolver) + + self.index: int = None + + @classmethod + def get_resolver(cls, resolver: typing.Optional[Resolver]) -> Resolver: + if resolver is None: + return Resolver() + else: + return resolver + + @classmethod + @abc.abstractmethod + def get_step_type(cls) -> metadata_base.PipelineStepType: + pass + + def check_add(self, existing_steps: 'typing.Sequence[StepBase]', available_data_references: typing.AbstractSet[str]) -> None: + """ + Checks if a step can be added given existing steps and available + data references to provide to the step. It also checks if the + state of a step is suitable to be added at this point. + + Raises an exception if check fails. + + Parameters + ---------- + existing_steps: + Steps already in the pipeline. + available_data_references: + A set of available data references. + """ + + def set_index(self, index: int) -> None: + if self.index is not None: + raise exceptions.InvalidArgumentValueError("Index already set to {index}.".format(index=self.index)) + + self.index = index + + @abc.abstractmethod + def get_free_hyperparams(self) -> typing.Union[typing.Dict, typing.Sequence]: + """ + Returns step's hyper-parameters which have not been fixed by the pipeline. + + Returns + ------- + Hyper-parameters configuration for free hyper-parameters, or a list of those. + """ + + @abc.abstractmethod + def get_all_hyperparams(self) -> typing.Union[typing.Dict, typing.Sequence]: + """ + Returns step's hyper-parameters. + + Returns + ------- + Hyper-parameters configuration for all hyper-parameters, or a list of those. + """ + + @abc.abstractmethod + def get_input_data_references(self) -> typing.AbstractSet[str]: + pass + + @abc.abstractmethod + def get_output_data_references(self) -> typing.AbstractSet[str]: + pass + + @classmethod + @abc.abstractmethod + def from_json_structure(cls: typing.Type[S], step_description: typing.Dict, *, resolver: Resolver = None) -> S: + pass + + @abc.abstractmethod + def to_json_structure(self) -> typing.Dict: + pass + + +SP = typing.TypeVar('SP', bound='PrimitiveStep') + + +class PrimitiveStep(StepBase): + """ + Class representing a primitive execution step in pipeline's execution. + + Attributes + ---------- + primitive_description: + A description of the primitive specified for this step. Available if ``primitive`` could not be resolved. + primitive: + A primitive class associated with this step. + outputs: + A list of method names providing outputs for this step. + hyperparams: + A map of of fixed hyper-parameters to their values which are set + as part of a pipeline and should not be tuned during hyper-parameter tuning. + arguments: + A map between argument name and its description. Description contains + a data reference of an output of a prior step (or a pipeline input). 
+ users: + Users associated with the primitive. + + Parameters + ---------- + primitive_description: + A description of the primitive specified for this step. Allowed only if ``primitive`` is not provided. + primitive: + A primitive class associated with this step. If not provided, resolved using ``resolver`` from ``primitive_description``. + """ + + primitive_description: typing.Dict + primitive: typing.Type[base.PrimitiveBase] + outputs: typing.List[str] + hyperparams: typing.Dict[str, typing.Dict] + arguments: typing.Dict[str, typing.Dict] + users: typing.List[typing.Dict] + + def __init__(self, primitive_description: typing.Dict = None, *, primitive: typing.Type[base.PrimitiveBase] = None, resolver: typing.Optional[Resolver] = None) -> None: + super().__init__(resolver=resolver) + + if primitive is None: + if primitive_description is None: + raise exceptions.InvalidArgumentValueError("\"primitive_description\" and \"primitive\" arguments are both None.") + + primitive = self.resolver.get_primitive(primitive_description) + elif primitive_description is not None: + raise exceptions.InvalidArgumentValueError("\"primitive_description\" and \"primitive\" arguments cannot be both provided.") + + if primitive is None: + # If still "None" it means resolver returned "None". + # We just store provided primitive description. + self.primitive_description = primitive_description + self.primitive = None + else: + self.primitive_description = None + self.primitive = primitive + + self.outputs: typing.List[str] = [] + self.hyperparams: typing.Dict[str, typing.Dict] = {} + self.arguments: typing.Dict[str, typing.Dict] = {} + self.users: typing.List[typing.Dict] = [] + + @classmethod + def get_step_type(cls) -> metadata_base.PipelineStepType: + return metadata_base.PipelineStepType.PRIMITIVE + + def add_argument(self, name: str, argument_type: typing.Any, data_reference: typing.Union[str, typing.Sequence[str]]) -> None: + """ + Associate a data reference to an argument of this step (and underlying primitive). + + Parameters + ---------- + name: + Argument name. + argument_type: + Argument type. + data_reference: + Data reference or a list of data references associated with this argument. 
+ """ + + if name in self.arguments: + raise exceptions.InvalidArgumentValueError("Argument with name '{name}' already exists.".format(name=name)) + + if argument_type not in [metadata_base.ArgumentType.CONTAINER, metadata_base.ArgumentType.DATA]: + raise exceptions.InvalidArgumentValueError("Invalid argument type: {argument_type}".format(argument_type=argument_type)) + + if not isinstance(data_reference, str) and not utils.is_instance(data_reference, typing.Sequence[str]): + raise exceptions.InvalidArgumentTypeError("Data reference is not a string or a list of strings.".format(name=name)) + + if self.primitive is not None: + argument_metadata = self.primitive.metadata.query()['primitive_code'].get('arguments', {}).get(name, None) + + if argument_metadata is None: + raise exceptions.InvalidArgumentValueError( + "Unknown argument name '{name}' for primitive {primitive}.".format( + name=name, + primitive=self.primitive, + ), + ) + + if argument_metadata['kind'] != metadata_base.PrimitiveArgumentKind.PIPELINE: + raise exceptions.InvalidArgumentValueError( + "Pipelines can provide only pipeline arguments, '{name}' is of kind {kind}.".format( + name=name, + kind=argument_metadata['kind'], + ), + ) + + self.arguments[name] = { + 'type': argument_type, + 'data': data_reference, + } + + def add_output(self, output_id: str) -> None: + """ + Define an output from this step. + + Underlying primitive can have multiple produce methods but not all have to be + defined as outputs of the step. + + Parameters + ---------- + output_id: + A name of the method producing this output. + """ + + if output_id in self.outputs: + raise exceptions.InvalidArgumentValueError("Output with ID '{output_id}' already exists.".format(output_id=output_id)) + + if self.primitive is not None: + method_metadata = self.primitive.metadata.query()['primitive_code'].get('instance_methods', {}).get(output_id, None) + + if method_metadata is None: + raise exceptions.InvalidArgumentValueError( + "Unknown output ID '{output_id}' for primitive {primitive}.".format( + output_id=output_id, + primitive=self.primitive, + ), + ) + + if method_metadata['kind'] != metadata_base.PrimitiveMethodKind.PRODUCE: + raise exceptions.InvalidArgumentValueError( + "Primitives can output only from produce methods, '{output_id}' is of kind {kind}.".format( + output_id=output_id, + kind=method_metadata['kind'], + ), + ) + + self.outputs.append(output_id) + + def add_hyperparameter(self, name: str, argument_type: typing.Any, data: typing.Any) -> None: + """ + Associate a value for a hyper-parameter of this step (and underlying primitive). + + Parameters + ---------- + name: + Hyper-parameter name. + argument_type: + Argument type. + data: + Data reference associated with this hyper-parameter, or list of data references, or value itself. 
+ """ + + if name in self.hyperparams: + raise exceptions.InvalidArgumentValueError("Hyper-parameter with name '{name}' already exists.".format(name=name)) + + if self.primitive is not None: + hyperparams = self.get_primitive_hyperparams() + + if name not in hyperparams.configuration: + raise exceptions.InvalidArgumentValueError( + "Unknown hyper-parameter name '{name}' for primitive {primitive}.".format( + name=name, + primitive=self.primitive, + ), + ) + + if argument_type == metadata_base.ArgumentType.VALUE: + hyperparams.configuration[name].validate(data) + + if argument_type in [metadata_base.ArgumentType.DATA, metadata_base.ArgumentType.PRIMITIVE]: + if utils.is_sequence(data): + if not len(data): + raise exceptions.InvalidArgumentValueError("An empty list of hyper-paramater values.") + + self.hyperparams[name] = { + 'type': argument_type, + 'data': data, + } + + def add_user(self, user_description: typing.Dict) -> None: + """ + Add a description of user to a list of users associated with the primitive. + + Parameters + ---------- + user_description: + User description. + """ + + if 'id' not in user_description: + raise exceptions.InvalidArgumentValueError("User description is missing user ID.") + + self.users.append(user_description) + + def check_add(self, existing_steps: typing.Sequence[StepBase], available_data_references: typing.AbstractSet[str]) -> None: + # Order of steps can be arbitrary during execution (given that inputs for a step are available), but we still + # want some partial order during construction. We want that arguments can already be satisfied by existing steps. + for argument_description in self.arguments.values(): + if utils.is_sequence(argument_description['data']): + data_references = argument_description['data'] + else: + data_references = typing.cast(typing.Sequence, [argument_description['data']]) + for data_reference in data_references: + if not isinstance(data_reference, str): + raise exceptions.InvalidArgumentTypeError("Argument data reference '{data_reference}' is not a string.".format(data_reference=data_reference)) + elif data_reference not in available_data_references: + raise exceptions.InvalidPipelineError("Argument data reference '{data_reference}' is not among available data references.".format( + data_reference=data_reference, + )) + + for hyperparameter_description in self.hyperparams.values(): + if hyperparameter_description['type'] == metadata_base.ArgumentType.DATA: + if utils.is_sequence(hyperparameter_description['data']): + data_references = hyperparameter_description['data'] + else: + data_references = typing.cast(typing.Sequence, [hyperparameter_description['data']]) + for data_reference in data_references: + if not isinstance(data_reference, str): + raise exceptions.InvalidArgumentTypeError("Hyper-parameter data reference '{data_reference}' is not a string.".format(data_reference=data_reference)) + elif data_reference not in available_data_references: + raise exceptions.InvalidPipelineError("Hyper-parameter data reference '{data_reference}' is not among available data references.".format( + data_reference=data_reference, + )) + elif hyperparameter_description['type'] == metadata_base.ArgumentType.PRIMITIVE: + if utils.is_sequence(hyperparameter_description['data']): + primitive_references = hyperparameter_description['data'] + else: + primitive_references = typing.cast(typing.Sequence, [hyperparameter_description['data']]) + for primitive_reference in primitive_references: + if not isinstance(primitive_reference, int): + raise 
exceptions.InvalidArgumentTypeError("Primitive reference '{primitive_reference}' is not an integer.".format(primitive_reference=primitive_reference)) + elif not 0 <= primitive_reference < len(existing_steps): + raise exceptions.InvalidPipelineError("Invalid primitive reference in a step: {primitive}".format(primitive=primitive_reference)) + elif not isinstance(existing_steps[primitive_reference], PrimitiveStep): + raise exceptions.InvalidArgumentTypeError("Primitive reference '{primitive_reference}' is not referencing a primitive step.".format(primitive_reference=primitive_reference)) + elif hyperparameter_description['type'] == metadata_base.ArgumentType.CONTAINER: + if not isinstance(hyperparameter_description['data'], str): + raise exceptions.InvalidArgumentTypeError("Hyper-parameter data reference '{data_reference}' is not a string.".format( + data_reference=hyperparameter_description['data'], + )) + elif hyperparameter_description['data'] not in available_data_references: + raise exceptions.InvalidPipelineError("Hyper-parameter data reference '{data_reference}' is not among available data references.".format( + data_reference=hyperparameter_description['data'], + )) + elif hyperparameter_description['type'] == metadata_base.ArgumentType.VALUE: + # "VALUE" hyper-parameter value has already been checked in "add_hyperparameter". + pass + else: + raise exceptions.UnexpectedValueError("Unknown hyper-parameter type: {hyperparameter_type}".format(hyperparameter_type=hyperparameter_description['type'])) + + # We do this check only if primitive has any arguments or outputs defined. + # Otherwise it can be used as a unfitted primitive value for a hyper-parameter to another primitive. + if self.primitive is not None and (self.arguments or self.outputs): + primitive_arguments = self.primitive.metadata.query()['primitive_code'].get('arguments', {}) + required_arguments_set = { + argument_name for argument_name, argument in primitive_arguments.items() if 'default' not in argument and argument['kind'] == metadata_base.PrimitiveArgumentKind.PIPELINE + } + + arguments_set = set(self.arguments.keys()) + + missing_arguments_set = required_arguments_set - arguments_set + if len(missing_arguments_set): + raise exceptions.InvalidArgumentValueError( + "Not all required arguments are provided for the primitive: {missing_arguments_set}".format( + missing_arguments_set=missing_arguments_set, + ) + ) + + def get_primitive_hyperparams(self) -> hyperparams_module.Hyperparams: + if self.primitive is None: + raise exceptions.InvalidStateError("Primitive has not been resolved.") + + return self.primitive.metadata.get_hyperparams() + + def get_free_hyperparams(self) -> typing.Dict: + free_hyperparams = collections.OrderedDict(self.get_primitive_hyperparams().configuration) + + for hyperparam in self.hyperparams: + del free_hyperparams[hyperparam] + + return free_hyperparams + + def get_all_hyperparams(self) -> typing.Dict: + return collections.OrderedDict(self.get_primitive_hyperparams().configuration) + + def get_input_data_references(self) -> typing.AbstractSet[str]: + data_references = set() + + for argument_description in self.arguments.values(): + if utils.is_sequence(argument_description['data']): + for data_reference in argument_description['data']: + data_references.add(data_reference) + else: + data_references.add(argument_description['data']) + + for hyperparameter_description in self.hyperparams.values(): + if hyperparameter_description['type'] == metadata_base.ArgumentType.VALUE: + continue + + if 
hyperparameter_description['type'] == metadata_base.ArgumentType.PRIMITIVE: + continue + + if utils.is_sequence(hyperparameter_description['data']): + for data_reference in hyperparameter_description['data']: + data_references.add(data_reference) + else: + data_references.add(hyperparameter_description['data']) + + return data_references + + def get_output_data_references(self) -> typing.AbstractSet[str]: + data_references = set() + + for output_id in self.outputs: + data_references.add('steps.{i}.{output_id}'.format(i=self.index, output_id=output_id)) + + return data_references + + @classmethod + def from_json_structure(cls: typing.Type[SP], step_description: typing.Dict, *, resolver: typing.Optional[Resolver] = None) -> SP: + step = cls(step_description['primitive'], resolver=resolver) + + for argument_name, argument_description in step_description.get('arguments', {}).items(): + argument_type = metadata_base.ArgumentType[argument_description['type']] + step.add_argument(argument_name, argument_type, argument_description['data']) + + for output_description in step_description.get('outputs', []): + step.add_output(output_description['id']) + + for hyperparameter_name, hyperparameter_description in step_description.get('hyperparams', {}).items(): + argument_type = metadata_base.ArgumentType[hyperparameter_description['type']] + + # If "primitive" is not available, we do not parse the value and we leave it in its JSON form. + if argument_type == metadata_base.ArgumentType.VALUE and step.primitive is not None: + hyperparams = step.get_primitive_hyperparams() + + if hyperparameter_name not in hyperparams.configuration: + raise exceptions.InvalidArgumentValueError( + "Unknown hyper-parameter name '{name}' for primitive {primitive}.".format( + name=hyperparameter_name, + primitive=step.primitive, + ), + ) + + data = hyperparams.configuration[hyperparameter_name].value_from_json_structure(hyperparameter_description['data']) + + else: + data = hyperparameter_description['data'] + + step.add_hyperparameter(hyperparameter_name, argument_type, data) + + for user_description in step_description.get('users', []): + step.add_user(user_description) + + return step + + def _output_to_json_structure(self, output_id: str) -> typing.Dict: + return {'id': output_id} + + def _hyperparameter_to_json_structure(self, hyperparameter_name: str) -> typing.Dict: + hyperparameter_description = copy.copy(self.hyperparams[hyperparameter_name]) + + hyperparameter_description['type'] = hyperparameter_description['type'].name + + # If "primitive" is not available, we have the value already in its JSON form. 
+ if hyperparameter_description['type'] == metadata_base.ArgumentType.VALUE and self.primitive is not None: + hyperparams = self.get_primitive_hyperparams() + + if hyperparameter_name not in hyperparams.configuration: + raise exceptions.InvalidArgumentValueError( + "Unknown hyper-parameter name '{name}' for primitive {primitive}.".format( + name=hyperparameter_name, + primitive=self.primitive, + ), + ) + + hyperparameter_description['data'] = hyperparams.configuration[hyperparameter_name].value_to_json_structure(hyperparameter_description['data']) + + return hyperparameter_description + + def _argument_to_json_structure(self, argument_name: str) -> typing.Dict: + argument_description = copy.copy(self.arguments[argument_name]) + + argument_description['type'] = argument_description['type'].name + + return argument_description + + def to_json_structure(self) -> typing.Dict: + if self.primitive is None: + primitive_description = self.primitive_description + else: + primitive_metadata = self.primitive.metadata.query() + primitive_description = { + 'id': primitive_metadata['id'], + 'version': primitive_metadata['version'], + 'python_path': primitive_metadata['python_path'], + 'name': primitive_metadata['name'], + } + + if 'digest' in primitive_metadata: + primitive_description['digest'] = primitive_metadata['digest'] + + step_description = { + 'type': self.get_step_type().name, + 'primitive': primitive_description, + } + + if self.arguments: + step_description['arguments'] = {argument_name: self._argument_to_json_structure(argument_name) for argument_name in self.arguments.keys()} + + if self.outputs: + step_description['outputs'] = [self._output_to_json_structure(output_id) for output_id in self.outputs] + + if self.hyperparams: + hyperparams = {} + + for hyperparameter_name in self.hyperparams.keys(): + hyperparams[hyperparameter_name] = self._hyperparameter_to_json_structure(hyperparameter_name) + + step_description['hyperparams'] = hyperparams + + if self.users: + step_description['users'] = self.users + + return step_description + + def get_primitive_id(self) -> str: + if self.primitive is not None: + return self.primitive.metadata.query()['id'] + else: + return self.primitive_description['id'] + + +SS = typing.TypeVar('SS', bound='SubpipelineStep') + + +class SubpipelineStep(StepBase): + def __init__(self, pipeline_description: typing.Dict = None, *, pipeline: 'Pipeline' = None, resolver: typing.Optional[Resolver] = None) -> None: + super().__init__(resolver=resolver) + + if pipeline is None: + if pipeline_description is None: + raise exceptions.InvalidArgumentValueError("\"pipeline_description\" and \"pipeline\" arguments are both None.") + + pipeline = self.resolver.get_pipeline(pipeline_description) + elif pipeline_description is not None: + raise exceptions.InvalidArgumentValueError("\"pipeline_description\" and \"pipeline\" arguments cannot be both provided.") + + if pipeline is None: + # If still "None" it means resolver returned "None". + # We just store provided pipeline description. 
+ self.pipeline_description = pipeline_description + self.pipeline = None + else: + self.pipeline_description = None + self.pipeline = pipeline + + self.inputs: typing.List[str] = [] + self.outputs: typing.List[typing.Optional[str]] = [] + + @classmethod + def get_step_type(cls) -> metadata_base.PipelineStepType: + return metadata_base.PipelineStepType.SUBPIPELINE + + def add_input(self, data_reference: str) -> None: + if self.pipeline is not None: + if len(self.inputs) == len(self.pipeline.inputs): + raise exceptions.InvalidArgumentValueError("All pipeline's inputs are already provided.") + + self.inputs.append(data_reference) + + def add_output(self, output_id: typing.Optional[str]) -> None: + """ + Define an output from this step. + + Underlying pipeline can have multiple outputs but not all have to be + defined as outputs of the step. They can be skipped using ``None``. + + Parameters + ---------- + output_id: + ID to be used in the data reference, mapping pipeline's outputs in order. + If ``None`` this pipeline's output is ignored and not mapped to a data reference. + """ + + if output_id is not None: + if output_id in self.outputs: + raise exceptions.InvalidArgumentValueError("Output with ID '{output_id}' already exists.".format(output_id=output_id)) + + if self.pipeline is not None: + if len(self.outputs) == len(self.pipeline.outputs): + raise exceptions.InvalidArgumentValueError("All pipeline's outputs are already mapped.") + + self.outputs.append(output_id) + + def check_add(self, existing_steps: 'typing.Sequence[StepBase]', available_data_references: typing.AbstractSet[str]) -> None: + # Order of steps can be arbitrary during execution (given that inputs for a step are available), but we still + # want some partial order during construction. We want that arguments can already be satisfied by existing steps. + for data_reference in self.inputs: + if not isinstance(data_reference, str): + raise exceptions.InvalidArgumentTypeError("Input data reference '{data_reference}' is not a string.".format(data_reference=data_reference)) + elif data_reference not in available_data_references: + raise exceptions.InvalidPipelineError("Input data reference '{data_reference}' is not among available data references.".format(data_reference=data_reference)) + + # TODO: Check that all inputs are satisfied? 
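+
+    # A minimal usage sketch (not part of the original code): "outer_pipeline" and
+    # "inner_pipeline" are hypothetical, already constructed Pipeline objects, where
+    # "inner_pipeline" has one input and two outputs.
+    #
+    #     step = SubpipelineStep(pipeline=inner_pipeline)
+    #     step.add_input('steps.0.produce')  # feeds the sub-pipeline's "inputs.0"
+    #     step.add_output('predictions')     # exposes the sub-pipeline's first output
+    #     step.add_output(None)              # its second output is not exposed
+    #     outer_pipeline.add_step(step)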
+ + def get_free_hyperparams(self) -> typing.Sequence: + if self.pipeline is None: + raise exceptions.InvalidStateError("Pipeline has not been resolved.") + + return self.pipeline.get_free_hyperparams() + + def get_all_hyperparams(self) -> typing.Sequence: + if self.pipeline is None: + raise exceptions.InvalidStateError("Pipeline has not been resolved.") + + return self.pipeline.get_all_hyperparams() + + def get_input_data_references(self) -> typing.AbstractSet[str]: + return set(self.inputs) + + def get_output_data_references(self) -> typing.AbstractSet[str]: + data_references = set() + + for output_id in self.outputs: + if output_id is not None: + data_references.add('steps.{i}.{output_id}'.format(i=self.index, output_id=output_id)) + + return data_references + + @classmethod + def from_json_structure(cls: typing.Type[SS], step_description: typing.Dict, *, resolver: Resolver = None) -> SS: + step = cls(step_description['pipeline'], resolver=resolver) + + for input_description in step_description['inputs']: + step.add_input(input_description['data']) + + for output_description in step_description['outputs']: + step.add_output(output_description.get('id', None)) + + return step + + def _input_to_json_structure(self, data_reference: str) -> typing.Dict: + return {'data': data_reference} + + def _output_to_json_structure(self, output_id: typing.Optional[str]) -> typing.Dict: + if output_id is None: + return {} + else: + return {'id': output_id} + + def to_json_structure(self, *, nest_subpipelines: bool = False) -> typing.Dict: + if nest_subpipelines: + if self.pipeline is None: + raise exceptions.InvalidStateError("Pipeline has not been resolved.") + + pipeline_description = self.pipeline._to_json_structure(nest_subpipelines=True) + elif self.pipeline is None: + pipeline_description = self.pipeline_description + else: + pipeline_description = { + 'id': self.pipeline.id, + 'digest': self.pipeline.get_digest(), + } + + step_description = { + 'type': self.get_step_type().name, + 'pipeline': pipeline_description, + 'inputs': [self._input_to_json_structure(data_reference) for data_reference in self.inputs], + 'outputs': [self._output_to_json_structure(output_id) for output_id in self.outputs], + } + + return step_description + + def get_pipeline_id(self) -> str: + if self.pipeline is not None: + return self.pipeline.id + else: + return self.pipeline_description['id'] + + +SL = typing.TypeVar('SL', bound='PlaceholderStep') + + +class PlaceholderStep(StepBase): + def __init__(self, resolver: Resolver = None) -> None: + super().__init__(resolver=resolver) + + self.inputs: typing.List[str] = [] + self.outputs: typing.List[str] = [] + + @classmethod + def get_step_type(cls) -> metadata_base.PipelineStepType: + return metadata_base.PipelineStepType.PLACEHOLDER + + def add_input(self, data_reference: str) -> None: + self.inputs.append(data_reference) + + def add_output(self, output_id: str) -> None: + if output_id in self.outputs: + raise exceptions.InvalidArgumentValueError("Output with ID '{output_id}' already exists.".format(output_id=output_id)) + + self.outputs.append(output_id) + + def check_add(self, existing_steps: 'typing.Sequence[StepBase]', available_data_references: typing.AbstractSet[str]) -> None: + # Order of steps can be arbitrary during execution (given that inputs for a step are available), but we still + # want some partial order during construction. We want that arguments can already be satisfied by existing steps. 
+ for data_reference in self.inputs: + if not isinstance(data_reference, str): + raise exceptions.InvalidArgumentTypeError("Input data reference '{data_reference}' is not a string.".format(data_reference=data_reference)) + elif data_reference not in available_data_references: + raise exceptions.InvalidArgumentValueError("Input data reference '{data_reference}' is not among available data references.".format(data_reference=data_reference)) + + def get_free_hyperparams(self) -> typing.Sequence: + return [] + + def get_all_hyperparams(self) -> typing.Sequence: + return [] + + def get_input_data_references(self) -> typing.AbstractSet[str]: + return set(self.inputs) + + def get_output_data_references(self) -> typing.AbstractSet[str]: + data_references = set() + + for output_id in self.outputs: + data_references.add('steps.{i}.{output_id}'.format(i=self.index, output_id=output_id)) + + return data_references + + @classmethod + def from_json_structure(cls: typing.Type[SL], step_description: typing.Dict, *, resolver: Resolver = None) -> SL: + step = cls(resolver=resolver) + + for input_description in step_description['inputs']: + step.add_input(input_description['data']) + + for output_description in step_description['outputs']: + step.add_output(output_description['id']) + + return step + + def _input_to_json_structure(self, data_reference: str) -> typing.Dict: + return {'data': data_reference} + + def _output_to_json_structure(self, output_id: str) -> typing.Dict: + return {'id': output_id} + + def to_json_structure(self) -> typing.Dict: + step_description = { + 'type': self.get_step_type().name, + 'inputs': [self._input_to_json_structure(data_reference) for data_reference in self.inputs], + 'outputs': [self._output_to_json_structure(output_id) for output_id in self.outputs], + } + + return step_description + + +P = typing.TypeVar('P', bound='Pipeline') + + +class Pipeline: + """ + Class representing a pipeline. + + Attributes + ---------- + id: + A unique ID to identify this pipeline. + created: + Timestamp of pipeline creation in UTC timezone. + source: + Description of source. + name: + Name of the pipeline. + description: + Description of the pipeline. + users: + Users associated with the pipeline. + inputs: + A sequence of input descriptions which provide names for pipeline inputs. + outputs: + A sequence of output descriptions which provide data references for pipeline outputs. + steps: + A sequence of steps defining this pipeline. + + Parameters + ---------- + pipeline_id: + Optional ID for the pipeline. If not provided, it is automatically generated. + context: + DEPRECATED: argument ignored. + created: + Optional timestamp of pipeline creation in UTC timezone. If not provided, the current time will be used. + source: + Description of source. Optional. + name: + Name of the pipeline. Optional. + description: + Description of the pipeline. Optional. 
+ """ + + id: str + created: datetime.datetime + source: typing.Dict + name: str + description: str + users: typing.List[typing.Dict] + inputs: typing.List[typing.Dict] + outputs: typing.List[typing.Dict] + steps: typing.List[StepBase] + + @deprecate.arguments('context', message="argument ignored") + def __init__( + self, pipeline_id: str = None, *, context: metadata_base.Context = None, + created: datetime.datetime = None, source: typing.Dict = None, name: str = None, + description: str = None + ) -> None: + if pipeline_id is None: + pipeline_id = str(uuid.uuid4()) + + if created is None: + created = datetime.datetime.now(datetime.timezone.utc) + elif created.tzinfo is None or created.tzinfo.utcoffset(created) is None: + raise exceptions.InvalidArgumentValueError("'created' timestamp is missing timezone information.") + else: + # Convert to UTC timezone and set "tzinfo" to "datetime.timezone.utc". + created = created.astimezone(datetime.timezone.utc) + + self.id = pipeline_id + self.created = created + self.source = source + self.name = name + self.description = description + + self.inputs: typing.List[typing.Dict] = [] + self.outputs: typing.List[typing.Dict] = [] + self.steps: typing.List[StepBase] = [] + self.users: typing.List[typing.Dict] = [] + + def add_input(self, name: str = None) -> str: + """ + Add an input to the pipeline. + + Parameters + ---------- + name: + Optional human friendly name for the input. + + Returns + ------- + Data reference for the input added. + """ + + input_description = {} + + if name is not None: + input_description['name'] = name + + self.inputs.append(input_description) + + return 'inputs.{i}'.format(i=len(self.inputs) - 1) + + def add_output(self, data_reference: str, name: str = None) -> str: + """ + Add an output to the pipeline. + + Parameters + ---------- + data_reference: + Data reference to use as an output. + name: + Optional human friendly name for the output. + + Returns + ------- + Data reference for the output added. + """ + + if data_reference not in self.get_available_data_references(): + raise exceptions.InvalidArgumentValueError("Invalid data reference '{data_reference}'.".format(data_reference=data_reference)) + + output_description = { + 'data': data_reference, + } + + if name is not None: + output_description['name'] = name + + self.outputs.append(output_description) + + return 'outputs.{i}'.format(i=len(self.outputs) - 1) + + def add_step(self, step: StepBase) -> None: + """ + Add a step to the sequence of steps in the pipeline. + + Parameters + ---------- + step: + A step to add. + """ + + if not isinstance(step, StepBase): + raise exceptions.InvalidArgumentTypeError("Step is not an instance of StepBase.") + + step.set_index(len(self.steps)) + + try: + step.check_add(self.steps, self.get_available_data_references()) + except Exception as error: + raise exceptions.InvalidArgumentValueError("Cannot add step {step_index}.".format(step_index=step.index)) from error + + self.steps.append(step) + + def replace_step(self, index: int, replacement_step: StepBase) -> None: + """ + Replace an existing step (generally a placeholder) with a new step + (generally a subpipeline). It makes sure that all inputs are available + at that point in the pipeline, and all outputs needed later from this + step stay available after replacement. + + If the old pipeline (one before the step being replaced) has already been + made public under some ID, make sure that new pipeline (one with replaced + step) has a new different ID before making it public. 
+ + Parameters + ---------- + index: + Index of the step to replace. + replacement_step: + A new step. + """ + + # TODO: Handle the case when there is a primitive reference to this step (which is a primitive step in such case). + # If we are replacing it with a sub-pipeline or placeholder, we should fail. + + if not 0 <= index < len(self.steps): + raise exceptions.InvalidArgumentValueError("Step index does not point to an existing step.") + + if not isinstance(replacement_step, StepBase): + raise exceptions.InvalidArgumentTypeError("Step is not an instance of StepBase.") + + replacement_step.set_index(index) + + try: + replacement_step.check_add(self.steps[0:index], self.get_available_data_references(index)) + except Exception as error: + raise exceptions.InvalidArgumentValueError("Cannot replace step {step_index}.".format(step_index=index)) from error + + # Which inputs are needed later on? + later_input_data_references: typing.Set[str] = set() + for step in self.steps[index + 1:]: + later_input_data_references.update(step.get_input_data_references()) + + # Compute which data references needed later are contributed by existing step? + used_output_data_references = self.steps[index].get_output_data_references() & later_input_data_references + + # A replacement step has to contribute at least those data references as well. + if not replacement_step.get_output_data_references() >= used_output_data_references: + raise exceptions.InvalidArgumentValueError("Cannot replace step {step_index}. Replacement step is not providing needed outputs: {missing_outputs}".format( + step_index=index, + missing_outputs=sorted(used_output_data_references - replacement_step.get_output_data_references()), + )) + + self.steps[index] = replacement_step + + def add_user(self, user_description: typing.Dict) -> None: + """ + Add a description of user to a list of users associated with the pipeline. + + Parameters + ---------- + user_description: + User description. + """ + + if 'id' not in user_description: + raise exceptions.InvalidArgumentValueError("User description is missing user ID.") + + self.users.append(user_description) + + def get_free_hyperparams(self) -> typing.Sequence: + """ + Returns pipeline's hyper-parameters which have not been fixed by the pipeline as + a list of free hyper-parameters for each step, in order of steps. + + Returns + ------- + A list of hyper-parameters configuration for free hyper-parameters for each step. + """ + + return [step.get_free_hyperparams() for step in self.steps] + + def get_all_hyperparams(self) -> typing.Sequence: + """ + Returns pipeline's hyper-parameters as a list of hyper-parameters + for each step, in order of steps. + + Returns + ------- + A list of hyper-parameters configuration for all hyper-parameters for each step. + """ + + return [step.get_all_hyperparams() for step in self.steps] + + def has_placeholder(self) -> bool: + """ + Returns ``True`` if the pipeline has a placeholder step, in the pipeline itself, or any subpipeline. + + Returns + ------- + ``True`` if the pipeline has a placeholder step. 
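+
+        Example (an illustrative sketch, assuming an otherwise empty pipeline)::
+
+            >>> pipeline = Pipeline()
+            >>> input_ref = pipeline.add_input(name='inputs')
+            >>> placeholder = PlaceholderStep()
+            >>> placeholder.add_input(input_ref)
+            >>> placeholder.add_output('produce')
+            >>> pipeline.add_step(placeholder)
+            >>> pipeline.has_placeholder()
+            True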
+ """ + + for step in self.steps: + if isinstance(step, PlaceholderStep): + return True + elif isinstance(step, SubpipelineStep): + if step.pipeline is None: + raise exceptions.InvalidStateError("Pipeline has not been resolved.") + elif step.pipeline.has_placeholder(): + return True + + return False + + def get_available_data_references(self, for_step: int = None) -> typing.AbstractSet[str]: + """ + Returns a set of data references provided by existing steps (and pipeline inputs). + + Those data references can be used by consequent steps as their inputs. + + Attributes + ---------- + for_step: + Instead of using all existing steps, use only steps until ``for_step`` step. + + Returns + ------- + A set of data references. + """ + + data_references = set() + + for i, input_description in enumerate(self.inputs): + data_references.add('inputs.{i}'.format(i=i)) + + for step in self.steps[0:for_step]: + output_data_references = step.get_output_data_references() + + existing_data_references = data_references & output_data_references + if existing_data_references: + raise exceptions.InvalidPipelineError("Steps have overlapping output data references: {existing_data_references}".format(existing_data_references=existing_data_references)) + + data_references.update(output_data_references) + + return data_references + + @deprecate.function(message="use get_producing_outputs method instead") + def get_exposable_outputs(self) -> typing.AbstractSet[str]: + return self.get_producing_outputs() + + def get_producing_outputs(self) -> typing.AbstractSet[str]: + """ + Returns a set of recursive data references of all values produced by the pipeline + during its run. + + This represents outputs of each step of the pipeline, the outputs of the pipeline + itself, but also exposable outputs of any sub-pipeline. The latter are prefixed with + the step prefix, e.g., ``steps.1.steps.4.produce`` is ``steps.4.produce`` output + of a sub-pipeline step with index 1. + + Outputs of sub-pipelines are represented twice, as an output of the step and + as an output of the sub-pipeline. This is done because not all outputs of a sub-pipeline + are necessary exposed as an output of a step because they might not be used in + the outer pipeline, but the sub-pipeline still defines them. + + A primitive might have additional produce methods which could be called but they + are not listed among step's outputs. Data references related to those produce + methods are not returned. + + Returns + ------- + A set of recursive data references. 
+ """ + + exposable_outputs: typing.Set[str] = set() + + for step_index, step in enumerate(self.steps): + output_data_references = set(step.get_output_data_references()) + + if isinstance(step, SubpipelineStep): + for exposable_output in step.pipeline.get_producing_outputs(): + output_data_references.add('steps.{step_index}.{exposable_output}'.format( + step_index=step_index, + exposable_output=exposable_output, + )) + + existing_data_references = exposable_outputs & output_data_references + if existing_data_references: + raise exceptions.InvalidPipelineError("Steps have overlapping exposable data references: {existing_data_references}".format(existing_data_references=existing_data_references)) + + exposable_outputs.update(output_data_references) + + for i, output_description in enumerate(self.outputs): + exposable_outputs.add('outputs.{i}'.format(i=i)) + + return exposable_outputs + + def check(self, *, allow_placeholders: bool = False, standard_pipeline: bool = True, input_types: typing.Dict[str, type] = None) -> None: + """ + Check if the pipeline is a valid pipeline. + + It supports checking against non-resolved primitives and pipelines, but in that case + checking will be very limited. Make sure you used a strict resolver to assure + full checking of this pipeline and any sub-pipelines. + + Raises an exception if check fails. + + Parameters + ---------- + allow_placeholders: + Do we allow placeholders in a pipeline? + standard_pipeline: + Check it as a standard pipeline (inputs are Dataset objects, output is a DataFrame)? + input_types: + A map of types available as inputs. If provided, overrides ``standard_pipeline``. + """ + + self._check(allow_placeholders, standard_pipeline, input_types) + + def _check(self, allow_placeholders: bool, standard_pipeline: bool, input_types: typing.Optional[typing.Dict[str, type]]) -> typing.Sequence[TypeInfo]: + # Generating JSON also checks it against the pipeline schema. + # We do not set "nest_subpipelines" because recursive checks are done + # by this method's recursive call (when sub-pipelines are resolved). + self.to_json_structure() + + # Map between available data references and their types. + environment: typing.Dict[str, TypeInfo] = {} + + # Inputs are never singleton. + if input_types is not None: + if len(self.inputs) != len(input_types): + raise exceptions.InvalidPipelineError("Pipeline '{pipeline_id}' accepts {inputs} input(s), but {input_types} provided.".format( + pipeline_id=self.id, + inputs=len(self.inputs), + input_types=len(input_types), + )) + + for data_reference, structural_type in input_types.items(): + environment[data_reference] = TypeInfo(structural_type, False) + elif standard_pipeline: + for i, input_description in enumerate(self.inputs): + environment['inputs.{i}'.format(i=i)] = TypeInfo(container.Dataset, False) + else: + for i, input_description in enumerate(self.inputs): + # We do not really know what the inputs are. + environment['inputs.{i}'.format(i=i)] = TypeInfo(typing.Any, False) # type: ignore + + for step_index, step in enumerate(self.steps): + assert step_index == step.index + + if isinstance(step, PlaceholderStep): + if not allow_placeholders: + raise exceptions.InvalidPipelineError("Step {step_index} of pipeline '{pipeline_id}' is a placeholder but there should be no placeholders.".format( + step_index=step_index, + pipeline_id=self.id, + )) + + for data_reference in step.inputs: + # This is checked already during pipeline construction in "check_add". 
+ assert data_reference in environment + + for data_reference in step.get_output_data_references(): + # This is checked already during pipeline construction in "add_output". + assert data_reference not in environment + + # We cannot really know a type of the placeholder output given current pipeline description. + environment[data_reference] = TypeInfo(typing.Any, None) # type: ignore + + elif isinstance(step, SubpipelineStep): + subpipeline_input_types: typing.Dict[str, type] = {} + for i, data_reference in enumerate(step.inputs): + # This is checked already during pipeline construction in "check_add". + assert data_reference in environment + + input_data_reference = 'inputs.{i}'.format(i=i) + + assert input_data_reference not in subpipeline_input_types + subpipeline_input_types[input_data_reference] = environment[data_reference].structural_type + + # Resolving is optional. Of course full checking is not really possible without resolving. + if step.pipeline is not None: + outputs_types = step.pipeline._check(allow_placeholders, False, subpipeline_input_types) + + for i, output_id in enumerate(step.outputs): + if output_id is not None: + output_data_reference = 'steps.{i}.{output_id}'.format(i=step.index, output_id=output_id) + + # This is checked already during pipeline construction in "add_output". + assert output_data_reference not in environment + + if step.pipeline is not None: + environment[output_data_reference] = outputs_types[i] + else: + # We cannot really know a type of the output without resolving. + environment[output_data_reference] = TypeInfo(typing.Any, None) # type: ignore + + elif isinstance(step, PrimitiveStep): + if step.primitive is not None: + primitive_metadata = step.primitive.metadata.query() + primitive_methods = primitive_metadata['primitive_code'].get('instance_methods', {}) + primitive_arguments = primitive_metadata['primitive_code'].get('arguments', {}) + + for argument_name, argument_description in step.arguments.items(): + # This is checked already during pipeline construction in "check_add". + if utils.is_sequence(argument_description['data']): + for data_reference in argument_description['data']: + assert data_reference in environment + else: + assert argument_description['data'] in environment + + if step.primitive is not None: + # This is checked already during pipeline construction in "add_argument". + assert argument_name in primitive_arguments + + if argument_description['type'] == metadata_base.ArgumentType.DATA: + type_info = environment[argument_description['data']] + + # The error is only if it is exactly "False". If it is "None", we do not know and we do not want any false positives. + if type_info.singleton == False: # noqa + raise exceptions.InvalidPipelineError( + "Argument '{argument_name}' of step {step_index} of pipeline '{pipeline_id}' is singleton data, but available data reference is not.".format( + argument_name=argument_name, + step_index=step_index, + pipeline_id=self.id, + ), + ) + + # We cannot really check if types match because we do not know + # the type of elements from just container structural type. 
+ elif step.primitive is not None: + assert argument_description['type'] == metadata_base.ArgumentType.CONTAINER, argument_description['type'] + + if utils.is_sequence(argument_description['data']): + if not utils.is_subclass(primitive_arguments[argument_name]['type'], container.List): + raise exceptions.InvalidPipelineError( + "Argument '{argument_name}' of step {step_index} of pipeline '{pipeline_id}' should have type 'List' to support getting a list of values, " + "but it has type '{argument_type}'.".format( + argument_name=argument_name, + step_index=step_index, + pipeline_id=self.id, + argument_type=primitive_arguments[argument_name]['type'], + ), + ) + + else: + type_info = environment[argument_description['data']] + + if type_info.structural_type is typing.Any or primitive_arguments[argument_name]['type'] is typing.Any: + # No type information. + pass + elif not utils.is_subclass(type_info.structural_type, primitive_arguments[argument_name]['type']): + raise exceptions.InvalidPipelineError( + "Argument '{argument_name}' of step {step_index} of pipeline '{pipeline_id}' has type '{argument_type}', but it is getting a type '{input_type}'.".format( + argument_name=argument_name, + step_index=step_index, + pipeline_id=self.id, + argument_type=primitive_arguments[argument_name]['type'], + input_type=type_info.structural_type, + ), + ) + + if step.primitive is not None: + hyperparams = step.get_primitive_hyperparams() + + for hyperparameter_name, hyperparameter_description in step.hyperparams.items(): + # This is checked already during pipeline construction in "add_hyperparameter". + assert hyperparameter_name in hyperparams.configuration + + if hyperparameter_description['type'] == metadata_base.ArgumentType.DATA: + if utils.is_sequence(hyperparameter_description['data']): + data_references = hyperparameter_description['data'] + else: + data_references = typing.cast(typing.Sequence, [hyperparameter_description['data']]) + + for data_reference in data_references: + # This is checked already during pipeline construction in "check_add". + assert data_reference in environment + + if not isinstance(data_reference, str): + raise exceptions.InvalidArgumentTypeError("Hyper-parameter data reference '{data_reference}' is not a string.".format(data_reference=data_reference)) + + type_info = environment[data_reference] + + # The error is only if it is exactly "False". If it is "None", we do not know and we do not want any false positives. + if type_info.singleton == False: # noqa + raise exceptions.InvalidPipelineError( + "Hyper-parameter '{hyperparameter_name}' of step {step_index} of pipeline '{pipeline_id}' is singleton data, " + "but available data reference '{data_reference}' is not.".format( + hyperparameter_name=hyperparameter_name, + step_index=step_index, + pipeline_id=self.id, + data_reference=data_reference, + ), + ) + + # We cannot really check if types match because we do not know + # the type of elements from just container structural type. + + elif hyperparameter_description['type'] == metadata_base.ArgumentType.PRIMITIVE: + if utils.is_sequence(hyperparameter_description['data']): + primitive_references = hyperparameter_description['data'] + else: + primitive_references = typing.cast(typing.Sequence, [hyperparameter_description['data']]) + + primitives = [] + for primitive_reference in primitive_references: + # This is checked already during pipeline construction in "check_add". 
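+                                # A primitive reference is simply the index of an earlier step,
+                                # so it must be strictly smaller than the current step index.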
+ assert 0 <= primitive_reference < step_index + + primitive_step = self.steps[primitive_reference] + + if not isinstance(primitive_step, PrimitiveStep): + raise exceptions.InvalidPipelineError( + "Hyper-parameter '{hyperparameter_name}' of step {step_index} of pipeline '{pipeline_id}' " + "does not point to a primitive step (step {primitive_reference}).".format( + hyperparameter_name=hyperparameter_name, + step_index=step_index, + pipeline_id=self.id, + primitive_reference=primitive_reference, + ), + ) + + if primitive_step.primitive is None: + primitives.append(typing.Any) + else: + primitives.append(primitive_step.primitive) + + if utils.is_sequence(hyperparameter_description['data']): + if not hyperparams.configuration[hyperparameter_name].can_accept_value_type(primitives): + raise exceptions.InvalidPipelineError( + "Hyper-parameter '{hyperparameter_name}' of step {step_index} of pipeline '{pipeline_id}' cannot accept primitives {primitives}.".format( + hyperparameter_name=hyperparameter_name, + step_index=step_index, + pipeline_id=self.id, + primitives=primitives, + ), + ) + else: + assert len(primitives) == 1 + + if not hyperparams.configuration[hyperparameter_name].can_accept_value_type(primitives[0]): + raise exceptions.InvalidPipelineError( + "Hyper-parameter '{hyperparameter_name}' of step {step_index} of pipeline '{pipeline_id}' cannot accept a primitive '{primitive}'.".format( + hyperparameter_name=hyperparameter_name, + step_index=step_index, + pipeline_id=self.id, + primitive=primitives[0], + ), + ) + + elif hyperparameter_description['type'] == metadata_base.ArgumentType.CONTAINER: + # This is checked already during pipeline construction in "check_add". + assert hyperparameter_description['data'] in environment + + type_info = environment[hyperparameter_description['data']] + + if not hyperparams.configuration[hyperparameter_name].can_accept_value_type(type_info.structural_type): + raise exceptions.InvalidPipelineError( + "Hyper-parameter '{hyperparameter_name}' of step {step_index} of pipeline '{pipeline_id}' cannot accept a value of type '{input_type}'.".format( + hyperparameter_name=hyperparameter_name, + step_index=step_index, + pipeline_id=self.id, + input_type=type_info.structural_type, + ), + ) + + elif hyperparameter_description['type'] == metadata_base.ArgumentType.VALUE: + # "VALUE" hyper-parameter value has already been checked in "add_hyperparameter". + pass + + else: + raise exceptions.UnexpectedValueError("Unknown hyper-parameter type: {hyperparameter_type}".format(hyperparameter_type=hyperparameter_description['type'])) + + for output_id in step.outputs: + output_data_reference = 'steps.{i}.{output_id}'.format(i=step.index, output_id=output_id) + + assert output_data_reference not in environment + + if step.primitive is not None: + # This is checked already during pipeline construction in "add_output". + assert output_id in primitive_methods + + method_description = primitive_methods[output_id] + + produce_type = method_description['returns'] + + # This should be checked by some other part of the code (like primitive validation). + assert issubclass(produce_type, base.CallResult), produce_type + + output_type = utils.get_type_arguments(produce_type)[base.T] # type: ignore + + environment[output_data_reference] = TypeInfo(output_type, method_description.get('singleton', False)) + else: + # We cannot really know a type of the output without resolving. 
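+                        # A "None" singleton flag means "unknown"; the checks above only
+                        # raise an error when the flag is exactly "False".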
+ environment[output_data_reference] = TypeInfo(typing.Any, None) # type: ignore + + else: + raise exceptions.UnexpectedValueError("Unknown step type: {step_type}".format(step_type=type(step))) + + outputs_types = [] + for output_description in self.outputs: + # This is checked already during pipeline construction in "add_output". + assert output_description['data'] in environment, output_description['data'] + + outputs_types.append(environment[output_description['data']]) + + return outputs_types + + @classmethod + def from_yaml(cls: typing.Type[P], string_or_file: typing.Union[str, typing.IO[typing.Any]], *, resolver: typing.Optional[Resolver] = None, + strict_digest: bool = False) -> P: + description = utils.yaml_load(string_or_file) + + return cls.from_json_structure(description, resolver=resolver, strict_digest=strict_digest) + + @classmethod + def from_json(cls: typing.Type[P], string_or_file: typing.Union[str, typing.IO[typing.Any]], *, resolver: typing.Optional[Resolver] = None, + strict_digest: bool = False) -> P: + if isinstance(string_or_file, str): + description = json.loads(string_or_file) + else: + description = json.load(string_or_file) + + return cls.from_json_structure(description, resolver=resolver, strict_digest=strict_digest) + + @classmethod + def _get_step_class(cls, step_type: typing.Any) -> StepBase: + if step_type == metadata_base.PipelineStepType.PRIMITIVE: + return PrimitiveStep + elif step_type == metadata_base.PipelineStepType.SUBPIPELINE: + return SubpipelineStep + elif step_type == metadata_base.PipelineStepType.PLACEHOLDER: + return PlaceholderStep + else: + raise exceptions.InvalidArgumentValueError("Invalid step type '{step_type}'.".format(step_type=step_type)) + + @classmethod + def _get_source(cls, pipeline_description: typing.Dict) -> typing.Optional[typing.Dict]: + return pipeline_description.get('source', None) + + @classmethod + def _canonical_pipeline_description(cls, pipeline_description: typing.Dict) -> typing.Dict: + """ + Before we compute digest of the pipeline description, we have to convert it to a + canonical structure. + + Currently, this is just removing any sub-pipelines the description might have nested. + """ + + pipeline_description = copy.deepcopy(pipeline_description) + + for step_description in pipeline_description['steps']: + if step_description['type'] == metadata_base.PipelineStepType.SUBPIPELINE: + new_description = { + 'id': step_description['pipeline']['id'], + } + if 'digest' in step_description['pipeline']: + new_description['digest'] = step_description['pipeline']['digest'] + step_description['pipeline'] = new_description + + # Not really part of pipeline schema, but used in evaluation. Digest should + # not be computed using it, if it was passed in. We also do not want to store + # it in metalearning database as part of the pipeline document so that we are + # not storing same pipeline multiple times, just with different rank values. 
+ if 'pipeline_rank' in pipeline_description: + del pipeline_description['pipeline_rank'] + + return pipeline_description + + @classmethod + def from_json_structure(cls: typing.Type[P], pipeline_description: typing.Dict, *, resolver: typing.Optional[Resolver] = None, + strict_digest: bool = False) -> P: + PIPELINE_SCHEMA_VALIDATOR.validate(pipeline_description) + + if 'digest' in pipeline_description: + digest = utils.compute_digest(cls._canonical_pipeline_description(pipeline_description)) + + if digest != pipeline_description['digest']: + if strict_digest: + raise exceptions.DigestMismatchError( + "Digest for pipeline '{pipeline_id}' does not match a computed one. Provided digest: {pipeline_digest}. Computed digest: {new_pipeline_digest}.".format( + pipeline_id=pipeline_description['id'], + pipeline_digest=pipeline_description['digest'], + new_pipeline_digest=digest, + ) + ) + else: + logger.warning( + "Digest for pipeline '%(pipeline_id)s' does not match a computed one. Provided digest: %(pipeline_digest)s. Computed digest: %(new_pipeline_digest)s.", + { + 'pipeline_id': pipeline_description['id'], + 'pipeline_digest': pipeline_description['digest'], + 'new_pipeline_digest': digest, + }, + ) + + # If no timezone information is provided, we assume UTC. If there is timezone information, + # we convert timestamp to UTC in the constructor of "Pipeline". + created = dateparser.parse(pipeline_description['created'], settings={'TIMEZONE': 'UTC'}) + source = cls._get_source(pipeline_description) + + pipeline = cls( + pipeline_id=pipeline_description['id'], created=created, source=source, + name=pipeline_description.get('name', None), description=pipeline_description.get('description', None) + ) + + for input_description in pipeline_description['inputs']: + pipeline.add_input(input_description.get('name', None)) + + for step_description in pipeline_description['steps']: + step = cls._get_step_class(step_description['type']).from_json_structure(step_description, resolver=resolver) + pipeline.add_step(step) + + for output_description in pipeline_description['outputs']: + pipeline.add_output(output_description['data'], output_description.get('name', None)) + + for user_description in pipeline_description.get('users', []): + pipeline.add_user(user_description) + + return pipeline + + def _inputs_to_json_structure(self) -> typing.Sequence[typing.Dict]: + return self.inputs + + def _outputs_to_json_structure(self) -> typing.Sequence[typing.Dict]: + return self.outputs + + def _source_to_json_structure(self) -> typing.Optional[typing.Dict]: + return self.source + + def _users_to_json_structure(self) -> typing.Optional[typing.Sequence[typing.Dict]]: + # Returns "None" if an empty list. + return self.users or None + + def _to_json_structure(self, *, nest_subpipelines: bool = False) -> typing.Dict: + # Timestamp should already be in UTC and in particular "tzinfo" should be "datetime.timezone.utc". + assert self.created.tzinfo == datetime.timezone.utc, self.created + # We remove timezone information before formatting to not have "+00:00" added and + # we then manually add "Z" instead (which has equivalent meaning). 
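+        # For example, a pipeline created at 2016-11-23 03:50:00 UTC is serialized as "2016-11-23T03:50:00Z".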
+        created = self.created.replace(tzinfo=None).isoformat('T') + 'Z'
+
+        pipeline_description: typing.Dict = {
+            'id': self.id,
+            'schema': PIPELINE_SCHEMA_VERSION,
+            'created': created,
+            'inputs': self._inputs_to_json_structure(),
+            'outputs': self._outputs_to_json_structure(),
+            'steps': [],
+        }
+
+        source = self._source_to_json_structure()
+        if source is not None:
+            pipeline_description['source'] = source
+
+        users = self._users_to_json_structure()
+        if users is not None:
+            pipeline_description['users'] = users
+
+        if self.name is not None:
+            pipeline_description['name'] = self.name
+        if self.description is not None:
+            pipeline_description['description'] = self.description
+
+        for step in self.steps:
+            if isinstance(step, SubpipelineStep):
+                pipeline_description['steps'].append(step.to_json_structure(nest_subpipelines=nest_subpipelines))
+            else:
+                pipeline_description['steps'].append(step.to_json_structure())
+
+        pipeline_description['digest'] = utils.compute_digest(self._canonical_pipeline_description(pipeline_description))
+
+        return pipeline_description
+
+    def to_json_structure(self, *, nest_subpipelines: bool = False, canonical: bool = False) -> typing.Dict:
+        if canonical:
+            nest_subpipelines = False
+
+        pipeline_description = self._to_json_structure(nest_subpipelines=nest_subpipelines)
+
+        if canonical:
+            pipeline_description = self._canonical_pipeline_description(pipeline_description)
+
+        PIPELINE_SCHEMA_VALIDATOR.validate(pipeline_description)
+
+        return pipeline_description
+
+    def to_json(self, file: typing.IO[typing.Any] = None, *, nest_subpipelines: bool = False, canonical: bool = False, **kwargs: typing.Any) -> typing.Optional[str]:
+        obj = self.to_json_structure(nest_subpipelines=nest_subpipelines, canonical=canonical)
+
+        if 'allow_nan' not in kwargs:
+            kwargs['allow_nan'] = False
+
+        if file is None:
+            return json.dumps(obj, **kwargs)
+        else:
+            json.dump(obj, file, **kwargs)
+            return None
+
+    def to_yaml(self, file: typing.IO[typing.Any] = None, *, nest_subpipelines: bool = False, canonical: bool = False, **kwargs: typing.Any) -> typing.Optional[str]:
+        obj = self.to_json_structure(nest_subpipelines=nest_subpipelines, canonical=canonical)
+
+        return utils.yaml_dump(obj, stream=file, **kwargs)
+
+    def equals(self, pipeline: P, *, strict_order: bool = False, only_control_hyperparams: bool = False) -> bool:
+        """
+        Check if the two pipelines are equal in the sense of isomorphism.
+
+        Parameters
+        ----------
+        pipeline:
+            A pipeline instance.
+        strict_order:
+            If true, we will treat inputs of `Set` hyperparameters as a list, and the order of primitives is determined by their step indices.
+            Otherwise we will try to sort contents of `Set` hyperparameters so the order of their contents is not important,
+            and we will try topological sorting to determine the order of nodes.
+        only_control_hyperparams:
+            If true, equality checks will not happen for any hyperparameters that are not of the ``ControlParameter`` semantic type, i.e.,
+            there will be no checks for hyperparameters that are specific to the hyperparameter optimization phase and not part of the
+            logic of the pipeline.
+
+        Notes
+        -----
+        This method checks whether the two pipelines are equal in the sense of isomorphism by solving a graph isomorphism
+        problem. The general graph isomorphism problem is not known to be solvable in polynomial time, nor is it known to be
+        NP-complete. However, our pipelines are DAGs with labeled nodes and edges, so we can check their isomorphism in polynomial time.
+
+        The complexity of this algorithm is around :math:`O((V + E) \log V)`, where :math:`V` is the number of steps in the
+        pipeline and :math:`E` is the number of output references. It tries to assign unique orders to all nodes layer
+        by layer greedily, followed by a topological sort using DFS. Then we can get a unique, hashable & comparable
+        tuple representing the structure of the pipeline. It is also a unique representation of the equivalence class of
+        a pipeline in the sense of isomorphism.
+        """
+
+        # TODO: We could cache the representation once the pipeline is frozen.
+        return \
+            PipelineHasher(self, strict_order, only_control_hyperparams).unique_equivalence_class_repr() == \
+            PipelineHasher(pipeline, strict_order, only_control_hyperparams).unique_equivalence_class_repr()
+
+    def hash(self, *, strict_order: bool = False, only_control_hyperparams: bool = False) -> int:
+        """
+        Get the hash value of a pipeline. It simply hashes the unique representation of the equivalence class of
+        a pipeline in the sense of isomorphism.
+
+        Parameters
+        ----------
+        strict_order:
+            If true, we will treat inputs of `Set` hyperparameters as a list, and the order of primitives is determined by their step indices.
+            Otherwise we will try to sort contents of `Set` hyperparameters so the order of their contents is not important,
+            and we will try topological sorting to determine the order of nodes.
+        only_control_hyperparams:
+            If true, equality checks will not happen for any hyperparameters that are not of the ``ControlParameter`` semantic type, i.e.,
+            there will be no checks for hyperparameters that are specific to the hyperparameter optimization phase and not part of the
+            logic of the pipeline.
+        """
+
+        # TODO: We could cache the hash once the pipeline is frozen.
+        return hash(PipelineHasher(self, strict_order, only_control_hyperparams))
+
+    def get_digest(self) -> str:
+        return self._to_json_structure(nest_subpipelines=False)['digest']
+
+
+# There are several forms of input indices:
+# 1. Named arguments. They are typically strings or tuple-wrapped strings.
+# 2. Pipeline outputs. They are integers.
+# 3. Value-type & container-type hyperparameters. They are strings.
+# 4. Data-type hyperparameters. They are tuples like (name, type) or (name, type, index).
+# 5. Primitive-type hyperparameters. They are strings or tuples like (name, index).
+InputIndex = typing.Union[int, str, typing.Tuple[str], typing.Tuple[str, str], typing.Tuple[str, int], typing.Tuple[str, str, int]]
+OutputIndex = int
+Edge = typing.NamedTuple('Edge', [('input_index', InputIndex), ('output_index', OutputIndex)])
+PD = typing.TypeVar('PD', bound='PipelineDAG')
+
+
+class OrderedNode(metaclass=utils.AbstractMetaclass):
+    """This class represents a node in a DAG.
+
+    Parameters
+    ----------
+    name:
+        The name of this node.
+    topological_order:
+        The topological order of this node in the DAG.
+    inputs_ref:
+        The inputs containing unresolved reference strings or a list of indices.
+
+    Attributes
+    ----------
+    name:
+        The name of this node.
+    topological_order:
+        The topological order of a node in a DAG.
+    global_order:
+        The global order of a node in a DAG.
+    inputs:
+        The inputs of the node. They serve as the edges in a DAG.
+    children:
+        The descendants of this node.
+ """ + + name: str + topological_order: int + global_order: int + inputs: typing.Dict + children: typing.Dict + + def __init__(self, name: str, topological_order: int = 0, inputs_ref: typing.Optional[typing.Union[typing.Dict[InputIndex, str], typing.List[str]]] = None) -> None: + self.name = name + self.topological_order: int = topological_order + + if inputs_ref is None: + inputs_ref = collections.OrderedDict() + elif isinstance(inputs_ref, list): + inputs_ref = collections.OrderedDict(enumerate(inputs_ref)) + self._inputs_ref = inputs_ref + + self.global_order: typing.Optional[int] = None + self.inputs: typing.Dict[InputIndex, typing.Tuple['OrderedNode', int]] = collections.OrderedDict() + self.children: typing.DefaultDict['OrderedNode', typing.Set[InputIndex]] = collections.defaultdict(set) + self._frozen = False + self._unique_equivalence_class_repr: typing.Optional[typing.Tuple] = None + + @property + def inputs_count(self) -> int: + """ + Returns the count of inputs. + """ + return len(self._inputs_ref) + + def outputs(self) -> typing.DefaultDict[OutputIndex, typing.Set[typing.Tuple['OrderedNode', InputIndex]]]: + reverse_dict: typing.DefaultDict[OutputIndex, typing.Set[typing.Tuple[OrderedNode, InputIndex]]] = collections.defaultdict(set) + for node, input_indices in self.children.items(): + for input_index in input_indices: + output_index = node.inputs[input_index][1] + reverse_dict[output_index].add((node, input_index)) + return reverse_dict + + @property + def frozen(self) -> bool: + """ + If a node is frozen, its representation can be cached. + + Returns + ------- + The frozen state of the node. + """ + + return self._frozen + + @frozen.setter + def frozen(self, value: bool) -> None: + assert isinstance(value, bool) + self._frozen = value + if not value: + # Force cleanup. + self._unique_equivalence_class_repr = None + + def add_child(self, node: 'OrderedNode', edge: Edge) -> None: + """ + Add a child node. + + Parameters + ---------- + node: + The child node. + edge: + The edge connects parent node and child node. + """ + + self.children[node].add(edge.input_index) + node.inputs[edge.input_index] = (self, edge.output_index) + + def remove_child(self, child: 'OrderedNode', input_index: typing.Optional[InputIndex]) -> None: + """ + Remove a child node. + + Parameters + ---------- + child: + The child node. + input_index: + The related input index of the child node. If it is None, all edges between the child ndoe and the parent node will be removed. + """ + + if input_index is None: + for input_index in self.children[child]: + del child.inputs[input_index] + del self.children[child] + else: + edges = self.children[child] + edges.remove(input_index) + del child.inputs[input_index] + if not edges: + del self.children[child] + + def change_input(self, input_index: InputIndex, new_parent: 'OrderedNode', new_input_index: typing.Optional[InputIndex] = None, new_output_index: typing.Optional[OutputIndex] = None) -> None: + """ + Change the input of the node. + + Parameters + ---------- + input_index: + The input index we want to change. + new_parent: + The new parent of the node. + new_input_index: + The new input index. If it is None, the original index will be kept. + new_output_index: + The new output index. If it is None, the original index will be kept. 
+ """ + + parent, output_index = self.inputs[input_index] + parent.remove_child(self, input_index) + if new_output_index is None: + new_output_index = output_index + if new_input_index is None: + new_input_index = input_index + else: + del self.inputs[input_index] + new_parent.add_child(self, Edge(input_index=new_input_index, output_index=new_output_index)) + + def join(self, node_with_inputs: 'OrderedNode') -> None: + """ + Join by the edges of the nodes. + + Two nodes can be joined only if the output indices of node A (`self` here) match the input indices of node B (`node_with_inputs` here). + The join operation needs two nodes: A and B. Suppose A's children are {A+} and B's parents are {B-}. + + It removes all edges between B and {B-} & between A and {A+}, then creating new edges to connect {B-} and {A+}. + + Parameters + ---------- + node_with_inputs: + The node which provides inputs. + + Notes + ----- + The function is named ``join`` because it is similar to "join" of SQL since they both concatenate items by their common indices. + """ + + outputs = self.outputs() + # Set & dict size will be changed during iteration. Use a list to fix them. + for input_index, (parent, parent_output_index) in list(node_with_inputs.inputs.items()): + assert isinstance(input_index, int) + for child, child_input in outputs[input_index]: + child.change_input(child_input, parent, new_output_index=parent_output_index) + parent.remove_child(node_with_inputs, input_index) + + @abc.abstractmethod + def reference_name(self) -> int: + """ + The name to reference itself. + """ + + @abc.abstractmethod + def output_reference_names(self) -> typing.List[str]: + """ + The names for other nodes to refer its outputs. + """ + + def resolve_input_references(self, nodes_outputs_reverse_dict: typing.Dict[str, typing.Tuple['OrderedNode', OutputIndex]]) -> None: + """ + Resolve input references with a lookup dict. + """ + + for input_index, ref in self._inputs_ref.items(): + parent, output_index = nodes_outputs_reverse_dict[ref] + parent.add_child(self, Edge(input_index=input_index, output_index=output_index)) + + def _unique_ordered_inputs(self) -> typing.Tuple: + input_orders = [(name, parent.global_order, output_index) for name, (parent, output_index) in self.inputs.items()] + input_orders.sort() + return tuple(input_orders) + + def unique_equivalence_class_repr(self) -> typing.Tuple: + """ + Get the unique representation of the equivalence class of the node in the sense of isomorphism. + """ + + if not self.frozen or self._unique_equivalence_class_repr is None: + repr_tuple = (self.name, self._unique_ordered_inputs(), self.topological_order) + if self.frozen: + self._unique_equivalence_class_repr = repr_tuple + else: + self._unique_equivalence_class_repr = None + return repr_tuple + + return self._unique_equivalence_class_repr + + +class InputsNode(OrderedNode): + """This class represents the inputs of a pipeline. This node is unique in a pipeline. + + Parameters + ---------- + pipeline_inputs: + Inputs of the pipeline. It is a list contains description dicts of inputs. Their order matters. + They will not be resolved as data reference strings, so we use `pipeline_inputs` as its name instead of `inputs_ref` which will be resolved. + """ + def __init__(self, pipeline_inputs: typing.List[typing.Dict]) -> None: + super().__init__('Inputs') + + self.pipeline_inputs = copy.deepcopy(pipeline_inputs) + self.global_order = 0 + + @property + def inputs_count(self) -> int: + """ + Return the count of inputs. 
+ """ + return len(self.pipeline_inputs) + + def reference_name(self) -> int: + """ + We specify that the input node has index -1. + """ + + return -1 + + def output_reference_names(self) -> typing.List[str]: + """ + The names for other nodes to refer its outputs. + """ + + return ['inputs.{i}'.format(i=i) for i in range(self.inputs_count)] + + def unique_equivalence_class_repr(self) -> typing.Tuple: + """ + Get the unique representation of the equivalence class of the node in the sense of isomorphism. + """ + + return self.name, self.inputs_count + + +class OutputsNode(OrderedNode): + """This class represents the outputs of a pipeline. This node is unique in a pipeline. + + Parameters + ---------- + pipeline_outputs: + Outputs of a pipeline. It is a list contains description dicts of outputs. Their order matters. + """ + def __init__(self, pipeline_outputs: typing.List[typing.Dict]) -> None: + super().__init__('Outputs', inputs_ref=[v['data'] for v in pipeline_outputs]) + + self.outputs_count = len(pipeline_outputs) + + def reference_name(self) -> int: + """ + We specify that the output node has index -2. + """ + + return -2 + + def output_reference_names(self) -> typing.List[str]: + """ + The names for other nodes to refer its outputs. + """ + + return [] + + +class PrimitiveNode(OrderedNode): + """ + This class represents a primitive step in a DAG. + + Attributes + ---------- + index: + The index of this step in the pipeline. + primitive_step: + The PrimitiveStep instance. + _steps_ref: + Raw inputs info contains step reference indices. + steps: + Steps used by this node as parameters or hyperparameters. + values: + Inputs contains simple value. + strict_order: + If true, we will treat inputs of `Set` hyperparameters as a list. + Otherwise we will try to sort their contents so the orders of their contents are not important. + only_control_hyperparams: + If true, hyperparameters that are not of the `ControlParameter` semantic type. will not be included + in the node's representation. + """ + + index: int + primitive_step: PrimitiveStep + _steps_ref: typing.Dict + steps: typing.Dict + values: typing.Dict + strict_order: bool + only_control_hyperparams: bool + + def __init__(self, primitive: PrimitiveStep, *, strict_order: bool, only_control_hyperparams: bool) -> None: + # We wraps argument names with a tuple to unify sorting. + super().__init__(primitive.get_primitive_id(), inputs_ref={(k,): v['data'] for k, v in primitive.arguments.items()}) + + self.index: int = primitive.index + self.primitive_step = primitive + self.strict_order = strict_order + self.only_control_hyperparams = only_control_hyperparams + + self._outputs: typing.List[str] = primitive.outputs.copy() + self._steps_ref: typing.Dict[InputIndex, int] = collections.OrderedDict() + self.steps: typing.Dict[InputIndex, OrderedNode] = collections.OrderedDict() + self.values: typing.Dict[str, typing.Any] = collections.OrderedDict() + + if self.primitive_step.primitive is not None: + hyperparameters = self.primitive_step.get_primitive_hyperparams().configuration + else: + hyperparameters = None + + # Resolve hyper-parameters. For sequential hyperparameters, we consider their order matters. 
+ for name, hyperparameter_description in primitive.hyperparams.items(): + if only_control_hyperparams and hyperparameters is not None and CONTROL_HYPERPARAMETER_SEMANTIC_TYPE not in hyperparameters[name].semantic_types: + continue + is_set = isinstance(hyperparameters[name], hyperparams_module.Set) if hyperparameters is not None else False + if hyperparameter_description['type'] == metadata_base.ArgumentType.DATA: + if utils.is_sequence(hyperparameter_description['data']): + data_references: typing.List[str] = typing.cast(typing.List[str], hyperparameter_description['data']) + if is_set and not strict_order: + data_references = sorted(data_references) + for i, data_reference in enumerate(data_references): + self._inputs_ref[name, metadata_base.ArgumentType.DATA.name, i] = data_reference + else: + self._inputs_ref[name, metadata_base.ArgumentType.DATA.name] = hyperparameter_description['data'] + elif hyperparameter_description['type'] == metadata_base.ArgumentType.PRIMITIVE: + if utils.is_sequence(hyperparameter_description['data']): + primitive_references: typing.List[int] = typing.cast(typing.List[int], hyperparameter_description['data']) + if is_set and not strict_order: + primitive_references = sorted(primitive_references) + for i, primitive_reference in enumerate(primitive_references): + self._steps_ref[name, i] = primitive_reference + else: + self._steps_ref[name] = hyperparameter_description['data'] + elif hyperparameter_description['type'] == metadata_base.ArgumentType.CONTAINER: + self._inputs_ref[name, metadata_base.ArgumentType.CONTAINER.name] = hyperparameter_description['data'] + elif hyperparameter_description['type'] == metadata_base.ArgumentType.VALUE: + data = hyperparameter_description['data'] + if is_set and not strict_order: + assert isinstance(data, list) + # encode the value + simple_data = self._serialize_hyperparamter_value(name, data, True) + assert utils.is_sequence(simple_data) + data = [x for _, x in sorted(zip(simple_data, data), key=lambda pair: pair[0])] + self.values[name] = data + else: + raise exceptions.UnexpectedValueError("Unknown hyper-parameter type: {hyperparameter_type}".format(hyperparameter_type=hyperparameter_description['type'])) + + def reference_name(self) -> int: + return self.index + + def output_reference_names(self) -> typing.List[str]: + """ + The names for other nodes to refer its outputs. + """ + + return ['steps.{i}.{output_id}'.format(i=self.index, output_id=output_id) for output_id in self._outputs] + + def resolve_step_references(self, nodes_reverse_dict: typing.Dict[int, OrderedNode]) -> None: + """ + Resolve step references with a lookup dict. 
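+
+        ``nodes_reverse_dict`` is expected to map step indices (as referenced by
+        primitive-type hyper-parameters) to their corresponding nodes.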
+ """ + + for input_index, ref in self._steps_ref.items(): + self.steps[input_index] = nodes_reverse_dict[ref] + + def _serialize_hyperparamter_value(self, name: str, data: typing.Any, is_sequence: bool) -> typing.Any: + if self.primitive_step.primitive is not None: + configuration = self.primitive_step.get_primitive_hyperparams().configuration + if name not in configuration: + raise exceptions.InvalidArgumentValueError( + "Unknown hyper-parameter name '{name}' for primitive {primitive}.".format( + name=name, + primitive=self.primitive_step.primitive, + ), + ) + hyperparameter = configuration[name] + else: + hyperparameter = hyperparams_module.Hyperparameter[type(data)](data) # type: ignore + + serialized = hyperparameter.value_to_json_structure(data) + + if is_sequence: + return [json.dumps(s, sort_keys=True) for s in serialized] + else: + return json.dumps(serialized, sort_keys=True) + + def _unique_serialized_values(self) -> typing.Tuple: + values = [(name, self._serialize_hyperparamter_value(name, data, False)) for name, data in self.values.items()] + # Sort by value names. + values.sort() + return tuple(values) + + def _unique_step_references(self) -> typing.Tuple: + steps_orders = [(name, node.global_order) for name, node in self.steps.items()] + steps_orders.sort() + return tuple(steps_orders) + + def unique_equivalence_class_repr(self) -> typing.Tuple: + """ + Get the unique representation of the equivalence class of the node in the sense of isomorphism. + """ + + if not self.frozen or self._unique_equivalence_class_repr is None: + repr_tuple = (self.name, self._unique_ordered_inputs(), self._unique_step_references(), self._unique_serialized_values(), self.topological_order) + if self.frozen: + self._unique_equivalence_class_repr = repr_tuple + else: + self._unique_equivalence_class_repr = None + return repr_tuple + + return self._unique_equivalence_class_repr + + +class PlaceholderNode(OrderedNode): + """ + This class represents a placeholder step in a DAG. + + Attributes + ---------- + index: + The index of this step in the pipeline. + """ + + index: int + + def __init__(self, placeholder: PlaceholderStep) -> None: + super().__init__(PlaceholderStep.__name__, inputs_ref=placeholder.inputs.copy()) + self.index: int = placeholder.index + self._outputs: typing.List[str] = placeholder.outputs.copy() + + def reference_name(self) -> int: + return self.index + + def output_reference_names(self) -> typing.List[str]: + """ + The names for other nodes to refer its outputs. + """ + + return ['steps.{i}.{output_id}'.format(i=self.index, output_id=output_id) for output_id in self._outputs] + + +class SubpipelineNode(OrderedNode): + """ + This class represents a subpipeline step in a DAG. + + If this sub-pipeline has been resolved, then its graph is expected to be merged into its parent graph; + otherwise `unique_equivalence_class_repr()` is called to get a unique representation according to its ID. + + Parameters + ---------- + subpipeline: + A subpipeline instance. + + Attributes + ---------- + index: + The index of this step in the pipeline. + pipeline_id: + The pipeline ID of subpipeline. + pipeline: + The sub-pipeline instance. If the sub-pipeline hasn't been resolved, it should be `None`. + strict_order: + If true, we will treat inputs of `Set` hyperparameters as a list. + Otherwise we will try to sort their contents so the orders of their contents are not important. 
+ only_control_hyperparams: + If true, hyperparameters that are not of the ``ControlParameter`` semantic type will not be included + in the graph representation of this subpipeline's primitive steps. + """ + + index: int + pipeline_id: str + pipeline: typing.Optional[Pipeline] + strict_order: bool + only_control_hyperparams: bool + + def __init__(self, subpipeline: SubpipelineStep, *, strict_order: bool, only_control_hyperparams: bool) -> None: + super().__init__(SubpipelineStep.__name__, inputs_ref=subpipeline.inputs.copy()) + self.strict_order = strict_order + self.only_control_hyperparams = only_control_hyperparams + self.index: int = subpipeline.index + + assert subpipeline.outputs is not None + + self._outputs: typing.List[str] = subpipeline.outputs.copy() + self.pipeline_id: str = subpipeline.get_pipeline_id() + self.pipeline: typing.Optional[Pipeline] = subpipeline.pipeline + + def graph(self) -> typing.Optional['PipelineDAG']: + """ + Get the graph of the pipeline inside. + + Returns + ------- + If this node has been resolved, return the graph; return None otherwise. + """ + + if self.pipeline is not None: + return PipelineDAG(self.pipeline, strict_order=self.strict_order, only_control_hyperparams=self.only_control_hyperparams) + return None + + def reference_name(self) -> int: + return self.index + + def output_reference_names(self) -> typing.List[str]: + """ + The names for other nodes to refer its outputs. + """ + + # Do not export null output_id. + return ['steps.{i}.{output_id}'.format(i=self.index, output_id=output_id) for output_id in self._outputs if output_id is not None] + + def unique_equivalence_class_repr(self) -> typing.Tuple: + """ + Get the unique representation of the equivalence class of the node in the sense of isomorphism. + + This is only used when the sub-pipeline hasn't been resolved. Otherwise, its graph should be used. + """ + return super().unique_equivalence_class_repr() + (self.pipeline_id,) + + +class PipelineDAG: + """ + Directed acyclic graph builder for a pipeline. + + It has an input node as the head of the DAG and an output node as the tail. + + Attributes + ---------- + pipeline: + The associated pipeline instance. + step_nodes: + These nodes belong to the steps of the pipeline, ordered by their index (including the extra inputs node & outputs node). + It will be changed if we try to expand this graph. + nodes: + A set of **all** nodes in the graph. + It will be changed if we try to expand this graph. + strict_order: + If true, we will treat inputs of `Set` hyperparameters as a list. + Otherwise we will try to sort their contents so the orders of their contents are not important. + only_control_hyperparams: + If true, hyperparameters that are not of the ``ControlParameter`` semantic type will not be included + in the graph representation of this pipeline's primitive steps. 
+ """ + + pipeline: Pipeline + step_nodes: typing.List[OrderedNode] + nodes: typing.Set[OrderedNode] + strict_order: bool + only_control_hyperparams: bool + + def __init__(self, pipeline: Pipeline, *, strict_order: bool, only_control_hyperparams: bool) -> None: + self.pipeline = pipeline + self.strict_order = strict_order + self.only_control_hyperparams = only_control_hyperparams + + self.step_nodes: typing.List[OrderedNode] = [] + self._nodes_reverse_dict: typing.Dict[int, OrderedNode] = {} + self._nodes_outputs_reverse_dict: typing.Dict[str, typing.Tuple[OrderedNode, OutputIndex]] = {} + + self.inputs_node = InputsNode(pipeline.inputs) + self.outputs_node = OutputsNode(pipeline.outputs) + + self.step_nodes.append(self.inputs_node) + self.step_nodes.extend(self._convert_step_to_node(step) for step in pipeline.steps) + self.step_nodes.append(self.outputs_node) + + self.nodes: typing.Set[OrderedNode] = set(self.step_nodes) + + # Build reversed mappings. + for node in self.step_nodes: + self._update_references(node) + + # Build the DAG. + for node in self.step_nodes: + self._resolve_references(node) + + def _convert_step_to_node(self, step: StepBase) -> OrderedNode: + node: OrderedNode + if isinstance(step, PrimitiveStep): + node = PrimitiveNode(step, strict_order=self.strict_order, only_control_hyperparams=self.only_control_hyperparams) + elif isinstance(step, PlaceholderStep): + node = PlaceholderNode(step) + elif isinstance(step, SubpipelineStep): + node = SubpipelineNode(step, strict_order=self.strict_order, only_control_hyperparams=self.only_control_hyperparams) + else: + # New type of steps should be added here. + raise NotImplementedError("Step type={t} is not supported.".format(t=type(step))) + return node + + def _update_references(self, node: OrderedNode) -> None: + for output_index, output_id in enumerate(node.output_reference_names()): + self._nodes_outputs_reverse_dict[output_id] = (node, output_index) + self._nodes_reverse_dict[node.reference_name()] = node + + def _resolve_references(self, node: OrderedNode) -> None: + node.resolve_input_references(self._nodes_outputs_reverse_dict) + if isinstance(node, PrimitiveNode): + node.resolve_step_references(self._nodes_reverse_dict) + + def body_nodes(self) -> typing.Set[OrderedNode]: + """ + Return all nodes expect the inputs node and outputs node in the graph. + """ + + return self.nodes - {self.inputs_node, self.outputs_node} + + def expand_node(self, node: OrderedNode, graph: PD) -> None: + """ + Replace a node with a graph. + """ + + assert node in self.nodes + + # Update node records. + loc = self.step_nodes.index(node) + self.step_nodes = self.step_nodes[:loc] + graph.step_nodes[1:-1] + self.step_nodes[loc + 1:] + self.nodes.remove(node) + self.nodes.update(graph.body_nodes()) + + # Join nodes. + graph.inputs_node.join(node) + node.join(graph.outputs_node) + + def expand_subpipelines(self, recursive: bool = True) -> None: + """ + Extract all nodes inside a subpipeline's graph and integrate them into this graph. + + Parameters + ---------- + recursive: + If true, we will expand subpipelines of all depth (that is, subpipelines of subpipelines). + """ + + # Pick up subpipeline nodes into a list because expanding nodes will change the graph. 
+ subpipelines: typing.List[SubpipelineNode] = [node for node in self.nodes if isinstance(node, SubpipelineNode)] + for subpipeline_node in subpipelines: + subgraph: typing.Optional[PipelineDAG] = subpipeline_node.graph() + if subgraph is not None: + if recursive: + subgraph.expand_subpipelines(recursive=recursive) + self.expand_node(subpipeline_node, subgraph) + + +class PipelineHasher: + """ + Hash helper for pipelines. + + This algorithm checks if the two pipelines are equal in the sense of isomorphism by solving a graph isomorphism + problem. The general graph isomorphism problem is known to be neither P nor NP-complete. However, + our pipelines are DAGs so we could have an algorithm to check its isomorphism in polynomial time. + + The complexity of this algorithm is around :math:`O((V + E)logV)`, where :math:`V` is the number of steps in the + pipeline and :math:`E` is the number of output references. + + The algorithm follows these steps: + + 1. Construct a DAG from the given pipeline. A directed edge is pointed from A to B if A depends on B directly. + 2. Perform topological sort on the DAG using DFS. Nodes with same topological order are put into the same layer. + 3. Using a greedy algorithm to get 'global' orders of nodes. + It sorts the nodes in the same layer by making use of the global order of nodes they depend on. + 4. Get a unique, hashable & comparable tuple representing the structure of the pipeline according to the global order of nodes. + It also provides a unique representation of the equivalence class of a pipeline in the sense of isomorphism. + + And about supporting new steps, one should extend PipelineDAG._convert_step_to_node`. + + Attributes + ---------- + pipeline: + The associated pipeline instance. + graph: + The graph representation of the pipeline. + strict_order: + If true, we will treat inputs of `Set` hyperparameters as a list, and the order of primitives are determined by their step indices. + Otherwise we will try to sort contents of `Set` hyperparameters so the orders of their contents are not important, + and we will try topological sorting to determine the order of nodes. + """ + + pipeline: Pipeline + graph: PipelineDAG + strict_order: bool + + def __init__(self, pipeline: Pipeline, strict_order: bool = False, only_control_hyperparams: bool = False) -> None: + self.pipeline = pipeline + self.strict_order = strict_order + self.graph = PipelineDAG(pipeline, strict_order=strict_order, only_control_hyperparams=only_control_hyperparams) + self.graph.expand_subpipelines(recursive=True) + + self._hash: typing.Optional[int] = None + self._representation: typing.Optional[typing.Tuple] = None + self._layers: typing.List[typing.List[OrderedNode]] = [[self.graph.inputs_node]] + + self._unordered_nodes: typing.Set[OrderedNode] = set() + + def _dfs_topological_ordering(self, node: OrderedNode) -> OrderedNode: + for parent, output_index in node.inputs.values(): + if parent in self._unordered_nodes: + self._dfs_topological_ordering(parent) + node.topological_order = max(node.topological_order, parent.topological_order + 1) + + self._unordered_nodes.remove(node) + + # Classify it into layers. + while len(self._layers) < node.topological_order + 1: + self._layers.append([]) + self._layers[node.topological_order].append(node) + + return node + + def _global_ordering(self) -> None: + global_order = -1 + for layer in self._layers: + for node in layer: + node.frozen = True # Enable cache so we can be much faster in comparison. 
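As a usage sketch of the hasher described above (hedged; it reuses the trivial-pipeline construction from the previous sketch): two pipelines that are built independently but share the same structure should hash equally, even though their pipeline ids differ.

```python
# Hedged sketch: structural equality under PipelineHasher ignores pipeline ids.
from d3m.metadata import pipeline as pipeline_module

def build_trivial_pipeline() -> pipeline_module.Pipeline:
    p = pipeline_module.Pipeline()
    reference = p.add_input(name='inputs')
    p.add_output(data_reference=reference, name='predictions')
    return p

hasher_a = pipeline_module.PipelineHasher(build_trivial_pipeline())
hasher_b = pipeline_module.PipelineHasher(build_trivial_pipeline())
print(hash(hasher_a) == hash(hasher_b))   # True
print(hasher_a.unique_equivalence_class_repr() == hasher_b.unique_equivalence_class_repr())   # True
```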
+ layer.sort(key=lambda x: x.unique_equivalence_class_repr()) + last = None + for j, node in enumerate(layer): + # Keep symmetric. Nodes with same local_order should have same global_order. + if node.unique_equivalence_class_repr() != last: + global_order += 1 + last = node.unique_equivalence_class_repr() + node.global_order = global_order + + def unique_equivalence_class_repr(self) -> typing.Tuple: + """ + Get the unique representation of the equivalence class of the pipeline in the sense of isomorphism. + """ + + if self._representation is None: + if self.strict_order: + for i, node in enumerate(self.graph.step_nodes): + node.topological_order = i + node.global_order = i + self._representation = tuple(node.unique_equivalence_class_repr() for node in self.graph.step_nodes) + else: + self._unordered_nodes = self.graph.nodes.copy() + self._unordered_nodes.remove(self.graph.inputs_node) + # Perform topological sort. + while self._unordered_nodes: + node = next(iter(self._unordered_nodes)) # Retrieve an item without deleting it. + self._dfs_topological_ordering(node) + + self._global_ordering() + self._representation = tuple(node.unique_equivalence_class_repr() for layer in self._layers for node in layer) + + return self._representation + + def __hash__(self) -> int: + if self._hash is None: + self._hash = hash(self.unique_equivalence_class_repr()) + return self._hash + + +def get_pipeline( + pipeline_path: str, *, strict_resolving: bool = False, strict_digest: bool = False, + pipeline_search_paths: typing.Sequence[str] = None, respect_environment_variable: bool = True, load_all_primitives: bool = True, + resolver_class: typing.Type[Resolver] = Resolver, pipeline_class: typing.Type[Pipeline] = Pipeline, +) -> Pipeline: + resolver = resolver_class( + strict_resolving=strict_resolving, strict_digest=strict_digest, pipeline_search_paths=pipeline_search_paths, + respect_environment_variable=respect_environment_variable, load_all_primitives=load_all_primitives, + ) + + if os.path.exists(pipeline_path): + with utils.open(pipeline_path, 'r', encoding='utf8') as pipeline_file: + if pipeline_path.endswith('.yml') or pipeline_path.endswith('.yaml'): + return pipeline_class.from_yaml(pipeline_file, resolver=resolver, strict_digest=strict_digest) + elif pipeline_path.endswith('.json'): + return pipeline_class.from_json(pipeline_file, resolver=resolver, strict_digest=strict_digest) + else: + raise ValueError("Unknown file extension.") + else: + return resolver.get_pipeline({'id': pipeline_path}) + + +def describe_handler( + arguments: argparse.Namespace, *, resolver_class: typing.Type[Resolver] = None, + no_resolver_class: typing.Type[Resolver] = None, pipeline_class: typing.Type[Pipeline] = None, +) -> None: + if resolver_class is None: + resolver_class = Resolver + if no_resolver_class is None: + no_resolver_class = NoResolver + if pipeline_class is None: + pipeline_class = Pipeline + + if getattr(arguments, 'no_resolving', False): + resolver: Resolver = no_resolver_class() + else: + resolver = resolver_class( + strict_resolving=getattr(arguments, 'strict_resolving', False), + strict_digest=getattr(arguments, 'strict_digest', False), + pipeline_search_paths=getattr(arguments, 'pipeline_search_paths', []), + ) + + output_stream = getattr(arguments, 'output', sys.stdout) + + has_errored = False + + for pipeline_path in arguments.pipelines: + if getattr(arguments, 'list', False): + print(pipeline_path, file=output_stream) + + try: + with utils.open(pipeline_path, 'r', encoding='utf8') as pipeline_file: + 
if pipeline_path.endswith('.yml') or pipeline_path.endswith('.yaml') or pipeline_path.endswith('.yml.gz') or pipeline_path.endswith('.yaml.gz'): + pipeline = pipeline_class.from_yaml( + pipeline_file, + resolver=resolver, + strict_digest=getattr(arguments, 'strict_digest', False), + ) + elif pipeline_path.endswith('.json') or pipeline_path.endswith('.json.gz'): + pipeline = pipeline_class.from_json( + pipeline_file, + resolver=resolver, + strict_digest=getattr(arguments, 'strict_digest', False), + ) + else: + raise ValueError("Unknown file extension.") + except Exception as error: + if getattr(arguments, 'continue', False): + traceback.print_exc(file=output_stream) + print(f"Error parsing pipeline: {pipeline_path}", file=output_stream) + has_errored = True + continue + else: + raise Exception(f"Error parsing pipeline: {pipeline_path}") from error + + if getattr(arguments, 'check', True): + try: + pipeline.check( + allow_placeholders=getattr(arguments, 'allow_placeholders', False), + standard_pipeline=getattr(arguments, 'standard_pipeline', True), + ) + except Exception as error: + if getattr(arguments, 'continue', False): + traceback.print_exc(file=output_stream) + print(f"Error checking pipeline: {pipeline_path}", file=output_stream) + has_errored = True + continue + else: + raise Exception("Error checking pipeline: {pipeline_path}".format(pipeline_path=pipeline_path)) from error + + try: + if getattr(arguments, 'set_source_name', None) is not None: + if pipeline.source is None: + pipeline.source = {} + if arguments.set_source_name: + pipeline.source['name'] = arguments.set_source_name + elif 'name' in pipeline.source: + del pipeline.source['name'] + if not pipeline.source: + pipeline.source = None + + pipeline_description = pipeline.to_json_structure(canonical=True) + + if getattr(arguments, 'print', False): + pprint.pprint(pipeline_description, stream=output_stream) + else: + json.dump( + pipeline_description, + output_stream, + indent=(getattr(arguments, 'indent', 2) or None), + sort_keys=getattr(arguments, 'sort_keys', False), + allow_nan=False, + ) # type: ignore + output_stream.write('\n') + except Exception as error: + if getattr(arguments, 'continue', False): + traceback.print_exc(file=output_stream) + print(f"Error describing pipeline: {pipeline_path}", file=output_stream) + has_errored = True + continue + else: + raise Exception(f"Error describing pipeline: {pipeline_path}") from error + + if has_errored: + sys.exit(1) + + +if pyarrow_lib is not None: + pyarrow_lib._default_serialization_context.register_type( + Pipeline, 'd3m.pipeline', pickle=True, + ) + + +def main(argv: typing.Sequence) -> None: + raise exceptions.NotSupportedError("This CLI has been removed. 
Use \"python3 -m d3m pipeline describe\" instead.") + + +if __name__ == '__main__': + main(sys.argv) diff --git a/d3m/d3m/metadata/pipeline_run.py b/d3m/d3m/metadata/pipeline_run.py new file mode 100644 index 0000000..3262c29 --- /dev/null +++ b/d3m/d3m/metadata/pipeline_run.py @@ -0,0 +1,1683 @@ +import argparse +import collections +import copy +import datetime +import enum +import json +import logging +import os.path +import re +import sys +import traceback +import typing +import uuid +import yaml + +import dateparser # type: ignore +import git # type: ignore +import GPUtil # type: ignore + +import d3m +from d3m import container, environment_variables, exceptions, utils, types +from d3m.metadata import base as metadata_base, hyperparams as hyperparams_module, pipeline as pipeline_module, problem +from d3m.primitive_interfaces import base + +# See: https://gitlab.com/datadrivendiscovery/d3m/issues/66 +try: + from pyarrow import lib as pyarrow_lib # type: ignore +except ModuleNotFoundError: + pyarrow_lib = None + +__all__ = ('PipelineRun', 'User', 'RuntimeEnvironment') + +logger = logging.getLogger(__name__) + +DOCKER_MAC_ADDRESS_MASK = 0x0242ac110000 +PROC_INFO_RE = re.compile(r'^([^:]+?)\s*:\s*(.*)$') +PROC_MEMORY_PATH = '/proc/meminfo' +PROC_CPU_PATH = '/proc/cpuinfo' +PROC_CPU_MODEL_NAME_KEY = 'model name' +PROC_CPU_PHYSICAL_ID_KEY = 'physical id' +PROC_CPU_CORES_KEY = 'cpu cores' +PROC_TOTAL_MEMORY_KEY = 'MemTotal' +CGROUP_MEMORY_LIMIT_PATH = '/sys/fs/cgroup/memory/memory.limit_in_bytes' +CGROUP_CPU_SHARES_PATH = '/sys/fs/cgroup/cpu/cpu.shares' +CGROUP_CPU_CFS_PERIOD_US_PATH = '/sys/fs/cgroup/cpu/cpu.cfs_period_us' +CGROUP_CPU_CFS_QUOTA_US_PATH = '/sys/fs/cgroup/cpu/cpu.cfs_quota_us' + +WORKER_ID_NAMESPACE = uuid.UUID('2e4b9ab7-2207-4975-892b-0e01bf95babf') + +# Comma because we unpack the list of validators returned from "load_schema_validators". 
+PIPELINE_RUN_SCHEMA_VALIDATOR, = utils.load_schema_validators(metadata_base.SCHEMAS, ('pipeline_run.json',)) + +PIPELINE_RUN_SCHEMA_VERSION = 'https://metadata.datadrivendiscovery.org/schemas/v0/pipeline_run.json' + + +class User(dict): + def __init__(self, id_: str, chosen: bool = False, rationale: str = None) -> None: + super().__init__() + + self['id'] = id_ + self['chosen'] = chosen + + if rationale is not None: + self['rationale'] = rationale + + @classmethod + def _yaml_representer(cls, dumper: yaml.Dumper, data: typing.Any) -> typing.Any: + return dumper.represent_dict(data) + + +utils.yaml_add_representer(User, User._yaml_representer) + + +class PipelineRunStep: + def __init__( + self, step_type: metadata_base.PipelineStepType, start: str, environment: typing.Dict[str, typing.Any] = None + ) -> None: + self.type = step_type + self.status: typing.Dict[str, typing.Any] = {} + self.start: str = start + self.end: str = None + self.environment = environment + + def to_json_structure(self) -> typing.Dict: + if self.start is None: + raise exceptions.InvalidStateError("Start timestamp not set.") + + if self.end is None: + raise exceptions.InvalidStateError("End timestamp not set.") + + if 'state' not in self.status: + raise exceptions.InvalidStateError("Status not set.") + + json_structure = { + 'type': self.type.name, + 'status': self.status, + 'start': self.start, + 'end': self.end + } + + if self.environment is not None: + json_structure['environment'] = self.environment + + return json_structure + + def set_successful(self, message: str = None) -> None: + self.status['state'] = metadata_base.PipelineRunStatusState.SUCCESS.name + if message is not None and message: + self.status['message'] = message + + def set_failed(self, message: str = None) -> None: + self.status['state'] = metadata_base.PipelineRunStatusState.FAILURE.name + if message is not None and message: + self.status['message'] = message + + def set_end_timestamp(self) -> None: + self.end = utils.datetime_for_json(datetime.datetime.now(datetime.timezone.utc)) + + +class PipelineRunPrimitiveStep(PipelineRunStep): + def __init__( + self, step: pipeline_module.PrimitiveStep, start: str, environment: typing.Dict[str, typing.Any] = None, + ) -> None: + super().__init__( + step_type=metadata_base.PipelineStepType.PRIMITIVE, + start=start, + environment=environment + ) + + self.hyperparams: hyperparams_module.Hyperparams = None + self.pipeline_hyperparams: typing.Set[str] = None + self.random_seed: typing.Optional[int] = None + self.method_calls: typing.List[typing.Dict[str, typing.Any]] = [] + self.arguments = step.arguments + + def to_json_structure(self) -> typing.Dict: + json_structure = super().to_json_structure() + + # Validate that the Method calls are finished, and they have status. 
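Before the validation loop that follows, a hedged, self-contained sketch of the lifecycle the base class above expects: the start timestamp is supplied at construction, and both the end timestamp and a status state must be recorded before serialization.

```python
# Hedged sketch: to_json_structure raises InvalidStateError until the end timestamp and
# status state have been set on the step.
import datetime
from d3m import utils
from d3m.metadata import base as metadata_base
from d3m.metadata.pipeline_run import PipelineRunStep   # the class defined above

start = utils.datetime_for_json(datetime.datetime.now(datetime.timezone.utc))
step = PipelineRunStep(metadata_base.PipelineStepType.PRIMITIVE, start=start)
step.set_end_timestamp()
step.set_successful("finished without errors")
print(step.to_json_structure()['status']['state'])   # 'SUCCESS'
```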
+ for method_call in self.method_calls: + if 'end' not in method_call: + raise exceptions.InvalidStateError("End timestamp not set.") + if 'status' not in method_call: + raise exceptions.InvalidStateError("Status not set.") + + if self.method_calls: + json_structure['method_calls'] = self.method_calls + + if self.random_seed is not None: + json_structure['random_seed'] = self.random_seed + + hyperparams_json_structure = self._hyperparams_to_json_structure() + if hyperparams_json_structure is not None: + json_structure['hyperparams'] = hyperparams_json_structure + + return json_structure + + def _hyperparams_to_json_structure(self) -> typing.Optional[typing.Dict]: + if self.hyperparams is None: + return None + + hyperparams_json = {} + + for hyperparameter_name, value in self.hyperparams.items(): + if hyperparameter_name in self.pipeline_hyperparams: + continue + + hyperparams_json[hyperparameter_name] = { + 'type': metadata_base.ArgumentType.VALUE.name, + 'data': self.hyperparams.configuration[hyperparameter_name].value_to_json_structure(value), + } + + if hyperparams_json: + return hyperparams_json + else: + return None + + def add_method_call( + self, method_name: str, *, runtime_arguments: typing.Dict = None, + environment: typing.Dict[str, typing.Any] = None + ) -> int: + """ + Returns + ------- + The id of the method call. + """ + + if runtime_arguments is None: + runtime_arguments = {} + else: + # We convert everything directly to json structure. + def recurse(item: typing.Any) -> typing.Any: + if isinstance(item, enum.Enum): + return item.name + elif not isinstance(item, typing.Dict): + return item + else: + _json_structure = {} + for key, value in item.items(): + _json_structure[key] = recurse(value) + return _json_structure + + runtime_arguments = recurse(runtime_arguments) + + if method_name == '__init__' and runtime_arguments: + raise exceptions.InvalidArgumentValueError( + f'MethodCall with method `__init__` cannot have arguments. ' + f'Hyper-parameters are the arguments to `__init__`.' + ) + + method_call: typing.Dict[str, typing.Any] = { + 'name': method_name, + } + + if runtime_arguments: + method_call['arguments'] = runtime_arguments + + # we store everything as json structure. + if environment is not None: + method_call['environment'] = environment + + self.method_calls.append(method_call) + return len(self.method_calls) - 1 + + def set_method_call_start_timestamp(self, method_call_id: int) -> None: + self.method_calls[method_call_id]['start'] = utils.datetime_for_json(datetime.datetime.now()) + + def set_method_call_end_timestamp(self, method_call_id: int) -> None: + if 'start' not in self.method_calls[method_call_id]: + raise exceptions.InvalidStateError("Start timestamp not set.") + self.method_calls[method_call_id]['end'] = utils.datetime_for_json(datetime.datetime.now()) + + def set_method_call_result_metadata(self, method_call_id: int, result: typing.Union[base.CallResult, base.MultiCallResult]) -> None: + metadata = None + if isinstance(result, base.CallResult): + if result.value is not None and isinstance(result.value, types.Container): + metadata = { + # TODO: Should we use "to_internal_json_structure" here? + 'value': result.value.metadata.to_json_structure() + } + elif isinstance(result, base.MultiCallResult): + metadata = { + # TODO: Should we use "to_internal_json_structure" here? 
+ produce_method_name: value.metadata.to_json_structure() + for produce_method_name, value in result.values.items() + if value is not None and isinstance(value, types.Container) + } + + # check if metadata is empty + if metadata is not None: + for key, value in metadata.items(): + if value is not None: + self.method_calls[method_call_id]['metadata'] = metadata + break + + def set_method_call_successful(self, method_call_id: int, message: str = None) -> None: + self.method_calls[method_call_id]['status'] = { + 'state': metadata_base.PipelineRunStatusState.SUCCESS.name, + } + if message is not None and message: + self.method_calls[method_call_id]['status']['message'] = message + + def set_method_call_failed(self, method_call_id: int, message: str = None) -> None: + self.method_calls[method_call_id]['status'] = { + 'state': metadata_base.PipelineRunStatusState.FAILURE.name, + } + if message is not None and message: + self.method_calls[method_call_id]['status']['message'] = message + + def get_method_call_logging_callback(self, method_call_id: int) -> typing.Callable: + if 'logging' not in self.method_calls[method_call_id]: + self.method_calls[method_call_id]['logging'] = [] + return self.method_calls[method_call_id]['logging'].append + + +class PipelineRunSubpipelineStep(PipelineRunStep): + def __init__(self, start: str, random_seed: int, environment: typing.Dict[str, typing.Any] = None) -> None: + super().__init__( + step_type=metadata_base.PipelineStepType.SUBPIPELINE, + start=start, + environment=environment, + ) + + self.random_seed = random_seed + self.steps: typing.List[typing.Dict] = [] + + def to_json_structure(self) -> typing.Dict: + json_structure = super().to_json_structure() + json_structure['random_seed'] = self.random_seed + if self.steps: + json_structure['steps'] = self.steps + return json_structure + + def add_step(self, step: typing.Dict) -> None: + self.steps.append(step) + + +class PipelineRun: + STEPS = 'steps' + METHOD_CALLS = 'method_calls' + + def __init__( + self, pipeline: pipeline_module.Pipeline, problem_description: problem.Problem = None, *, + phase: metadata_base.PipelineRunPhase, context: metadata_base.Context, + environment: typing.Dict[str, typing.Any], random_seed: int, previous_pipeline_run: 'PipelineRun' = None, + is_standard_pipeline: bool = False, users: typing.Sequence[User] = None, + ) -> None: + self.schema = PIPELINE_RUN_SCHEMA_VERSION + + self.pipeline = { + 'id': pipeline.id, + 'digest': pipeline.get_digest(), + } + + self.datasets: typing.List[typing.Dict[str, typing.Any]] = [] + + self.problem: typing.Dict[str, typing.Any] = None + if problem_description is not None: + self._set_problem(problem_description) + + self.steps: typing.List[PipelineRunStep] = [] + self.status: typing.Dict[str, typing.Any] = {} + self.start: str = None + self.end: str = None + + self.run: typing.Dict[str, typing.Any] = { + 'phase': phase.name, + 'is_standard_pipeline': is_standard_pipeline, + } + self.context = context + self.previous_pipeline_run = previous_pipeline_run + + if users is None: + self.users: typing.List[User] = [] + else: + self.users = list(users) + + self.environment = environment + self.random_seed = random_seed + self.is_standard_pipeline = is_standard_pipeline + + self._components: typing.Dict[str, typing.Any] = {} + self._step_start_timestamps: typing.Dict[int, str] = {} + + def _to_json_structure(self) -> typing.Dict: + if self.start is None: + raise exceptions.InvalidStateError("Start timestamp not set.") + + if self.end is None: + raise 
exceptions.InvalidStateError("End timestamp not set.") + + if 'state' not in self.status: + raise exceptions.InvalidStateError("Status not set.") + + # Scoring datasets are set only when scoring is used without data preparation. + if 'scoring' in self.run: + if 'data_preparation' in self.run: + if 'datasets' in self.run['scoring']: + raise exceptions.InvalidStateError( + "Scoring datasets must not be provided when scoring is used with data preparation pipeline.", + ) + elif 'datasets' not in self.run['scoring']: + raise exceptions.InvalidStateError( + "Scoring datasets must be provided when scoring is used without data preparation pipeline.", + ) + + json_structure = { + 'schema': self.schema, + 'pipeline': self.pipeline, + 'datasets': self.datasets, + 'status': self.status, + 'start': self.start, + 'end': self.end, + 'run': self.run, + 'environment': self.environment, + 'random_seed': self.random_seed, + } + + if self.steps: + json_structure['steps'] = [step.to_json_structure() for step in self.steps] + + if self.previous_pipeline_run is not None: + json_structure['previous_pipeline_run'] = { + 'id': self.previous_pipeline_run.get_id() + } + + if self.context is not None: + json_structure['context'] = self.context.name + + if self.problem is not None: + json_structure['problem'] = self.problem + + if self.users: + json_structure['users'] = self.users + + json_structure['id'] = utils.compute_hash_id(json_structure) + + return json_structure + + def to_json_structure(self) -> typing.Dict: + # We raise exception here instead of waiting for schema validation to fails to provide a more helpful error message. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/355 + if not self.is_standard_pipeline and not self.datasets: + raise exceptions.InvalidStateError("Pipeline run for a non-standard pipeline cannot be converted to a JSON structure.") + + # TODO: Remove "utils.to_json_structure" once sure that "_to_json_structure" really returns a JSON structure. 
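The run id computed above is content-addressed: it is a hash over the serialized run document itself, so equal content yields an equal id. A hedged sketch of the idea, assuming (as the usage above suggests) that `utils.compute_hash_id` is a deterministic function of any JSON-compatible dict:

```python
# Hedged sketch: two run documents with identical content get identical ids.
from d3m import utils
from d3m.metadata.pipeline_run import PIPELINE_RUN_SCHEMA_VERSION

document = {'schema': PIPELINE_RUN_SCHEMA_VERSION, 'status': {'state': 'SUCCESS'}}
print(utils.compute_hash_id(document) == utils.compute_hash_id(dict(document)))   # True
```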
+ json_structure = utils.to_json_structure(self._to_json_structure()) + + PIPELINE_RUN_SCHEMA_VALIDATOR.validate(json_structure) + + return json_structure + + def to_yaml(self, file: typing.IO[typing.Any], *, appending: bool = False, **kwargs: typing.Any) -> typing.Optional[str]: + obj = self.to_json_structure() + + if appending and 'explicit_start' not in kwargs: + kwargs['explicit_start'] = True + + return utils.yaml_dump(obj, stream=file, **kwargs) + + def add_input_dataset(self, dataset: container.Dataset) -> None: + metadata = dataset.metadata.query(()) + self.datasets.append({ + 'id': metadata['id'], + 'digest': metadata['digest'], + }) + + def add_primitive_step(self, step: pipeline_module.PrimitiveStep) -> int: + if not isinstance(step, pipeline_module.PrimitiveStep): + raise exceptions.InvalidArgumentTypeError('step must be of type PrimitiveStep, not {}'.format(type(step))) + self.steps.append( + PipelineRunPrimitiveStep(step, self._step_start_timestamps[len(self.steps)]) + ) + return len(self.steps) - 1 + + def _get_primitive_step(self, primitive_step_id: int) -> PipelineRunPrimitiveStep: + if primitive_step_id >= len(self.steps): + raise exceptions.InvalidArgumentValueError('There does not exist a step with id {}'.format(primitive_step_id)) + + primitive_step = self.steps[primitive_step_id] + if not isinstance(primitive_step, PipelineRunPrimitiveStep): + raise exceptions.InvalidArgumentValueError('Step id {} does not refer to a PipelineRunPrimitiveStep'.format(primitive_step_id)) + + return primitive_step + + def set_primitive_step_hyperparams( + self, primitive_step_id: int, + hyperparams: hyperparams_module.Hyperparams, + pipeline_hyperparams: typing.Dict[str, typing.Dict], + ) -> None: + primitive_step = self._get_primitive_step(primitive_step_id) + primitive_step.hyperparams = hyperparams + primitive_step.pipeline_hyperparams = set(pipeline_hyperparams.keys()) + + def set_primitive_step_random_seed(self, primitive_step_id: int, random_seed: int) -> None: + primitive_step = self._get_primitive_step(primitive_step_id) + primitive_step.random_seed = random_seed + + def add_subpipeline_step(self, subpipeline_run: 'PipelineRun') -> int: + pipeline_run_subpipeline_step = PipelineRunSubpipelineStep( + self._step_start_timestamps[len(self.steps)], subpipeline_run.random_seed + ) + + for step_id, step in enumerate(subpipeline_run.steps): + step_json = step.to_json_structure() + pipeline_run_subpipeline_step.add_step(step_json) + state = step_json['status']['state'] + message = step_json['status'].get('message', None) + if state == metadata_base.PipelineRunStatusState.SUCCESS.name: + pipeline_run_subpipeline_step.set_successful(message) + elif state == metadata_base.PipelineRunStatusState.FAILURE.name: + message = 'Failed on subpipeline step {}:\n{}'.format(step_id, message) + pipeline_run_subpipeline_step.set_failed(message) + if message is not None and message: + self.status['message'] = message + else: + raise exceptions.UnexpectedValueError('unknown subpipeline status state: {}'.format(state)) + + self.steps.append(pipeline_run_subpipeline_step) + + return len(self.steps) - 1 + + def add_method_call_to_primitive_step( + self, primitive_step_id: int, method_name: str, *, + runtime_arguments: typing.Dict = None, environment: typing.Dict[str, typing.Any] = None + ) -> typing.Tuple[int, int]: + if runtime_arguments is None: + runtime_arguments = {} + + # TODO allow runtime arguments not specified in pipeline? 
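Stepping back to `to_yaml` above: with `appending=True`, `explicit_start` turns each run into its own YAML document, so many runs can be appended to a single file. A minimal illustration with plain PyYAML and hypothetical run documents:

```python
# Illustrative only: explicit document starts produce a multi-document YAML stream that
# can be read back with yaml.safe_load_all.
import io
import yaml

stream = io.StringIO()
yaml.safe_dump({'id': 'run-1', 'status': {'state': 'SUCCESS'}}, stream, explicit_start=True)
yaml.safe_dump({'id': 'run-2', 'status': {'state': 'FAILURE'}}, stream, explicit_start=True)
print(len(list(yaml.safe_load_all(stream.getvalue()))))   # 2
```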
+ primitive_step = self._get_primitive_step(primitive_step_id) + method_call_id = primitive_step.add_method_call( + method_name, runtime_arguments=runtime_arguments, environment=environment + ) + return (primitive_step_id, method_call_id) + + def get_method_call_logging_callback( + self, step_and_method_call_id: typing.Tuple[int, int] + ) -> typing.Callable: + step_id, method_call_id = step_and_method_call_id + primitive_step = self._get_primitive_step(step_id) + return primitive_step.get_method_call_logging_callback(method_call_id) + + def run_started(self) -> None: + self.start = utils.datetime_for_json(datetime.datetime.now(datetime.timezone.utc)) + + def _set_end_timestamp(self) -> None: + self.end = utils.datetime_for_json(datetime.datetime.now(datetime.timezone.utc)) + + def step_started(self, step_id: int) -> None: + self._step_start_timestamps[step_id] = utils.datetime_for_json(datetime.datetime.now(datetime.timezone.utc)) + + def method_call_started(self, step_and_method_call_id: typing.Tuple[int, int]) -> None: + step_id, method_call_id = step_and_method_call_id + primitive_step = self._get_primitive_step(step_id) + primitive_step.set_method_call_start_timestamp(method_call_id) + + def set_method_call_result_metadata( + self, step_and_method_call_id: typing.Tuple[int, int], + result: typing.Union[base.CallResult, base.MultiCallResult] + ) -> None: + step_id, method_call_id = step_and_method_call_id + primitive_step = self._get_primitive_step(step_id) + primitive_step.set_method_call_result_metadata(method_call_id, result) + + def run_successful(self, message: str = None) -> None: + self._set_end_timestamp() + self.status['state'] = metadata_base.PipelineRunStatusState.SUCCESS.name + if message is not None and message: + self.status['message'] = message + + def step_successful(self, step_id: int, message: str = None) -> None: + if step_id >= len(self.steps): + raise exceptions.InvalidArgumentValueError('There does not exist a step with id {}'.format(step_id)) + self.steps[step_id].set_end_timestamp() + self.steps[step_id].set_successful(message) + + def method_call_successful(self, step_and_method_call_id: typing.Tuple[int, int], message: str = None) -> None: + step_id, method_call_id = step_and_method_call_id + primitive_step = self._get_primitive_step(step_id) + primitive_step.set_method_call_end_timestamp(method_call_id) + primitive_step.set_method_call_successful(method_call_id, message) + + def run_failed(self, message: str = None) -> None: + self._set_end_timestamp() + self.status['state'] = metadata_base.PipelineRunStatusState.FAILURE.name + if message is not None and message: + self.status['message'] = message + + def step_failed(self, step_id: int, message: str = None) -> None: + if step_id >= len(self.steps): + return + self.steps[step_id].set_end_timestamp() + self.steps[step_id].set_failed(message) + + def method_call_failed(self, step_and_method_call_id: typing.Tuple[int, int], message: str = None) -> None: + step_id, method_call_id = step_and_method_call_id + if step_id >= len(self.steps): + return + primitive_step = self._get_primitive_step(step_id) + primitive_step.set_method_call_end_timestamp(method_call_id) + primitive_step.set_method_call_failed(method_call_id, message) + + def is_failed(self) -> bool: + return self.status['state'] == metadata_base.PipelineRunStatusState.FAILURE.name + + def _set_problem(self, problem_description: problem.Problem) -> None: + self.problem = { + 'id': problem_description['id'], + 'digest': problem_description.get_digest(), + } + 
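An end-to-end sketch of the recording lifecycle above, heavily hedged: it uses a trivial step-less pipeline, the `add_input`/`add_output` calls and the `PipelineRunPhase.PRODUCE`/`Context.TESTING` enum members are assumed from the d3m API, and a real run would also record steps, method calls, and input datasets.

```python
# Hedged sketch: the minimal sequence of calls needed before a run id can be computed.
from d3m.metadata import base as metadata_base, pipeline as pipeline_module
from d3m.metadata.pipeline_run import PipelineRun

trivial = pipeline_module.Pipeline()
reference = trivial.add_input(name='inputs')
trivial.add_output(data_reference=reference, name='predictions')

run = PipelineRun(
    trivial,
    phase=metadata_base.PipelineRunPhase.PRODUCE,
    context=metadata_base.Context.TESTING,
    environment={},          # a RuntimeEnvironment instance would normally go here
    random_seed=0,
)
run.run_started()
run.run_successful("nothing to run for a step-less pipeline")
print(run.is_failed())       # False
print(run.get_id())          # content-addressed id of the run document
```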
+ def set_fold_group(self, fold_group_id: uuid.UUID, fold: int) -> None: + self.run['fold_group'] = { + 'id': str(fold_group_id), + 'fold': fold, + } + + def set_data_preparation_pipeline_run( + self, data_preparation_pipeline_run: 'PipelineRun' + ) -> None: + if data_preparation_pipeline_run.start is None: + raise exceptions.InvalidArgumentValueError("Data preparation pipeline start timestamp argument not provided.") + + if data_preparation_pipeline_run.end is None: + raise exceptions.InvalidArgumentValueError("Data preparation pipeline end timestamp argument not provided.") + + self.run['data_preparation'] = { + 'pipeline': data_preparation_pipeline_run.pipeline, + 'steps': [step.to_json_structure() for step in data_preparation_pipeline_run.steps], + 'status': data_preparation_pipeline_run.status, + 'start': data_preparation_pipeline_run.start, + 'end': data_preparation_pipeline_run.end, + 'random_seed': data_preparation_pipeline_run.random_seed, + } + + if data_preparation_pipeline_run.is_failed(): + message = 'Data preparation pipeline failed:\n{}'.format( + data_preparation_pipeline_run.status['message'] + ) + self.status['state'] = metadata_base.PipelineRunStatusState.FAILURE.name + if message is not None and message: + self.status['message'] = message + + def set_scoring_pipeline_run( + self, scoring_pipeline_run: 'PipelineRun', scoring_datasets: typing.Sequence[typing.Any] = None, + ) -> None: + if scoring_pipeline_run.start is None: + raise exceptions.InvalidArgumentValueError("Scoring pipeline start timestamp argument not provided.") + + if scoring_pipeline_run.end is None: + raise exceptions.InvalidArgumentValueError("Scoring pipeline end timestamp argument not provided.") + + self.run['scoring'] = { + 'pipeline': scoring_pipeline_run.pipeline, + 'steps': [step.to_json_structure() for step in scoring_pipeline_run.steps], + 'status': scoring_pipeline_run.status, + 'start': scoring_pipeline_run.start, + 'end': scoring_pipeline_run.end, + 'random_seed': scoring_pipeline_run.random_seed, + } + + if scoring_datasets: + self.run['scoring']['datasets'] = [] + for dataset in scoring_datasets: + metadata = dataset.metadata.query(()) + self.run['scoring']['datasets'].append({ + 'id': metadata['id'], + 'digest': metadata['digest'], + }) + + if scoring_pipeline_run.is_failed(): + message = 'Scoring pipeline failed:\n{}'.format( + scoring_pipeline_run.status['message'] + ) + self.status['state'] = metadata_base.PipelineRunStatusState.FAILURE.name + if message is not None and message: + self.status['message'] = message + + def set_scores( + self, scores: container.DataFrame, metrics: typing.Sequence[typing.Dict], + ) -> None: + if not self.is_standard_pipeline: + raise exceptions.InvalidStateError("Setting scores for non-standard pipelines is not allowed.") + + json_scores = [] + + if 'normalized' in scores.columns: + columns = ['metric', 'value', 'normalized'] + else: + columns = ['metric', 'value'] + + for row in scores.loc[:, columns].itertuples(index=False, name=None): + metric, value = row[:2] + + json_scores.append( + { + # TODO: Why is "deepcopy" needed here? 
+ 'metric': copy.deepcopy(self._get_metric_description(metric, metrics)), + 'value': float(value), + }, + ) + + if len(row) == 3: + json_scores[-1]['normalized'] = float(row[2]) + + if not json_scores: + return + + if 'results' not in self.run: + self.run['results'] = {} + + if 'scores' not in self.run['results']: + self.run['results']['scores'] = json_scores + else: + raise exceptions.InvalidStateError("Scores already set for pipeline run.") + + def _get_metric_description(self, metric: str, performance_metrics: typing.Sequence[typing.Dict]) -> typing.Dict: + """ + Returns a metric description from a list of them, given metric. + + Parameters + ---------- + metric: + A metric name. + performance_metrics: + A list of performance metric descriptions requested for scoring. + + Returns + ------- + A metric description. + """ + + for performance_metric in performance_metrics: + if performance_metric['metric'] == metric: + metric_description = { + 'metric': performance_metric['metric'].name, + } + + if performance_metric.get('params', {}): + metric_description['params'] = performance_metric['params'] + + return metric_description + + return { + 'metric': metric, + } + + def set_predictions(self, predictions: container.DataFrame) -> None: + if not self.is_standard_pipeline: + raise exceptions.InvalidStateError("Setting predictions for non-standard pipelines is not allowed.") + + if not isinstance(predictions, container.DataFrame): + logger.warning("Unable to set predictions for pipeline run because predictions are not a DataFrame.") + return + + try: + json_predictions: typing.Dict[str, typing.List] = { + 'header': [], + 'values': [], + } + + column_names = [] + for column_index in range(len(predictions.columns)): + # We use column name from the DataFrame is metadata does not have it. This allows a bit more compatibility. + column_names.append(predictions.metadata.query_column(column_index).get('name', predictions.columns[column_index])) + + # "tolist" converts values to Python values and does not keep them as numpy.float64 or other special types. + json_predictions['values'].append(utils.to_json_structure(predictions.iloc[:, column_index].tolist())) + + json_predictions['header'] += column_names + + except Exception as error: + logger.warning("Unable to convert predictions to JSON structure for pipeline run.", exc_info=error) + return + + if 'results' not in self.run: + self.run['results'] = {} + + if 'predictions' not in self.run['results']: + self.run['results']['predictions'] = json_predictions + else: + raise exceptions.InvalidStateError("Predictions already set for pipeline run.") + + def get_id(self) -> str: + return self._to_json_structure()['id'] + + @classmethod + def json_structure_equals(cls, pipeline_run1: typing.Dict, pipeline_run2: typing.Dict) -> bool: + """ + Checks whether two pipeline runs in a JSON structure are equal. + This ignores the pipeline run id and all timestamps. 
+ """ + + if not isinstance(pipeline_run1, collections.Mapping) or not isinstance(pipeline_run2, collections.Mapping): + raise exceptions.InvalidArgumentTypeError("Pipeline run arguments must be dicts.") + + return utils.json_structure_equals(pipeline_run1, pipeline_run2, {'id', 'start', 'end', 'environment', 'logging'}) + + +class RuntimeEnvironment(dict): + def __init__( + self, *, + worker_id: str = None, + cpu_resources: typing.Dict[str, typing.Any] = None, + memory_resources: typing.Dict[str, typing.Any] = None, + gpu_resources: typing.Dict[str, typing.Any] = None, + reference_benchmarks: typing.Sequence[str] = None, + reference_engine_version: str = None, + engine_version: str = None, + base_docker_image: typing.Dict[str, str] = None, + docker_image: typing.Dict[str, str] = None, + ) -> None: + """ + Create an instance of the runtime environment description in which a pipeline is run. + + All values stored in an instance should be JSON compatible. + + Parameters + ---------- + worker_id: + A globally unique identifier for the machine on which the runtime is running. + The idea is that multiple runs on the same system can be grouped together. + If not provided, `uuid.getnode()` is used to obtain an identifier. + cpu_resources: + A description of the CPU resources available in this environment. + memory_resources: + A description of the memory resources available in this environment. + gpu_resources: + A description of the GPU resources available in this environment. + reference_benchmarks: + A list of ids of standard and optional additional benchmarks which were run in the same or + equivalent RuntimeEnvironment. The timing characteristics of these benchmarks can be + expected to be the same as anything timed in this RuntimeEnvironment. + reference_engine_version: + A git commit hash or version number for the reference engine used. If subclassing the + reference engine, list it here. + engine_version: + A git commit hash or version number for the engine used. This is primarily useful for the + author. If using the reference engine directly, list its git commit hash or version number + here as well as in the reference_engine_version. + base_docker_image: + If the engine was run in a public or known docker container, specify the base docker image + description here. + docker_image: + If the engine was run in a public or known docker container, specify the actual docker + image description here. This is primarily useful for the author. 
+ """ + + super().__init__() + + if worker_id is None: + worker_id = self._get_worker_id() + self['worker_id'] = worker_id + + resources = {} + if cpu_resources is None: + cpu_resources = self._get_cpu_resources() + if cpu_resources is not None: + resources['cpu'] = cpu_resources + if memory_resources is None: + memory_resources = self._get_memory_resources() + if memory_resources is not None: + resources['memory'] = memory_resources + if gpu_resources is None: + gpu_resources = self._get_gpu_resources() + if gpu_resources is not None: + resources['gpu'] = gpu_resources + + if resources: + self['resources'] = resources + + if reference_benchmarks is not None: + self['reference_benchmarks'] = reference_benchmarks + + if reference_engine_version is None: + reference_engine_version = self._get_reference_engine_version() + self['reference_engine_version'] = reference_engine_version + + if engine_version is None: + engine_version = self['reference_engine_version'] + self['engine_version'] = engine_version + + if base_docker_image is None: + base_docker_image = self._get_docker_image( + environment_variables.D3M_BASE_IMAGE_NAME, + environment_variables.D3M_BASE_IMAGE_DIGEST, + ) + if base_docker_image is not None: + self['base_docker_image'] = base_docker_image + + if docker_image is None: + docker_image = self._get_docker_image( + environment_variables.D3M_IMAGE_NAME, + environment_variables.D3M_IMAGE_DIGEST, + ) + if docker_image is not None: + self['docker_image'] = docker_image + + # Here we assume that all values stored in "self" are JSON compatible. + self['id'] = utils.compute_hash_id(self) + + @classmethod + def _get_reference_engine_version(cls) -> str: + try: + # Get the git commit hash of the d3m repository. + path = os.path.abspath(d3m.__file__).rsplit('d3m', 1)[0] + return utils.current_git_commit( + path=path, search_parent_directories=False, + ) + except git.exc.InvalidGitRepositoryError: + return d3m.__version__ + + @classmethod + def _get_worker_id(cls) -> str: + """ + Compute the worker id. + """ + + mac_address = uuid.getnode() + + if mac_address >> 16 == DOCKER_MAC_ADDRESS_MASK >> 16: + # Docker generates MAC addresses in the range 02:42:ac:11:00:00 to 02:42:ac:11:ff:ff + # if one is not provided in the configuration + logger.warning( + "'worker_id' was generated using the MAC address inside Docker " + "container and is not a reliable compute resource identifier." + ) + elif (mac_address >> 40) % 2 == 1: + # uuid.getnode docs state: + # If all attempts to obtain the hardware address fail, we choose a + # random 48-bit number with its eighth bit set to 1 as recommended + # in RFC 4122. + logger.warning( + "'worker_id' was generated using a random number because the " + "MAC address could not be determined." + ) + + return str(uuid.uuid5(WORKER_ID_NAMESPACE, json.dumps(mac_address, sort_keys=True))) + + @classmethod + def _get_docker_image(cls, image_name_env_var: str, image_digest_env_var: str) -> typing.Optional[typing.Dict]: + """ + Returns the docker image description. 
+ """ + + docker_image = {} + + if image_name_env_var not in os.environ: + logger.warning('Docker image environment variable not set: %(variable_name)s', { + 'variable_name': image_name_env_var, + }) + elif os.environ[image_name_env_var]: + docker_image['image_name'] = os.environ[image_name_env_var] + + if image_digest_env_var not in os.environ: + logger.warning('Docker image environment variable not set: %(variable_name)s', { + 'variable_name': image_digest_env_var, + }) + elif os.environ[image_digest_env_var]: + docker_image['image_digest'] = os.environ[image_digest_env_var] + + if docker_image: + return docker_image + else: + return None + + @classmethod + def _get_configured(cls, environment_variable: str) -> typing.Optional[str]: + if environment_variable not in os.environ: + logger.warning('Configuration environment variable not set: %(variable_name)s', { + 'variable_name': environment_variable, + }) + return None + elif os.environ[environment_variable]: + return os.environ[environment_variable] + else: + return None + + # TODO: Split into more methods. + @classmethod + def _get_cpu_resources(cls) -> typing.Optional[typing.Dict[str, typing.Any]]: + cpu_resource: typing.Dict[str, typing.Any] = {} + + cpu_info: typing.Sequence[typing.Dict[str, str]] = [] + try: + cpu_info = cls._read_info_file(PROC_CPU_PATH) + except Exception as error: + logger.warning( + "Failed to get CPU information from '%(proc_cpu_path)s': %(error)s", + { + 'proc_cpu_path': PROC_CPU_PATH, + 'error': error, + }, + ) + + # devices + if cpu_info: + cpu_resource['devices'] = [ + { + 'name': cpu[PROC_CPU_MODEL_NAME_KEY], + } + for cpu in cpu_info + ] + + # physical_present + if cpu_info: + physical_ids: typing.Set[str] = set() + physical_present = 0 + for cpu in cpu_info: + physical_id = cpu[PROC_CPU_PHYSICAL_ID_KEY] + if physical_id in physical_ids: + continue + physical_ids.add(physical_id) + physical_present += int(cpu[PROC_CPU_CORES_KEY]) + cpu_resource['physical_present'] = physical_present + + # logical_present + if cpu_info: + cpu_resource['logical_present'] = len(cpu_info) + + # configured_available + configured_available = cls._get_configured( + environment_variables.D3M_CPU, + ) + if configured_available is not None: + cpu_resource['configured_available'] = configured_available + + # constraints + constraints = {} + try: + with open(CGROUP_CPU_SHARES_PATH, 'r', encoding='ascii') as file: + cpu_shares = int(file.read().strip()) + if cpu_shares < 1e5: + constraints['cpu_shares'] = cpu_shares + except Exception as error: + logger.warning( + "Failed to get CPU information from '%(cgroup_cpu_shares_path)s': %(error)s", + { + 'cgroup_cpu_shares_path': CGROUP_CPU_SHARES_PATH, + 'error': error, + }, + ) + try: + with open(CGROUP_CPU_CFS_PERIOD_US_PATH, 'r', encoding='ascii') as file: + cfs_period_us = int(file.read().strip()) + constraints['cfs_period_us'] = cfs_period_us + except Exception as error: + logger.warning( + "Failed to get CPU information from '%(cgroup_cpu_cfs_period_us_path)s': %(error)s", + { + 'cgroup_cpu_cfs_period_us_path': CGROUP_CPU_CFS_PERIOD_US_PATH, + 'error': error, + }, + ) + try: + with open(CGROUP_CPU_CFS_QUOTA_US_PATH, 'r', encoding='ascii') as file: + cfs_quota_us = int(file.read().strip()) + if cfs_quota_us >= 0: + constraints['cfs_quota_us'] = cfs_quota_us + except Exception as error: + logger.warning( + "Failed to get CPU information from '%(cgroup_cpu_cfs_quota_us_path)s': %(error)s", + { + 'cgroup_cpu_cfs_quota_us_path': CGROUP_CPU_CFS_QUOTA_US_PATH, + 'error': error, + }, + ) + + 
if 'cfs_period_us' in constraints and 'cfs_quota_us' not in constraints: + del constraints['cfs_period_us'] + + if constraints: + cpu_resource['constraints'] = constraints + + if cpu_resource: + return cpu_resource + else: + return None + + @classmethod + def _read_info_file(cls, path: str) -> typing.Sequence[typing.Dict[str, str]]: + info: typing.List[typing.Dict[str, str]] = [{}] + + with open(path, 'r', encoding='ascii') as file: + for line in file: + line = line.strip() + if not line: + info.append({}) + continue + + match = PROC_INFO_RE.match(line) + if match is None: + raise ValueError("Error parsing.") + + key, value = match.groups() + info[-1][key] = value + + if not info[-1]: + del info[-1] + + return info + + # TODO: Split into more methods. + # TODO: Get memory devices. Consider lshw. + @classmethod + def _get_memory_resources(cls) -> typing.Optional[typing.Dict[str, typing.Any]]: + memory_resource: typing.Dict[str, typing.Any] = {} + + # total_memory (bytes) + try: + memory_info = cls._read_info_file(PROC_MEMORY_PATH)[0] + total_memory_kb = int(memory_info[PROC_TOTAL_MEMORY_KEY].split()[0]) + memory_resource['total_memory'] = total_memory_kb * 1024 + except Exception as error: + logger.warning( + "Failed to get memory information from '%(proc_memory_path)s': %(error)s", + { + 'proc_memory_path': PROC_MEMORY_PATH, + 'error': error, + }, + ) + + # configured_memory + configured_memory = cls._get_configured( + environment_variables.D3M_RAM, + ) + if configured_memory is not None: + memory_resource['configured_memory'] = configured_memory + + # constraints + constraints = {} + try: + with open(CGROUP_MEMORY_LIMIT_PATH, 'r', encoding='ascii') as file: + memory_limit = int(file.read().strip()) + if memory_limit < (sys.maxsize // 4096) * 4096: + constraints['memory_limit'] = memory_limit + except FileNotFoundError: + pass + except Exception as error: + logger.warning( + "Failed to get memory information from '%(cgroup_memory_limit_path)s': %(error)s", + { + 'cgroup_memory_limit_path': CGROUP_MEMORY_LIMIT_PATH, + 'error': error, + }, + ) + + if constraints: + memory_resource['constraints'] = constraints + + if memory_resource: + return memory_resource + else: + return None + + # TODO: Split into more methods. + # TODO: Get GPU constraints. + # TODO: Get GPU memory limit configuration. 
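To make the `/proc` parsing done by `_read_info_file` above concrete, here is a self-contained sketch that mirrors its logic on a hypothetical two-CPU `cpuinfo` excerpt, using the same `PROC_INFO_RE` pattern:

```python
# Illustrative only: each non-empty line is split into a key and a value; blank lines
# separate per-CPU blocks.
import re

PROC_INFO_RE = re.compile(r'^([^:]+?)\s*:\s*(.*)$')
sample = (
    "model name\t: Example CPU @ 2.00GHz\n"
    "physical id\t: 0\n"
    "cpu cores\t: 4\n"
    "\n"
    "model name\t: Example CPU @ 2.00GHz\n"
)

blocks, current = [], {}
for line in sample.splitlines():
    line = line.strip()
    if not line:
        blocks.append(current)
        current = {}
        continue
    key, value = PROC_INFO_RE.match(line).groups()
    current[key] = value
blocks.append(current)

print(len(blocks))              # 2 (one block per CPU entry)
print(blocks[0]['cpu cores'])   # '4'
```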
+ @classmethod + def _get_gpu_resources(cls) -> typing.Optional[typing.Dict[str, typing.Any]]: + gpu_resource: typing.Dict[str, typing.Any] = {} + + gpus: typing.List[GPUtil.GPU] = [] + try: + gpus = GPUtil.getGPUs() + except Exception as error: + logger.warning( + "Failed to get GPU information: %(error)s", + { + 'error': error, + }, + ) + + # devices + if gpus: + gpu_resource['devices'] = [ + { + 'name': gpu.name, + 'memory': int(gpu.memoryTotal) * 2**20, + } + for gpu in gpus + ] + + # total_memory (bytes) + if gpus: + total_memory_mib = sum(gpu.memoryTotal for gpu in gpus) + gpu_resource['total_memory'] = int(total_memory_mib) * 2**20 + + if gpu_resource: + return gpu_resource + else: + return None + + @classmethod + def _yaml_representer(cls, dumper: yaml.Dumper, data: typing.Any) -> typing.Any: + return dumper.represent_dict(data) + + +utils.yaml_add_representer(RuntimeEnvironment, RuntimeEnvironment._yaml_representer) + + +def _validate_pipeline_run_random_seeds(pipeline_run: typing.Dict) -> None: + if 'random_seed' not in pipeline_run: + raise exceptions.InvalidPipelineRunError("Pipeline run is missing a random seed.") + + if 'run' in pipeline_run: + if 'data_preparation' in pipeline_run['run'] and 'random_seed' not in pipeline_run['run']['data_preparation']: + raise exceptions.InvalidPipelineRunError("Data preparation pipeline run is missing a random seed.") + + if 'scoring' in pipeline_run['run'] and 'random_seed' not in pipeline_run['run']['scoring']: + raise exceptions.InvalidPipelineRunError("Scoring pipeline run is missing a random seed.") + + for step in pipeline_run.get('steps', []): + if step['type'] == 'SUBPIPELINE': + _validate_pipeline_run_random_seeds(step) + + +def _validate_pipeline_run_timestamps(pipeline_run: typing.Dict, parent_start: datetime.datetime = None, parent_end: datetime.datetime = None) -> None: + if 'start' not in pipeline_run: + raise exceptions.InvalidPipelineRunError("Pipeline run is missing a start timestamp.") + if 'end' not in pipeline_run: + raise exceptions.InvalidPipelineRunError("Pipeline run is missing an end timestamp.") + + start = dateparser.parse(pipeline_run['start'], settings={'TIMEZONE': 'UTC'}) + end = dateparser.parse(pipeline_run['end'], settings={'TIMEZONE': 'UTC'}) + + if start >= end: + raise exceptions.InvalidPipelineRunError("Pipeline run contains a start timestamp which occurs after the corresponding end timestamp.") + + if parent_start is not None and parent_end is not None: + if start <= parent_start or parent_end <= start: + raise exceptions.InvalidPipelineRunError("Pipeline run contains a start timestamp which occurs outside the parent timestamp range.") + + if end <= parent_start or parent_end <= end: + raise exceptions.InvalidPipelineRunError("Pipeline run contains an end timestamp which occurs outside the parent timestamp range.") + + for step in pipeline_run.get('steps', []): + for method_call in pipeline_run.get('method_calls', []): + _validate_pipeline_run_timestamps(method_call, start, end) + + _validate_pipeline_run_timestamps(step, start, end) + + if 'run' in pipeline_run: + if 'data_preparation' in pipeline_run['run']: + _validate_pipeline_run_timestamps(pipeline_run['run']['data_preparation']) + + if 'scoring' in pipeline_run['run']: + _validate_pipeline_run_timestamps(pipeline_run['run']['scoring']) + + +def _validate_success_step(step: typing.Dict) -> None: + if step['type'] == metadata_base.PipelineStepType.PRIMITIVE: + for method_call in step.get('method_calls', []): + if method_call['status']['state'] != 
metadata_base.PipelineRunStatusState.SUCCESS: + raise exceptions.InvalidPipelineRunError( + "Step with '{expected_status}' status has a method call with '{status}' status".format( + expected_status=metadata_base.PipelineRunStatusState.SUCCESS, + status=method_call['status']['state'], + ), + ) + elif step['type'] == metadata_base.PipelineStepType.SUBPIPELINE: + _recurse_success(step) + else: + raise exceptions.UnexpectedValueError("Invalid pipeline run step type: {step_type}".format(step_type=step['type'])) + + +def _validate_failure_step(step: typing.Dict) -> None: + if step['type'] == metadata_base.PipelineStepType.PRIMITIVE: + found_a_method_call_failure = False + for method_call in step.get('method_calls', []): + if found_a_method_call_failure: + raise exceptions.InvalidPipelineRunError( + "There exists a method call after a method call with '{status}' status.".format( + status=metadata_base.PipelineRunStatusState.FAILURE, + ), + ) + if method_call['status']['state'] == metadata_base.PipelineRunStatusState.FAILURE: + found_a_method_call_failure = True + elif step['type'] == metadata_base.PipelineStepType.SUBPIPELINE: + _recurse_failure(step) + else: + raise exceptions.UnexpectedValueError("Invalid pipeline run step type: {step_type}".format(step_type=step['type'])) + + +def _recurse_success(json_structure: typing.Dict) -> None: + if 'steps' not in json_structure: + raise exceptions.InvalidPipelineRunError("Successful pipeline run with missing steps.") + + for step in json_structure['steps']: + if step['status']['state'] != metadata_base.PipelineRunStatusState.SUCCESS: + raise exceptions.InvalidPipelineRunError( + "Pipeline run with '{expected_status}' status has a step with '{status}' status".format( + expected_status=metadata_base.PipelineRunStatusState.SUCCESS, + status=step['status']['state'], + ), + ) + + _validate_success_step(step) + + +def _recurse_failure(json_structure: typing.Dict) -> None: + found_a_step_failure = False + for step in json_structure.get('steps', []): + if found_a_step_failure: + raise exceptions.InvalidPipelineRunError( + "There exists a step after a step with '{status}' status.".format( + status=metadata_base.PipelineRunStatusState.FAILURE, + ), + ) + + if step['status']['state'] == metadata_base.PipelineRunStatusState.SUCCESS: + _validate_success_step(step) + elif step['status']['state'] == metadata_base.PipelineRunStatusState.FAILURE: + found_a_step_failure = True + _validate_failure_step(step) + + +def _validate_pipeline_run_status_consistency(pipeline_run: typing.Dict) -> None: + """ + Verifies that the success or failure states of pipeline run components are consistent with each other. + Any failure state should be propagated upwards to all parents in the pipeline run. The runtime should + "short-circuit", meaning any failure state in the pipeline run should be the final component. 
+ """ + + state = pipeline_run['status']['state'] + if state == metadata_base.PipelineRunStatusState.SUCCESS: + _recurse_success(pipeline_run) + elif state == metadata_base.PipelineRunStatusState.FAILURE: + _recurse_failure(pipeline_run) + else: + raise exceptions.UnexpectedValueError("Invalid pipeline run state: {state}".format(state=state)) + + +def _get_pipeline_run_references(pipeline_run: typing.Dict) -> typing.List[typing.Dict]: + pipeline_run_references: typing.List[typing.Dict] = [] + + pipeline_run_references += pipeline_run.get('environment', {}).get('reference_benchmarks', []) + + for step in pipeline_run.get('steps', []): + pipeline_run_references += step.get('environment', {}).get('reference_benchmarks', []) + + for method_call in step.get('method_calls', []): + pipeline_run_references += method_call.get('environment', {}).get('reference_benchmarks', []) + + return pipeline_run_references + + +def validate_pipeline_run(pipeline_run: typing.Dict) -> None: + """ + Validates that the pipeline run is valid for the purpose of insertion in the metalearning database. + If not, an exception is raised. + + Generally, metalearning database has additional requirements not captured by JSON schema. + + Parameters + ---------- + pipeline_run: + Pipeline run document. + """ + + PIPELINE_RUN_SCHEMA_VALIDATOR.validate(pipeline_run) + + if pipeline_run['schema'] != PIPELINE_RUN_SCHEMA_VERSION: + raise exceptions.InvalidPipelineRunError( + "Schema field is not '{expected_schema}', but '{actual_schema}'.".format( + expected_schema=pipeline_module.PIPELINE_SCHEMA_VERSION, + actual_schema=pipeline_run['schema'], + ), + ) + + computed_id = utils.compute_hash_id(pipeline_run) + + if pipeline_run['id'] != computed_id: + raise exceptions.InvalidPipelineRunError( + "ID field is not '{computed_id}', but '{actual_id}'.".format( + computed_id=computed_id, + actual_id=pipeline_run['id'], + ), + ) + + for dataset in list(pipeline_run['datasets']) + list(pipeline_run['run'].get('scoring', {}).get('datasets', [])): + if set(dataset.keys()) != {'id', 'digest'}: + raise exceptions.InvalidPipelineRunError("Invalid dataset reference: {dataset}".format(dataset=dataset)) + + pipelines = [pipeline_run['pipeline']] + if 'data_preparation' in pipeline_run['run']: + pipelines.append(pipeline_run['run']['data_preparation']['pipeline']) + if 'scoring' in pipeline_run['run']: + pipelines.append(pipeline_run['run']['scoring']['pipeline']) + + for pipeline in pipelines: + if set(pipeline.keys()) != {'id', 'digest'}: + raise exceptions.InvalidPipelineRunError("Invalid pipeline reference: {pipeline}".format(pipeline=pipeline)) + + if 'problem' in pipeline_run and set(pipeline_run['problem'].keys()) != {'id', 'digest'}: + raise exceptions.InvalidPipelineRunError("Invalid problem reference: {problem}".format(problem=pipeline_run['problem'])) + + referenced_pipeline_runs = [] + if 'previous_pipeline_run' in pipeline_run: + referenced_pipeline_runs.append(pipeline_run['previous_pipeline_run']) + referenced_pipeline_runs += _get_pipeline_run_references(pipeline_run) + if 'scoring' in pipeline_run['run']: + referenced_pipeline_runs += _get_pipeline_run_references(pipeline_run['run']['scoring']) + if 'data_preparation' in pipeline_run['run']: + referenced_pipeline_runs += _get_pipeline_run_references(pipeline_run['run']['data_preparation']) + + for referenced_pipeline_run in referenced_pipeline_runs: + if set(referenced_pipeline_run.keys()) != {'id'}: + raise exceptions.InvalidPipelineRunError("Invalid pipeline run reference: 
{pipeline_run}".format(pipeline_run=referenced_pipeline_run)) + + _validate_pipeline_run_status_consistency(pipeline_run) + _validate_pipeline_run_timestamps(pipeline_run) + _validate_pipeline_run_random_seeds(pipeline_run) + + +def validate_pipeline(pipeline_description: typing.Dict) -> None: + """ + Validates that the pipeline is valid for the purpose of insertion in the metalearning database. + If not, an exception is raised. + + Generally, metalearning database has additional requirements not captured by JSON schema. + + Parameters + ---------- + pipeline_description: + Pipeline.. + """ + + # Also validates against the schema. It validates top-level "digest" field if it exists. + pipeline = pipeline_module.Pipeline.from_json_structure(pipeline_description, resolver=pipeline_module.NoResolver(strict_digest=True), strict_digest=True) + + if pipeline_description['schema'] != pipeline_module.PIPELINE_SCHEMA_VERSION: + raise exceptions.InvalidPipelineError( + "Schema field is not '{expected_schema}', but '{actual_schema}'.".format( + expected_schema=pipeline_module.PIPELINE_SCHEMA_VERSION, + actual_schema=pipeline_description['schema'], + ), + ) + + # If there is "digest" field we know that it has matched the pipeline. + if 'digest' not in pipeline_description: + raise exceptions.InvalidPipelineError("Digest field is required.") + + # Also validates that there are no nested sub-pipelines. + if pipeline_description != pipeline._canonical_pipeline_description(pipeline_description): + raise exceptions.InvalidPipelineError("Pipeline description is not in canonical structure.") + + # We allow non-standard pipelines but require that all inputs are "Dataset" objects. + input_types = {'inputs.{i}'.format(i=i): container.Dataset for i in range(len(pipeline.inputs))} + pipeline.check(allow_placeholders=False, standard_pipeline=False, input_types=input_types) + + for step in pipeline.steps: + if isinstance(step, pipeline_module.SubpipelineStep): + # We are using "NoResolver", so we have "pipeline_description" available. + if 'digest' not in step.pipeline_description: + raise exceptions.InvalidPipelineError("Digest field in steps is required.") + elif isinstance(step, pipeline_module.PrimitiveStep): + # We are using "NoResolver", so we have "primitive_description" available. + if 'digest' not in step.primitive_description: + # A special case to handle a v2019.6.7 version of the core package where compute scores primitive + # did not have a digest because it was lacking "installation" section in metadata. + # See: https://gitlab.com/datadrivendiscovery/d3m/merge_requests/280 + if step.primitive_description['id'] == '799802fb-2e11-4ab7-9c5e-dda09eb52a70' and step.primitive_description['version'] == '0.3.0': + continue + raise exceptions.InvalidPipelineError("Digest field in steps is required.") + else: + raise exceptions.InvalidPipelineError("Unknown step type: {type}".format(type=type(step))) + + +def validate_problem(problem_description_json_structure: typing.Dict) -> None: + """ + Validates that the problem description is valid for the purpose of insertion in the metalearning database. + If not, an exception is raised. + + Generally, metalearning database has additional requirements not captured by JSON schema. + + Parameters + ---------- + problem_description_json_structure: + Problem description as JSON structure. 
+ """ + + if 'digest' not in problem_description_json_structure: + raise exceptions.InvalidProblemError("Digest field is required.") + + # Also validates against the schema and checks the digest. + problem_description = problem.Problem.from_json_structure(problem_description_json_structure, strict_digest=True) + + if problem_description['schema'] != problem.PROBLEM_SCHEMA_VERSION: + raise exceptions.InvalidProblemError( + "Schema field is not '{expected_schema}', but '{actual_schema}'.".format( + expected_schema=problem.PROBLEM_SCHEMA_VERSION, + actual_schema=problem_description['schema'], + ), + ) + + canonical_problem_description = problem_description._canonical_problem_description(problem_description) + + if problem_description != canonical_problem_description: + raise exceptions.InvalidProblemError("Problem description is not in canonical structure.") + + if problem_description.get('source', {}).get('from', {}).get('type', None) == 'REDACTED': + problem_reference = problem_description['source']['from'].get('problem', {}) + if set(problem_reference.keys()) != {'id', 'digest'}: + raise exceptions.InvalidProblemError("Invalid problem description reference for \"source.from.problem\": {problem}".format(problem=problem_reference)) + + +def validate_dataset(dataset_description: typing.Dict) -> None: + """ + Validates that the dataset description is valid for the purpose of insertion in the metalearning database. + If not, an exception is raised. + + Generally, metalearning database has additional requirements not captured by JSON schema. + + Parameters + ---------- + dataset_description: + Dataset description. + """ + + metadata_base.CONTAINER_SCHEMA_VALIDATOR.validate(dataset_description) + + if dataset_description['schema'] != metadata_base.CONTAINER_SCHEMA_VERSION: + raise exceptions.InvalidDatasetError( + "Schema field is not '{expected_schema}', but '{actual_schema}'.".format( + expected_schema=metadata_base.CONTAINER_SCHEMA_VERSION, + actual_schema=dataset_description['schema'], + ), + ) + + if 'id' not in dataset_description: + raise exceptions.InvalidDatasetError("ID field is required.") + + if 'digest' not in dataset_description: + raise exceptions.InvalidDatasetError("Digest field is required.") + + # Also validates that there are no nested sub-pipelines. + if dataset_description != container.Dataset._canonical_dataset_description(dataset_description): + raise exceptions.InvalidDatasetError("Dataset description is not in canonical structure.") + + if dataset_description['structural_type'] != 'd3m.container.dataset.Dataset': + raise exceptions.InvalidDatasetError("Structural type is not 'd3m.container.dataset.Dataset', but '{type}'.".format(type=dataset_description['structural_type'])) + + if dataset_description.get('source', {}).get('from', {}).get('type', None) == 'REDACTED': + dataset_reference = dataset_description['source']['from'].get('dataset', {}) + if set(dataset_reference.keys()) != {'id', 'digest'}: + raise exceptions.InvalidDatasetError("Invalid dataset reference for \"source.from.dataset\": {dataset}".format(dataset=dataset_reference)) + + +def validate_primitive(primitive_json_structure: typing.Dict) -> None: + """ + Validates that the primitive description is valid for the purpose of insertion in the metalearning database. + If not, an exception is raised. + + Generally, metalearning database has additional requirements not captured by JSON schema. + + Parameters + ---------- + primitive_json_structure: + Primitive description as JSON structure. 
+ """ + + if 'digest' not in primitive_json_structure: + raise exceptions.InvalidProblemError("Digest field is required.") + + metadata_base.PrimitiveMetadata._validate(primitive_json_structure) + + +def pipeline_run_handler(arguments: argparse.Namespace) -> None: + has_errored = False + + for pipeline_run_path in arguments.pipeline_runs: + if getattr(arguments, 'list', False): + print(pipeline_run_path) + + try: + with utils.open(pipeline_run_path, 'r', encoding='utf8') as pipeline_run_file: + if pipeline_run_path.endswith('.yml') or pipeline_run_path.endswith('.yaml') or pipeline_run_path.endswith('.yml.gz') or pipeline_run_path.endswith('.yaml.gz'): + pipeline_runs: typing.Iterator[typing.Dict] = utils.yaml_load_all(pipeline_run_file) + else: + pipeline_runs = typing.cast(typing.Iterator[typing.Dict], [json.load(pipeline_run_file)]) + + # It has to be inside context manager because YAML loader returns a lazy iterator + # which requires an open file while iterating. + for pipeline_run in pipeline_runs: + validate_pipeline_run(pipeline_run) + except Exception as error: + if getattr(arguments, 'continue', False): + traceback.print_exc(file=sys.stdout) + print(f"Error validating a pipeline run: {pipeline_run_path}") + has_errored = True + continue + else: + raise Exception(f"Error validating a pipeline run: {pipeline_run_path}") from error + + if has_errored: + sys.exit(1) + + +def pipeline_handler( + arguments: argparse.Namespace, *, resolver_class: typing.Type[pipeline_module.Resolver] = None, + no_resolver_class: typing.Type[pipeline_module.Resolver] = None, pipeline_class: typing.Type[pipeline_module.Pipeline] = None, +) -> None: + has_errored = False + + for pipeline_path in arguments.pipelines: + if getattr(arguments, 'list', False): + print(pipeline_path) + + try: + with utils.open(pipeline_path, 'r', encoding='utf8') as pipeline_file: + if pipeline_path.endswith('.yml') or pipeline_path.endswith('.yaml') or pipeline_path.endswith('.yml.gz') or pipeline_path.endswith('.yaml.gz'): + pipelines: typing.Iterator[typing.Dict] = utils.yaml_load_all(pipeline_file) + else: + pipelines = typing.cast(typing.Iterator[typing.Dict], [json.load(pipeline_file)]) + + for pipeline in pipelines: + validate_pipeline(pipeline) + except Exception as error: + if getattr(arguments, 'continue', False): + traceback.print_exc(file=sys.stdout) + print(f"Error validating a pipeline: {pipeline_path}") + has_errored = True + continue + else: + raise Exception(f"Error validating a pipeline: {pipeline_path}") from error + + if has_errored: + sys.exit(1) + + +def problem_handler(arguments: argparse.Namespace, *, problem_resolver: typing.Callable = None) -> None: + has_errored = False + + for problem_path in arguments.problems: + if getattr(arguments, 'list', False): + print(problem_path) + + try: + with utils.open(problem_path, 'r', encoding='utf8') as problem_file: + if problem_path.endswith('.yml') or problem_path.endswith('.yaml') or problem_path.endswith('.yml.gz') or problem_path.endswith('.yaml.gz'): + problems: typing.Iterator[typing.Dict] = utils.yaml_load_all(problem_file) + else: + problems = typing.cast(typing.Iterator[typing.Dict], [json.load(problem_file)]) + + for problem in problems: + validate_problem(problem) + except Exception as error: + if getattr(arguments, 'continue', False): + traceback.print_exc(file=sys.stdout) + print(f"Error validating a problem: {problem_path}") + has_errored = True + continue + else: + raise Exception(f"Error validating a problem: {problem_path}") from error + + if 
has_errored: + sys.exit(1) + + +def dataset_handler(arguments: argparse.Namespace, *, dataset_resolver: typing.Callable = None) -> None: + has_errored = False + + for dataset_path in arguments.datasets: + if getattr(arguments, 'list', False): + print(dataset_path) + + try: + with utils.open(dataset_path, 'r', encoding='utf8') as dataset_file: + if dataset_path.endswith('.yml') or dataset_path.endswith('.yaml'): + datasets: typing.Iterator[typing.Dict] = utils.yaml_load_all(dataset_file) + else: + datasets = typing.cast(typing.Iterator[typing.Dict], [json.load(dataset_file)]) + + for dataset in datasets: + validate_dataset(dataset) + except Exception as error: + if getattr(arguments, 'continue', False): + traceback.print_exc(file=sys.stdout) + print(f"Error validating a dataset: {dataset_path}") + has_errored = True + continue + else: + raise Exception(f"Error validating a dataset: {dataset_path}") from error + + if has_errored: + sys.exit(1) + + +def primitive_handler(arguments: argparse.Namespace) -> None: + has_errored = False + + for primitive_path in arguments.primitives: + if getattr(arguments, 'list', False): + print(primitive_path) + + try: + with utils.open(primitive_path, 'r', encoding='utf8') as primitive_file: + if primitive_path.endswith('.yml') or primitive_path.endswith('.yaml'): + primitives: typing.Iterator[typing.Dict] = utils.yaml_load_all(primitive_file) + else: + primitives = typing.cast(typing.Iterator[typing.Dict], [json.load(primitive_file)]) + + for primitive in primitives: + validate_primitive(primitive) + except Exception as error: + if getattr(arguments, 'continue', False): + traceback.print_exc(file=sys.stdout) + print(f"Error validating a primitive description: {primitive_path}") + has_errored = True + continue + else: + raise Exception(f"Error validating a primitive description: {primitive_path}") from error + + if has_errored: + sys.exit(1) + + +if pyarrow_lib is not None: + pyarrow_lib._default_serialization_context.register_type( + PipelineRun, 'd3m.pipeline_run', pickle=True, + ) diff --git a/d3m/d3m/metadata/primitive_names.py b/d3m/d3m/metadata/primitive_names.py new file mode 100644 index 0000000..5379a0f --- /dev/null +++ b/d3m/d3m/metadata/primitive_names.py @@ -0,0 +1,392 @@ +# Primitive Python paths (Python paths under which primitives registers themselves) have to adhere to namespace rules. +# Those rules describe that the Python path consists of multiple segments, one of them being "primitive name". Those +# names should be a general name to describe the logic of a primitive with the idea that multiple implementations +# of the same logic share the same name. This file contains a list of known and allowed primitive names. +# Names should be descriptive and something which can help people understand what the primitive is about. +# You can assume general understanding of data science concepts and names. +# +# Everyone is encouraged to help currate this list and suggest improvements (merging, removals, additions) +# of values in that list by submitting a merge request. We are not strict about names here, the main purpose of +# this list is to encourage collaboration and primitive name reuse when that is reasonable. Please check the list +# first when deciding on a Python path of your primitive and see if it can fit well under an existing name. 
+# +# On Linux, you can sort the list by running: +# +# grep "^ *'" d3m/metadata/primitive_names.py | env LC_COLLATE=C sort -u +# +# See: https://gitlab.com/datadrivendiscovery/d3m/issues/3 + +PRIMITIVE_NAMES = [ + 'categorical_to_binary', + 'discrete_cosine_transform', + 'fast_fourier_transform', + 'holt_smoothing', + 'holt_winters_exponential_smoothing', + 'mean_average_transform', + 'non_negative_matrix_factorization', + 'pyod_cof', + 'simple_exponential_smoothing', + 'time_stamp_validation', + 'time_series_moving_average', + 'time_series_seasonality_trend_decomposition', + 'variational_auto_encoder', + 'ada_boost', + 'adaptive_simultaneous_markov_blanket', + 'add', + 'add_semantic_types', + 'adjacency_spectral_embedding', + 'ape', + 'ard', + 'arima', + 'audio_featurization', + 'audio_reader', + 'audio_slicer', + 'audio_transfer', + 'average_pooling_1d', + 'average_pooling_2d', + 'average_pooling_3d', + 'bagging', + 'batch_normalization', # To be used with "layer" primitive family. + 'bayesian_logistic_regression', # To be used with "classification" primitive family. + 'bernoulli_naive_bayes', + 'bert_classifier', + 'binarizer', + 'binary_crossentropy', + 'binary_encoder', + 'cast_to_type', + 'categorical_accuracy', + 'categorical_crossentropy', + 'categorical_hinge', + 'categorical_imputer', + 'channel_averager', + 'clean_augmentation', + 'cleaning_featurizer', + 'cluster', + 'cluster_curve_fitting_kmeans', + 'column_fold', + 'column_map', + 'column_parser', + 'column_type_profiler', + 'compute_scores', + 'concat', + 'conditioner', + 'construct_predictions', + 'convolution_1d', + 'convolution_2d', + 'convolution_3d', + 'convolutional_neural_net', + 'corex_continuous', + 'corex_supervised', + 'corex_text', + 'cosine_proximity', + 'count_vectorizer', + 'cover_tree', + 'croc', + 'csv_reader', + 'cut_audio', + 'data_conversion', + 'dataframe_to_list', + 'dataframe_to_list_of_list', + 'dataframe_to_list_of_ndarray', + 'dataframe_to_ndarray', + 'dataframe_to_tensor', + 'datamart_augmentation', + 'datamart_download', + 'dataset_map', + 'dataset_text_reader', + 'dataset_to_dataframe', + 'dataset_sample', + 'datetime_field_compose', + 'datetime_range_filter', + 'decision_tree', + 'deep_feature_synthesis', + 'deep_markov_bernoulli_forecaster', + 'deep_markov_categorical_forecaster', + 'deep_markov_gaussian_forecaster', + 'denormalize', + 'dense', + 'diagonal_mvn', + 'dict_vectorizer', + 'dimension_selection', + 'discriminative_structured_classifier', + 'do_nothing', + 'do_nothing_for_dataset', + 'doc_2_vec', + 'dropout', + 'dummy', + 'echo_ib', + 'echo_linear', + 'edge_list_to_graph', + 'ekss', + 'elastic_net', + 'encoder', + 'enrich_dates', + 'ensemble_forest', + 'ensemble_voting', + 'extra_trees', + 'extract_columns', + 'extract_columns_by_semantic_types', + 'extract_columns_by_structural_types', + 'fast_ica', + 'fast_lad', + 'feature_agglomeration', + 'feed_forward_neural_net', + 'fixed_split_dataset_split', + 'flatten', + 'forward', + 'gaussian', # To be used with "classification" or "clustering" primitive family. 
+ 'gaussian_naive_bayes', + 'gaussian_process', + 'gaussian_random_projection', + 'gcn_mixhop', + 'general_relational_dataset', + 'generative_structured_classifier', + 'generic_univariate_select', + 'geocoding', + 'glda', + 'global_average_pooling_1d', + 'global_average_pooling_2d', + 'global_average_pooling_3d', + 'global_causal_discovery', + 'global_structure_imputer', + 'gmm', + 'go_dec', + 'goturn', + 'gradient_boosting', + 'graph_node_splitter', + 'graph_to_edge_list', + 'graph_transformer', + 'grasta', + 'grasta_masked', + 'greedy_imputation', + 'grouping_field_compose', + 'grouse', + 'hdbscan', + 'hdp', + 'hinge', + 'horizontal_concat', + 'i3d', + 'i_vector_extractor', + 'ibex', + 'identity_parentchildren_markov_blanket', + 'image_reader', + 'image_transfer', + 'image_transfer_learning_transformer', + 'imputer', + 'inceptionV3_image_feature', + 'increment', + 'iqr_scaler', + 'iterative_labeling', + 'iterative_regression_imputation', + 'joint_mutual_information', + 'k_means', + 'k_neighbors', + 'kernel_pca', + 'kernel_ridge', + 'kfold_dataset_split', + 'kfold_time_series_split', + 'kss', + 'kullback_leibler_divergence', + 'l1_low_rank', + 'label_decoder', + 'label_encoder', + 'labler', + 'laplacian_spectral_embedding', + 'largest_connected_component', + 'lars', + 'lasso', + 'lasso_cv', + 'lda', + 'light_gbm', + 'linear', + 'linear_discriminant_analysis', + 'linear_svc', + 'linear_svr', + 'link_prediction', # To be used with "collaborative_filtering" or "graph_matching" primitive family. + 'list_to_dataframe', + 'list_to_ndarray', + 'load_edgelist', + 'load_graphs', + 'load_single_graph', + 'local_structure_imputer', + 'log_mel_spectrogram', + 'logcosh', + 'logistic_regression', # To be used with "classification" primitive family. + 'loss', + 'lstm', + 'lupi_svm', + 'max_abs_scaler', + 'max_pooling_1d', + 'max_pooling_2d', + 'max_pooling_3d', + 'mean_absolute_error', + 'mean_absolute_percentage_error', + 'mean_baseline', + 'mean_imputation', + 'mean_squared_error', + 'mean_squared_logarithmic_error', + 'merge_partial_predictions', + 'metafeature_extractor', + 'mice_imputation', + 'min_max_scaler', + 'missing_indicator', + 'mlp', + 'model', + 'monomial', + 'multinomial_naive_bayes', + 'multitable_featurization', + 'mutual_info', # To be used with "classification" or "regression" primitive family. + 'naive_bayes', + 'ndarray_to_dataframe', + 'ndarray_to_list', + 'nearest_centroid', + 'nk_sent2vec', + 'no_split_dataset_split', + 'non_parametric', # To be used with "clustering" primitive family. + 'normalize_column_references', + 'normalize_graphs', + 'normalizer', + 'null', + 'number_of_clusters', + 'numeric_range_filter', + 'nystroem', + 'one_hot_encoder', + 'ordinal_encoder', + 'out_of_sample_adjacency_spectral_embedding', + 'out_of_sample_laplacian_spectral_embedding', + 'output_dataframe', + 'owl', # To be used with "regression" primitive family. + 'parser', # To be used with "collaborative_filtering", "graph_matching", "vertex_nomination", or "community_detection" primitive family. 
+ 'pass_to_ranks', + 'passive_aggressive', + 'pca', + 'pca_features', + 'pcp_ialm', + 'poisson', + 'polynomial_features', + 'primitive_sum', + 'profiler', + 'huber_pca', + 'low_rank_imputer', + 'high_rank_imputer', + 'quadratic_discriminant_analysis', + 'quantile_transformer', + 'ragged_dataset_reader', + 'random', + 'random_classifier', + 'random_forest', + 'random_projection_time_series_featurization', + 'random_sampling_imputer', + 'random_trees_embedding', + 'rank', # To be used with "classification" primitive family. + 'ravel', + 'rbf_sampler', + 'recommender_system', # To be used with "collaborative_filtering" primitive family. + 'redact_columns', + 'regex_filter', + 'relational_time_series', + 'remote_sensing_pretrained', + 'remove_columns', + 'remove_duplicate_columns', + 'remove_semantic_types', + 'rename_duplicate_name', + 'replace_semantic_types', + 'replace_singletons', + 'resnet50_image_feature', + 'resnext101_kinetics_video_features', + 'retina_net', + 'reverse', + 'rfd', + 'rfe', + 'rffeatures', + 'rfm_precondition_ed_gaussian_krr', + 'rfm_precondition_ed_polynomial_krr', + 'ridge', + 'rnn_time_series', + 'robust_scaler', + 'rpca_lbd', + 'score_based_markov_blanket', + 'sdne', + 'search', + 'search_hybrid', + 'search_hybrid_numeric', + 'search_numeric', + 'seeded', # To be used with "graph_matching" primitive family. + 'seeded_graph_matching', # To be used with "vertex_nomination" primitive family. + 'segment_curve_fitter', + 'select_fwe', + 'select_percentile', + 'sequence_to_bag_of_tokens', + 'sgd', + 'shapelet_learning', + 'signal_dither', + 'signal_framer', + 'signal_mfcc', + 'simon', + 'simple_imputer', + 'simultaneous_markov_blanket', + 'sparse_categorical_crossentropy', + 'sparse_pca', + 'sparse_random_projection', + 'spectral', # To be used with "vertex_nomination" primitive family. + 'spectral_graph', # To be used with "clustering" primitive family. + 'splitter', + 'squared_hinge', + 'ssc_admm', + 'ssc_cvx', + 'ssc_omp', + 'stack_ndarray_column', + 'stacking', # To be used with "operator" primitive family. + 'standard_scaler', + 'string_imputer', + 'structured', # To be used with "classification" primitive family. + 'subtract', + 'sum', + 'svc', + 'svr', + 't_distributed_stochastic_neighbor_embedding', + 'tabular_extractor', + 'targets_reader', + 'tensor_machines_binary', # To be used with "classification" primitive family. 
+ 'tensor_machines_regularized_least_squares', + 'term_filter', + 'text_classifier', + 'text_encoder', + 'text_reader', + 'text_summarization', + 'text_to_vocabulary', + 'text_tokenizer', + 'tfidf_vectorizer', + 'time_series_forecasting', + 'time_series_formatter', + 'time_series_neighbours', + 'time_series_reshaper', + 'time_series_to_list', + 'to_numeric', + 'topic_vectorizer', + 'train_score_dataset_split', + 'trecs', + 'tree_augmented_naive_bayes', + 'trim_regressor', + 'truncated_svd', + 'unary_encoder', + 'unfold', + 'unicorn', + 'uniform_segmentation', + 'update_semantic_types', + 'variance_threshold', + 'vector_autoregression', + 'vertical_concatenate', + 'vgg16', + 'vgg16_image_feature', + 'video_reader', + 'voter', + 'voting', + 'wikifier', + 'word_2_vec', + 'word_embedding_builder', + 'xgboost_dart', + 'xgboost_gbtree', + 'yolo', + 'zero_count', +] diff --git a/d3m/d3m/metadata/problem.py b/d3m/d3m/metadata/problem.py new file mode 100644 index 0000000..520d5f3 --- /dev/null +++ b/d3m/d3m/metadata/problem.py @@ -0,0 +1,1039 @@ +import abc +import argparse +import copy +import functools +import json +import logging +import math +import os.path +import pprint +import sys +import traceback +import typing +from urllib import parse as url_parse + +from . import base +from d3m import deprecate, exceptions, utils + +# See: https://gitlab.com/datadrivendiscovery/d3m/issues/66 +try: + from pyarrow import lib as pyarrow_lib # type: ignore +except ModuleNotFoundError: + pyarrow_lib = None + +__all__ = ('TaskKeyword', 'PerformanceMetric', 'Problem') + +logger = logging.getLogger(__name__) + +# Comma because we unpack the list of validators returned from "load_schema_validators". +PROBLEM_SCHEMA_VALIDATOR, = utils.load_schema_validators(base.SCHEMAS, ('problem.json',)) + +PROBLEM_SCHEMA_VERSION = 'https://metadata.datadrivendiscovery.org/schemas/v0/problem.json' + + +def sigmoid(x: float) -> float: + """ + Numerically stable scaled logistic function. + + Maps all values ``x`` to [0, 1]. Values between -1000 and 1000 are + mapped reasonably far from 0 and 1, after which the function + converges to bounds. + + Parameters + ---------- + x: + Input. + + Returns + ------- + Output. + """ + + scale = 1 / 1000 + + if x >= 0: + ex = math.exp(scale * -x) + return 1 / (1 + ex) + else: + ex = math.exp(scale * x) + return ex / (1 + ex) + + +class TaskKeywordBase: + _d3m_map: typing.Dict[str, 'TaskKeywordBase'] = {} + + @classmethod + def get_map(cls) -> dict: + """ + Returns the map between D3M problem description JSON string and enum values. + + Returns + ------- + The map. + """ + + return cls._d3m_map + + @classmethod + def parse(cls, name: str) -> 'TaskKeywordBase': + """ + Converts D3M problem description JSON string into enum value. + + Parameters + ---------- + name: + D3M problem description JSON string. + + Returns + ------- + Enum value. + """ + + return cls.get_map()[name] + + def unparse(self) -> str: + """ + Converts enum value to D3M problem description JSON string. + + Returns + ------- + D3M problem description JSON string. 
+ """ + + for key, value in self.get_map().items(): + if self == value: + return key + + raise exceptions.InvalidStateError("Cannot convert {self}.".format(self=self)) + + def __ge__(self, other: typing.Any) -> bool: + if self.__class__ is other.__class__: + return list(self.__class__.__members__.keys()).index(self.value) >= list(other.__class__.__members__.keys()).index(other.value) # type: ignore + return NotImplemented + + def __gt__(self, other: typing.Any) -> bool: + if self.__class__ is other.__class__: + return list(self.__class__.__members__.keys()).index(self.value) > list(other.__class__.__members__.keys()).index(other.value) # type: ignore + return NotImplemented + + def __le__(self, other: typing.Any) -> bool: + if self.__class__ is other.__class__: + return list(self.__class__.__members__.keys()).index(self.value) <= list(other.__class__.__members__.keys()).index(other.value) # type: ignore + return NotImplemented + + def __lt__(self, other: typing.Any) -> bool: + if self.__class__ is other.__class__: + return list(self.__class__.__members__.keys()).index(self.value) < list(other.__class__.__members__.keys()).index(other.value) # type: ignore + return NotImplemented + + +TaskKeyword = utils.create_enum_from_json_schema_enum( + 'TaskKeyword', base.DEFINITIONS_JSON, + 'definitions.problem.properties.task_keywords.items.oneOf[*].enum[*]', + module=__name__, base_class=TaskKeywordBase, +) + +TaskKeyword._d3m_map.update({ + 'classification': TaskKeyword.CLASSIFICATION, # type: ignore + 'regression': TaskKeyword.REGRESSION, # type: ignore + 'clustering': TaskKeyword.CLUSTERING, # type: ignore + 'linkPrediction': TaskKeyword.LINK_PREDICTION, # type: ignore + 'vertexNomination': TaskKeyword.VERTEX_NOMINATION, # type: ignore + 'vertexClassification': TaskKeyword.VERTEX_CLASSIFICATION, # type: ignore + 'communityDetection': TaskKeyword.COMMUNITY_DETECTION, # type: ignore + 'graphMatching': TaskKeyword.GRAPH_MATCHING, # type: ignore + 'forecasting': TaskKeyword.FORECASTING, # type: ignore + 'collaborativeFiltering': TaskKeyword.COLLABORATIVE_FILTERING, # type: ignore + 'objectDetection': TaskKeyword.OBJECT_DETECTION, # type: ignore + 'semiSupervised': TaskKeyword.SEMISUPERVISED, # type: ignore + 'binary': TaskKeyword.BINARY, # type: ignore + 'multiClass': TaskKeyword.MULTICLASS, # type: ignore + 'multiLabel': TaskKeyword.MULTILABEL, # type: ignore + 'univariate': TaskKeyword.UNIVARIATE, # type: ignore + 'multivariate': TaskKeyword.MULTIVARIATE, # type: ignore + 'overlapping': TaskKeyword.OVERLAPPING, # type: ignore + 'nonOverlapping': TaskKeyword.NONOVERLAPPING, # type: ignore + 'tabular': TaskKeyword.TABULAR, # type: ignore + 'relational': TaskKeyword.RELATIONAL, # type: ignore + 'nested': TaskKeyword.NESTED, # type: ignore + 'image': TaskKeyword.IMAGE, # type: ignore + 'audio': TaskKeyword.AUDIO, # type: ignore + 'video': TaskKeyword.VIDEO, # type: ignore + 'speech': TaskKeyword.SPEECH, # type: ignore + 'text': TaskKeyword.TEXT, # type: ignore + 'graph': TaskKeyword.GRAPH, # type: ignore + 'multiGraph': TaskKeyword.MULTIGRAPH, # type: ignore + 'timeSeries': TaskKeyword.TIME_SERIES, # type: ignore + 'grouped': TaskKeyword.GROUPED, # type: ignore + 'geospatial': TaskKeyword.GEOSPATIAL, # type: ignore + 'remoteSensing': TaskKeyword.REMOTE_SENSING, # type: ignore + 'lupi': TaskKeyword.LUPI, # type: ignore + 'missingMetadata': TaskKeyword.MISSING_METADATA, # type: ignore +}) + + +class PerformanceMetricBase: + _d3m_map: typing.ClassVar[typing.Dict[str, 'PerformanceMetricBase']] = {} + 
_requires_confidence_set: typing.ClassVar[typing.Set['PerformanceMetricBase']] = set() + _requires_rank_set: typing.ClassVar[typing.Set['PerformanceMetricBase']] = set() + _best_value_map: typing.ClassVar[typing.Dict['PerformanceMetricBase', float]] = {} + _worst_value_map: typing.ClassVar[typing.Dict['PerformanceMetricBase', float]] = {} + _additional_score_class_map: typing.ClassVar[typing.Dict['PerformanceMetricBase', type]] = {} + + @classmethod + def get_map(cls) -> dict: + """ + Returns the map between D3M problem description JSON string and enum values. + + Returns + ------- + The map. + """ + + return cls._d3m_map + + @classmethod + def parse(cls, name: str) -> 'PerformanceMetricBase': + """ + Converts D3M problem description JSON string into enum value. + + Parameters + ---------- + name: + D3M problem description JSON string. + + Returns + ------- + Enum value. + """ + + return cls.get_map()[name] + + def unparse(self) -> str: + """ + Converts enum value to D3M problem description JSON string. + + Returns + ------- + D3M problem description JSON string. + """ + + for key, value in self.get_map().items(): + if self == value: + return key + + raise exceptions.InvalidStateError("Cannot convert {self}.".format(self=self)) + + def requires_confidence(self) -> bool: + """ + Returns ``True`` if this metric requires confidence column. + + Returns + ------- + ``True`` if this metric requires confidence column. + """ + + return self in self._requires_confidence_set + + def requires_rank(self) -> bool: + """ + Returns ``True`` if this metric requires rank column. + + Returns + ------- + ``True`` if this metric requires rank column. + """ + + return self in self._requires_rank_set + + def best_value(self) -> float: + """ + The best possible value of the metric. + + Returns + ------- + The best possible value of the metric. + """ + + return self._best_value_map[self] + + def worst_value(self) -> float: + """ + The worst possible value of the metric. + + Returns + ------- + The worst possible value of the metric. + """ + + return self._worst_value_map[self] + + def normalize(self, value: float) -> float: + """ + Normalize the ``value`` for this metric so that it is between 0 and 1, + inclusive, where 1 is the best score and 0 is the worst. + + Parameters + ---------- + value: + Value of this metric to normalize. + + Returns + ------- + A normalized metric. 
+ """ + + worst_value = self.worst_value() + best_value = self.best_value() + + return self._normalize(worst_value, best_value, value) + + @classmethod + def _normalize(cls, worst_value: float, best_value: float, value: float) -> float: + assert worst_value <= value <= best_value or worst_value >= value >= best_value, (worst_value, value, best_value) + + if math.isinf(best_value) and math.isinf(worst_value): + value = sigmoid(value) + if best_value > worst_value: # "best_value" == inf, "worst_value" == -inf + best_value = 1.0 + worst_value = 0.0 + else: # "best_value" == -inf, "worst_value" == inf + best_value = 0.0 + worst_value = 1.0 + elif math.isinf(best_value): + value = sigmoid(value - worst_value) + if best_value > worst_value: # "best_value" == inf + best_value = 1.0 + worst_value = 0.5 + else: # "best_value" == -inf + best_value = 0.0 + worst_value = 0.5 + elif math.isinf(worst_value): + value = sigmoid(best_value - value) + if best_value > worst_value: # "worst_value" == -inf + best_value = 0.5 + worst_value = 1.0 + else: # "worst_value" == inf + best_value = 0.5 + worst_value = 0.0 + + return (value - worst_value) / (best_value - worst_value) + + def get_class(self) -> typing.Any: + """ + Returns a class suitable for computing this metric. + """ + + # Importing here to prevent import cycle. + from d3m import metrics + + if self in metrics.class_map: + return metrics.class_map[self] # type: ignore + + if self in self._additional_score_class_map: + return self._additional_score_class_map[self] # type: ignore + + raise exceptions.NotSupportedError("Computing metric {metric} is not supported.".format(metric=self)) + + @classmethod + def register_metric(cls, name: str, *, best_value: float, worst_value: float, score_class: type, requires_confidence: bool = False, requires_rank: bool = False) -> None: + cls.register_value(name, name) # type: ignore + cls._best_value_map[cls[name]] = best_value # type: ignore + cls._worst_value_map[cls[name]] = worst_value # type: ignore + cls._additional_score_class_map[cls[name]] = score_class # type: ignore + + if requires_confidence: + PerformanceMetric._requires_confidence_set.add(cls[name]) # type: ignore + + if requires_rank: + PerformanceMetric._requires_rank_set.add(cls[name]) # type: ignore + + +PerformanceMetric = utils.create_enum_from_json_schema_enum( + 'PerformanceMetric', base.DEFINITIONS_JSON, + 'definitions.performance_metric.oneOf[*].properties.metric.enum[*]', + module=__name__, base_class=PerformanceMetricBase, +) + +PerformanceMetric._d3m_map.update({ + 'accuracy': PerformanceMetric.ACCURACY, # type: ignore + 'precision': PerformanceMetric.PRECISION, # type: ignore + 'recall': PerformanceMetric.RECALL, # type: ignore + 'f1': PerformanceMetric.F1, # type: ignore + 'f1Micro': PerformanceMetric.F1_MICRO, # type: ignore + 'f1Macro': PerformanceMetric.F1_MACRO, # type: ignore + 'rocAuc': PerformanceMetric.ROC_AUC, # type: ignore + 'rocAucMicro': PerformanceMetric.ROC_AUC_MICRO, # type: ignore + 'rocAucMacro': PerformanceMetric.ROC_AUC_MACRO, # type: ignore + 'meanSquaredError': PerformanceMetric.MEAN_SQUARED_ERROR, # type: ignore + 'rootMeanSquaredError': PerformanceMetric.ROOT_MEAN_SQUARED_ERROR, # type: ignore + 'meanAbsoluteError': PerformanceMetric.MEAN_ABSOLUTE_ERROR, # type: ignore + 'rSquared': PerformanceMetric.R_SQUARED, # type: ignore + 'normalizedMutualInformation': PerformanceMetric.NORMALIZED_MUTUAL_INFORMATION, # type: ignore + 'jaccardSimilarityScore': PerformanceMetric.JACCARD_SIMILARITY_SCORE, # type: ignore + 
'precisionAtTopK': PerformanceMetric.PRECISION_AT_TOP_K, # type: ignore + 'objectDetectionAP': PerformanceMetric.OBJECT_DETECTION_AVERAGE_PRECISION, # type: ignore + 'hammingLoss': PerformanceMetric.HAMMING_LOSS, # type: ignore + 'meanReciprocalRank': PerformanceMetric.MEAN_RECIPROCAL_RANK, # type: ignore + 'hitsAtK': PerformanceMetric.HITS_AT_K, # type: ignore +}) +PerformanceMetric._requires_confidence_set.update({ + PerformanceMetric.ROC_AUC, + PerformanceMetric.ROC_AUC_MICRO, + PerformanceMetric.ROC_AUC_MACRO, + PerformanceMetric.OBJECT_DETECTION_AVERAGE_PRECISION, +}) +PerformanceMetric._requires_rank_set.update({ + PerformanceMetric.MEAN_RECIPROCAL_RANK, + PerformanceMetric.HITS_AT_K, +}) +PerformanceMetric._best_value_map.update({ + PerformanceMetric.ACCURACY: 1.0, # type: ignore + PerformanceMetric.PRECISION: 1.0, # type: ignore + PerformanceMetric.RECALL: 1.0, # type: ignore + PerformanceMetric.F1: 1.0, # type: ignore + PerformanceMetric.F1_MICRO: 1.0, # type: ignore + PerformanceMetric.F1_MACRO: 1.0, # type: ignore + PerformanceMetric.ROC_AUC: 1.0, # type: ignore + PerformanceMetric.ROC_AUC_MICRO: 1.0, # type: ignore + PerformanceMetric.ROC_AUC_MACRO: 1.0, # type: ignore + PerformanceMetric.MEAN_SQUARED_ERROR: 0.0, # type: ignore + PerformanceMetric.ROOT_MEAN_SQUARED_ERROR: 0.0, # type: ignore + PerformanceMetric.MEAN_ABSOLUTE_ERROR: 0.0, # type: ignore + PerformanceMetric.R_SQUARED: 1.0, # type: ignore + PerformanceMetric.NORMALIZED_MUTUAL_INFORMATION: 1.0, # type: ignore + PerformanceMetric.JACCARD_SIMILARITY_SCORE: 1.0, # type: ignore + PerformanceMetric.PRECISION_AT_TOP_K: 1.0, # type: ignore + PerformanceMetric.OBJECT_DETECTION_AVERAGE_PRECISION: 1.0, # type: ignore + PerformanceMetric.HAMMING_LOSS: 0.0, # type: ignore + PerformanceMetric.MEAN_RECIPROCAL_RANK: 1.0, # type: ignore + PerformanceMetric.HITS_AT_K: 1.0, # type: ignore +}) +PerformanceMetric._worst_value_map.update({ + PerformanceMetric.ACCURACY: 0.0, # type: ignore + PerformanceMetric.PRECISION: 0.0, # type: ignore + PerformanceMetric.RECALL: 0.0, # type: ignore + PerformanceMetric.F1: 0.0, # type: ignore + PerformanceMetric.F1_MICRO: 0.0, # type: ignore + PerformanceMetric.F1_MACRO: 0.0, # type: ignore + PerformanceMetric.ROC_AUC: 0.0, # type: ignore + PerformanceMetric.ROC_AUC_MICRO: 0.0, # type: ignore + PerformanceMetric.ROC_AUC_MACRO: 0.0, # type: ignore + PerformanceMetric.MEAN_SQUARED_ERROR: float('inf'), # type: ignore + PerformanceMetric.ROOT_MEAN_SQUARED_ERROR: float('inf'), # type: ignore + PerformanceMetric.MEAN_ABSOLUTE_ERROR: float('inf'), # type: ignore + PerformanceMetric.R_SQUARED: float('-inf'), # type: ignore + PerformanceMetric.NORMALIZED_MUTUAL_INFORMATION: 0.0, # type: ignore + PerformanceMetric.JACCARD_SIMILARITY_SCORE: 0.0, # type: ignore + PerformanceMetric.PRECISION_AT_TOP_K: 0.0, # type: ignore + PerformanceMetric.OBJECT_DETECTION_AVERAGE_PRECISION: 0.0, # type: ignore + PerformanceMetric.HAMMING_LOSS: 1.0, # type: ignore + PerformanceMetric.MEAN_RECIPROCAL_RANK: 0.0, # type: ignore + PerformanceMetric.HITS_AT_K: 0.0, # type: ignore +}) + +# Here are all legacy (before v4.0.0) task types and task subtypes mapped to task keywords. 
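+# Before schema v4.0.0 a problem description carried a "taskType" and an optional "taskSubType"
+# (for example "classification" with "multiClass"); v4.0.0 replaced both fields with a single
+# flat "taskKeywords" list. The first map below translates the legacy values when loading older
+# problemDoc.json files (see D3MProblemLoader), and the second covers the same values in their
+# uppercase spelling.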
+TASK_TYPE_TO_KEYWORDS_MAP: typing.Dict[typing.Optional[str], typing.List] = { + None: [], + 'classification': [TaskKeyword.CLASSIFICATION], # type: ignore + 'regression': [TaskKeyword.REGRESSION], # type: ignore + 'clustering': [TaskKeyword.CLUSTERING], # type: ignore + 'linkPrediction': [TaskKeyword.LINK_PREDICTION], # type: ignore + 'vertexClassification': [TaskKeyword.VERTEX_CLASSIFICATION], # type: ignore + 'vertexNomination': [TaskKeyword.VERTEX_NOMINATION], # type: ignore + 'communityDetection': [TaskKeyword.COMMUNITY_DETECTION], # type: ignore + 'graphMatching': [TaskKeyword.GRAPH_MATCHING], # type: ignore + 'timeSeriesForecasting': [TaskKeyword.TIME_SERIES, TaskKeyword.FORECASTING], # type: ignore + 'collaborativeFiltering': [TaskKeyword.COLLABORATIVE_FILTERING], # type: ignore + 'objectDetection': [TaskKeyword.OBJECT_DETECTION], # type: ignore + 'semiSupervisedClassification': [TaskKeyword.SEMISUPERVISED, TaskKeyword.CLASSIFICATION], # type: ignore + 'semiSupervisedRegression': [TaskKeyword.SEMISUPERVISED, TaskKeyword.REGRESSION], # type: ignore + 'binary': [TaskKeyword.BINARY], # type: ignore + 'multiClass': [TaskKeyword.MULTICLASS], # type: ignore + 'multiLabel': [TaskKeyword.MULTILABEL], # type: ignore + 'univariate': [TaskKeyword.UNIVARIATE], # type: ignore + 'multivariate': [TaskKeyword.MULTIVARIATE], # type: ignore + 'overlapping': [TaskKeyword.OVERLAPPING], # type: ignore + 'nonOverlapping': [TaskKeyword.NONOVERLAPPING], # type: ignore +} +JSON_TASK_TYPE_TO_KEYWORDS_MAP: typing.Dict[typing.Optional[str], typing.List] = { + None: [], + 'CLASSIFICATION': [TaskKeyword.CLASSIFICATION], # type: ignore + 'REGRESSION': [TaskKeyword.REGRESSION], # type: ignore + 'CLUSTERING': [TaskKeyword.CLUSTERING], # type: ignore + 'LINK_PREDICTION': [TaskKeyword.LINK_PREDICTION], # type: ignore + 'VERTEX_CLASSIFICATION': [TaskKeyword.VERTEX_CLASSIFICATION], # type: ignore + 'VERTEX_NOMINATION': [TaskKeyword.VERTEX_NOMINATION], # type: ignore + 'COMMUNITY_DETECTION': [TaskKeyword.COMMUNITY_DETECTION], # type: ignore + 'GRAPH_MATCHING': [TaskKeyword.GRAPH_MATCHING], # type: ignore + 'TIME_SERIES_FORECASTING': [TaskKeyword.TIME_SERIES, TaskKeyword.FORECASTING], # type: ignore + 'COLLABORATIVE_FILTERING': [TaskKeyword.COLLABORATIVE_FILTERING], # type: ignore + 'OBJECT_DETECTION': [TaskKeyword.OBJECT_DETECTION], # type: ignore + 'SEMISUPERVISED_CLASSIFICATION': [TaskKeyword.SEMISUPERVISED, TaskKeyword.CLASSIFICATION], # type: ignore + 'SEMISUPERVISED_REGRESSION': [TaskKeyword.SEMISUPERVISED, TaskKeyword.REGRESSION], # type: ignore + 'BINARY': [TaskKeyword.BINARY], # type: ignore + 'MULTICLASS': [TaskKeyword.MULTICLASS], # type: ignore + 'MULTILABEL': [TaskKeyword.MULTILABEL], # type: ignore + 'UNIVARIATE': [TaskKeyword.UNIVARIATE], # type: ignore + 'MULTIVARIATE': [TaskKeyword.MULTIVARIATE], # type: ignore + 'OVERLAPPING': [TaskKeyword.OVERLAPPING], # type: ignore + 'NONOVERLAPPING': [TaskKeyword.NONOVERLAPPING], # type: ignore +} + + +class Loader(metaclass=utils.AbstractMetaclass): + """ + A base class for problem loaders. + """ + + @abc.abstractmethod + def can_load(self, problem_uri: str) -> bool: + """ + Return ``True`` if this loader can load a problem from a given URI ``problem_uri``. + + Parameters + ---------- + problem_uri: + A URI to load a problem from. + + Returns + ------- + ``True`` if this loader can load a problem from ``problem_uri``. 
+ """ + + @abc.abstractmethod + def load(self, problem_uri: str, *, problem_id: str = None, problem_version: str = None, + problem_name: str = None, strict_digest: bool = False, handle_score_split: bool = True) -> 'Problem': + """ + Loads the problem at ``problem_uri``. + + Parameters + ---------- + problem_uri: + A URI to load. + problem_id: + Override problem ID determined by the loader. + problem_version: + Override problem version determined by the loader. + problem_name: + Override problem name determined by the loader. + strict_digest: + If computed digest does not match the one provided in metadata, raise an exception? + handle_score_split: + Rename a scoring problem to not have the same name as testing problem + and update dataset references. + + Returns + ------- + A loaded problem. + """ + + @classmethod + def get_problem_class(cls) -> 'typing.Type[Problem]': + return Problem + + +class D3MProblemLoader(Loader): + """ + A class for loading of D3M problems. + + Loader support only loading from a local file system. + URI should point to the ``problemDoc.json`` file in the D3M problem directory. + """ + + SUPPORTED_VERSIONS = {'3.0', '3.1', '3.1.1', '3.1.2', '3.2.0', '3.2.1', '3.3.0', '3.3.1', '4.0.0', '4.1.0'} + + def can_load(self, dataset_uri: str) -> bool: + try: + parsed_uri = url_parse.urlparse(dataset_uri, allow_fragments=False) + except Exception: + return False + + if parsed_uri.scheme != 'file': + return False + + if parsed_uri.netloc not in ['', 'localhost']: + return False + + if not parsed_uri.path.startswith('/'): + return False + + if os.path.basename(parsed_uri.path) != 'problemDoc.json': + return False + + return True + + # "strict_digest" is not used because there is no digest in D3M problem descriptions. + def load(self, problem_uri: str, *, problem_id: str = None, problem_version: str = None, + problem_name: str = None, strict_digest: bool = False, handle_score_split: bool = True) -> 'Problem': + assert self.can_load(problem_uri) + + parsed_uri = url_parse.urlparse(problem_uri, allow_fragments=False) + + problem_doc_path = parsed_uri.path + + try: + with open(problem_doc_path, 'r', encoding='utf8') as problem_doc_file: + problem_doc = json.load(problem_doc_file) + except FileNotFoundError as error: + raise exceptions.ProblemNotFoundError( + "D3M problem '{problem_uri}' cannot be found.".format(problem_uri=problem_uri), + ) from error + + problem_schema_version = problem_doc.get('about', {}).get('problemSchemaVersion', '3.3.0') + if problem_schema_version not in self.SUPPORTED_VERSIONS: + logger.warning("Loading a problem with unsupported schema version '%(version)s'. Supported versions: %(supported_versions)s", { + 'version': problem_schema_version, + 'supported_versions': self.SUPPORTED_VERSIONS, + }) + + # To be compatible with problem descriptions which do not adhere to the schema and have only one entry for data. 
+ if not isinstance(problem_doc['inputs']['data'], list): + problem_doc['inputs']['data'] = [problem_doc['inputs']['data']] + + performance_metrics = [] + for performance_metric in problem_doc['inputs']['performanceMetrics']: + params = {} + + if 'posLabel' in performance_metric: + params['pos_label'] = performance_metric['posLabel'] + + if 'K' in performance_metric: + params['k'] = performance_metric['K'] + + performance_metrics.append({ + 'metric': PerformanceMetric.parse(performance_metric['metric']), + }) + + if params: + performance_metrics[-1]['params'] = params + + inputs = [] + for data in problem_doc['inputs']['data']: + targets = [] + for target in data['targets']: + targets.append({ + 'target_index': target['targetIndex'], + 'resource_id': target['resID'], + 'column_index': target['colIndex'], + 'column_name': target['colName'], + }) + + if 'numClusters' in target: + targets[-1]['clusters_number'] = target['numClusters'] + + privileged_data_columns = [] + for privileged_data in data.get('privilegedData', []): + privileged_data_columns.append({ + 'privileged_data_index': privileged_data['privilegedDataIndex'], + 'resource_id': privileged_data['resID'], + 'column_index': privileged_data['colIndex'], + 'column_name': privileged_data['colName'], + }) + + problem_input = { + 'dataset_id': data['datasetID'], + } + + if targets: + problem_input['targets'] = targets + + if privileged_data_columns: + problem_input['privileged_data'] = privileged_data_columns + + if data.get('forecastingHorizon', {}).get('horizonValue', None): + problem_input['forecasting_horizon'] = { + 'resource_id': data['forecastingHorizon']['resID'], + 'column_index': data['forecastingHorizon']['colIndex'], + 'column_name': data['forecastingHorizon']['colName'], + 'horizon_value': data['forecastingHorizon']['horizonValue'], + } + + inputs.append(problem_input) + + document_problem_id = problem_doc['about']['problemID'] + # Handle a special case for SCORE dataset splits (those which have "targets.csv" file). + # They are the same as TEST dataset splits, but we present them differently, so that + # SCORE dataset splits have targets as part of data. Because of this we also update + # corresponding problem ID. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/176 + if handle_score_split and os.path.exists(os.path.join(os.path.dirname(problem_doc_path), '..', 'targets.csv')) and document_problem_id.endswith('_TEST'): + document_problem_id = document_problem_id[:-5] + '_SCORE' + + # Also update dataset references. + for data in problem_doc.get('inputs', {}).get('data', []): + if data['datasetID'].endswith('_TEST'): + data['datasetID'] = data['datasetID'][:-5] + '_SCORE' + + # "dataSplits" is not exposed as a problem description. One should provide splitting + # configuration to a splitting pipeline instead. Similarly, "outputs" are not exposed either. + description = { + 'schema': PROBLEM_SCHEMA_VERSION, + 'id': problem_id or document_problem_id, + 'version': problem_version or problem_doc['about'].get('problemVersion', '1.0'), + 'name': problem_name or problem_doc['about']['problemName'], + 'location_uris': [ + # We reconstruct the URI to normalize it. + utils.fix_uri(problem_doc_path), + ], + 'problem': {}, + } + + task_keywords: typing.List = [] + + # Legacy (before v4.0.0). 
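+ # The legacy "taskType"/"taskSubType" values, if present, are translated through the
+ # maps defined at module level and merged with any explicit "taskKeywords"; duplicates
+ # are removed when the sorted set is stored into the description below.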
+ task_keywords += TASK_TYPE_TO_KEYWORDS_MAP[problem_doc['about'].get('taskType', None)] + task_keywords += TASK_TYPE_TO_KEYWORDS_MAP[problem_doc['about'].get('taskSubType', None)] + + if problem_doc['about'].get('taskKeywords', []): + for task_keyword in problem_doc['about']['taskKeywords']: + task_keywords.append(TaskKeyword.parse(task_keyword)) + + if task_keywords: + description['problem']['task_keywords'] = sorted(set(task_keywords)) # type: ignore + + if performance_metrics: + description['problem']['performance_metrics'] = performance_metrics # type: ignore + + if problem_doc['about'].get('problemDescription', None): + description['description'] = problem_doc['about']['problemDescription'] # type: ignore + + if problem_doc['about'].get('problemURI', None): + typing.cast(typing.List[str], description['location_uris']).append(problem_doc['about']['problemURI']) + + if inputs: + description['inputs'] = inputs # type: ignore + + if 'dataAugmentation' in problem_doc: + description['data_augmentation'] = problem_doc['dataAugmentation'] + + # We do not want empty objects. + if not description['problem']: + del description['problem'] + + problem_class = self.get_problem_class() + + return problem_class(description) + + +P = typing.TypeVar('P', bound='Problem') + + +# TODO: It should be probably immutable. +class Problem(dict): + """ + A class representing a problem. + """ + + def __init__(self, problem_description: typing.Dict = None, *, strict_digest: bool = False) -> None: + super().__init__(problem_description) + + PROBLEM_SCHEMA_VALIDATOR.validate(self) + + if 'digest' in self: + digest = self.get_digest() + + if digest != self['digest']: + if strict_digest: + raise exceptions.DigestMismatchError( + "Digest for problem description '{problem_id}' does not match a computed one. Provided digest: {problem_digest}. Computed digest: {new_problem_digest}.".format( + problem_id=self['id'], + problem_digest=self['digest'], + new_problem_digest=digest, + ) + ) + else: + logger.warning( + "Digest for problem description '%(problem_id)s' does not match a computed one. Provided digest: %(problem_digest)s. Computed digest: %(new_problem_digest)s.", + { + 'problem_id': self['id'], + 'problem_digest': self['digest'], + 'new_problem_digest': digest, + }, + ) + + # We do not want it to be stored in the object because it can become + # obsolete. Use "get_digest" to get the current digest. + del self['digest'] + + loaders: typing.List[Loader] = [ + D3MProblemLoader(), + ] + + @classmethod + def load(cls, problem_uri: str, *, problem_id: str = None, problem_version: str = None, + problem_name: str = None, strict_digest: bool = False, handle_score_split: bool = True) -> 'Problem': + """ + Tries to load problem from ``problem_uri`` using all registered problem loaders. + + Parameters + ---------- + problem_uri: + A URI to load. + problem_id: + Override problem ID determined by the loader. + problem_version: + Override problem version determined by the loader. + problem_name: + Override problem name determined by the loader. + strict_digest: + If computed digest does not match the one provided in metadata, raise an exception? + handle_score_split: + Rename a scoring problem to not have the same name as testing problem + and update dataset references. + + Returns + ------- + A loaded problem. 
+ """ + + for loader in cls.loaders: + if loader.can_load(problem_uri): + return loader.load( + problem_uri, problem_id=problem_id, problem_version=problem_version, + problem_name=problem_name, strict_digest=strict_digest, + handle_score_split=handle_score_split, + ) + + raise exceptions.ProblemUriNotSupportedError( + "No known loader could load problem from '{problem_uri}'.".format(problem_uri=problem_uri) + ) + + # TODO: Allow one to specify priority which would then insert loader at a different place and not at the end? + @classmethod + def register_loader(cls, loader: Loader) -> None: + """ + Registers a new problem loader. + + Parameters + ---------- + loader: + An instance of the loader class implementing a new loader. + """ + + cls.loaders.append(loader) + + def __repr__(self) -> str: + return self.__str__() + + def _get_description_keys(self) -> typing.Sequence[str]: + return 'id', 'name', 'location_uris' + + def __str__(self) -> str: + return '{class_name}({description})'.format( + class_name=type(self).__name__, + description=', '.join('{key}=\'{value}\''.format(key=key, value=self[key]) for key in self._get_description_keys() if key in self), + ) + + def copy(self: P) -> P: + return copy.deepcopy(self) + + @classmethod + def _canonical_problem_description(cls: typing.Type[P], problem_description: typing.Dict) -> P: + """ + Before we compute digest of the problem description, we have to convert it to a + canonical structure. + + Currently, this is just removing any local URIs the description might have. + """ + + # Making a copy. + problem_description = dict(problem_description) + + utils.filter_local_location_uris(problem_description) + + if 'digest' in problem_description: + del problem_description['digest'] + + return cls(problem_description) + + def get_digest(self) -> str: + # We use "to_json_structure" here and not "to_reversible_json_structure" + # because pickled values might not be deterministic. + return utils.compute_digest(utils.to_json_structure(self._to_simple_structure(canonical=True))) + + def _to_simple_structure(self, *, canonical: bool = False) -> typing.Dict: + problem_description = self + + if canonical: + problem_description = self._canonical_problem_description(self) + + return dict(problem_description) + + def to_simple_structure(self, *, canonical: bool = False) -> typing.Dict: + problem_description = self._to_simple_structure(canonical=canonical) + + problem_description['digest'] = self.get_digest() + + return problem_description + + @classmethod + def from_simple_structure(cls: typing.Type[P], structure: typing.Dict, *, strict_digest: bool = False) -> P: + return cls(structure, strict_digest=strict_digest) + + def to_json_structure(self, *, canonical: bool = False) -> typing.Dict: + """ + For standard enumerations we map them to strings. Non-standard problem + description fields we convert in a reversible manner. 
+ """ + + PROBLEM_SCHEMA_VALIDATOR.validate(self) + + simple_structure = copy.deepcopy(self.to_simple_structure(canonical=canonical)) + + if simple_structure.get('problem', {}).get('task_keywords', []): + simple_structure['problem']['task_keywords'] = [task_keyword.name for task_keyword in simple_structure['problem']['task_keywords']] + if simple_structure.get('problem', {}).get('performance_metrics', []): + for metric in simple_structure['problem']['performance_metrics']: + metric['metric'] = metric['metric'].name + + return utils.to_reversible_json_structure(simple_structure) + + @classmethod + def from_json_structure(cls: typing.Type[P], structure: typing.Dict, *, strict_digest: bool = False) -> P: + """ + For standard enumerations we map them from strings. For non-standard problem + description fields we used a reversible conversion. + """ + + simple_structure = utils.from_reversible_json_structure(structure) + + # Legacy (before v4.0.0). + legacy_task_keywords: typing.List[TaskKeyword] = [] # type: ignore + legacy_task_keywords += JSON_TASK_TYPE_TO_KEYWORDS_MAP[simple_structure.get('problem', {}).get('task_type', None)] + legacy_task_keywords += JSON_TASK_TYPE_TO_KEYWORDS_MAP[simple_structure.get('problem', {}).get('task_subtype', None)] + + if legacy_task_keywords: + # We know "problem" field exists. + simple_structure['problem']['task_keywords'] = simple_structure['problem'].get('task_keywords', []) + legacy_task_keywords + + if simple_structure.get('problem', {}).get('task_keywords', []): + mapped_task_keywords = [] + for task_keyword in simple_structure['problem']['task_keywords']: + if isinstance(task_keyword, str): + mapped_task_keywords.append(TaskKeyword[task_keyword]) + else: + mapped_task_keywords.append(task_keyword) + simple_structure['problem']['task_keywords'] = mapped_task_keywords + if simple_structure.get('problem', {}).get('performance_metrics', []): + for metric in simple_structure['problem']['performance_metrics']: + if isinstance(metric['metric'], str): + metric['metric'] = PerformanceMetric[metric['metric']] + + return cls.from_simple_structure(simple_structure, strict_digest=strict_digest) + + +@deprecate.function(message="use Problem.load class method instead") +def parse_problem_description(problem_doc_path: str) -> Problem: + """ + Parses problem description according to ``problem.json`` metadata schema. + + It converts constants to enumerations when suitable. + + Parameters + ---------- + problem_doc_path: + File path to the problem description (``problemDoc.json``). + + Returns + ------- + A parsed problem. 
+ """ + + return Problem.load(problem_uri=utils.fix_uri(problem_doc_path)) + + +def problem_serializer(obj: Problem) -> dict: + data: typing.Dict = { + 'problem': dict(obj), + } + + if type(obj) is not Problem: + data['type'] = type(obj) + + return data + + +def problem_deserializer(data: dict) -> Problem: + problem = data.get('type', Problem)(data['problem']) + return problem + + +if pyarrow_lib is not None: + pyarrow_lib._default_serialization_context.register_type( + Problem, 'd3m.problem', + custom_serializer=problem_serializer, + custom_deserializer=problem_deserializer, + ) + + +def get_problem(problem_uri: str, *, strict_digest: bool = False, datasets_dir: str = None, handle_score_split: bool = True) -> Problem: + if datasets_dir is not None: + datasets, problem_descriptions = utils.get_datasets_and_problems(datasets_dir, handle_score_split) + + if problem_uri in problem_descriptions: + problem_uri = problem_descriptions[problem_uri] + + problem_uri = utils.fix_uri(problem_uri) + + return Problem.load(problem_uri, strict_digest=strict_digest) + + +def describe_handler( + arguments: argparse.Namespace, *, problem_resolver: typing.Callable = None, +) -> None: + if problem_resolver is None: + problem_resolver = get_problem + + output_stream = getattr(arguments, 'output', sys.stdout) + + has_errored = False + + for problem_path in arguments.problems: + if getattr(arguments, 'list', False): + print(problem_path, file=output_stream) + + try: + problem = problem_resolver(problem_path, strict_digest=getattr(arguments, 'strict_digest', False)) + except Exception as error: + if getattr(arguments, 'continue', False): + traceback.print_exc(file=output_stream) + print(f"Error parsing problem: {problem_path}", file=output_stream) + has_errored = True + continue + else: + raise Exception(f"Error parsing problem: {problem_path}") from error + + try: + problem_description = problem.to_json_structure(canonical=True) + + if getattr(arguments, 'print', False): + pprint.pprint(problem_description, stream=output_stream) + elif not getattr(arguments, 'no_print', False): + json.dump( + problem_description, + output_stream, + indent=(getattr(arguments, 'indent', 2) or None), + sort_keys=getattr(arguments, 'sort_keys', False), + allow_nan=False, + ) # type: ignore + output_stream.write('\n') + except Exception as error: + if getattr(arguments, 'continue', False): + traceback.print_exc(file=output_stream) + print(f"Error describing problem: {problem_path}", file=output_stream) + has_errored = True + continue + else: + raise Exception(f"Error describing problem: {problem_path}") from error + + if has_errored: + sys.exit(1) + + +def main(argv: typing.Sequence) -> None: + raise exceptions.NotSupportedError("This CLI has been removed. 
Use \"python3 -m d3m problem describe\" instead.") + + +if __name__ == '__main__': + main(sys.argv) diff --git a/d3m/d3m/metadata/schemas/v0/container.json b/d3m/d3m/metadata/schemas/v0/container.json new file mode 100644 index 0000000..f0a8852 --- /dev/null +++ b/d3m/d3m/metadata/schemas/v0/container.json @@ -0,0 +1,62 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "id": "https://metadata.datadrivendiscovery.org/schemas/v0/container.json", + "title": "Container metadata", + "description": "Schema for metadata for the container (value passed between primitives).", + "type": "object", + "properties": { + "schema": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/schema" + }, + "id": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/id" + }, + "version": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/version" + }, + "digest": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/digest" + }, + "name": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/name" + }, + "other_names": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/other_names" + }, + "description": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/description" + }, + "keywords": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/keywords" + }, + "source": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/source" + }, + "structural_type": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/structural_type" + }, + "stored_size": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/stored_size" + }, + "approximate_stored_size": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/approximate_stored_size" + }, + "semantic_types": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/semantic_types" + }, + "dimension": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/dimension" + }, + "location_uris": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/location_uris" + }, + "data_metafeatures": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/data_metafeatures" + } + }, + "required": [ + "schema", + "structural_type" + ], + "additionalProperties": true +} diff --git a/d3m/d3m/metadata/schemas/v0/data.json b/d3m/d3m/metadata/schemas/v0/data.json new file mode 100644 index 0000000..d6e8ffe --- /dev/null +++ b/d3m/d3m/metadata/schemas/v0/data.json @@ -0,0 +1,64 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "id": "https://metadata.datadrivendiscovery.org/schemas/v0/datum.json", + "title": "Data metadata", + "description": "Schema for metadata for data itself (e.g., cells).", + "type": "object", + "properties": { + "name": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/name" + }, + "other_names": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/other_names" + }, + "description": { + "$ref": 
"https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/description" + }, + "keywords": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/keywords" + }, + "source": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/source" + }, + "structural_type": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/structural_type" + }, + "media_types": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/media_types" + }, + "sampling_rate": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/sampling_rate" + }, + "stored_size": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/stored_size" + }, + "semantic_types": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/semantic_types" + }, + "dimension": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/dimension" + }, + "location_base_uris": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/location_base_uris" + }, + "file_columns": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/file_columns" + }, + "file_columns_count": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/file_columns_count" + }, + "foreign_key": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/foreign_key" + }, + "boundary_for": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/boundary_for" + }, + "data_metafeatures": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/data_metafeatures" + }, + "all_distinct_values": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/all_distinct_values" + } + }, + "additionalProperties": true +} diff --git a/d3m/d3m/metadata/schemas/v0/definitions.json b/d3m/d3m/metadata/schemas/v0/definitions.json new file mode 100644 index 0000000..ae536fc --- /dev/null +++ b/d3m/d3m/metadata/schemas/v0/definitions.json @@ -0,0 +1,4415 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "id": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json", + "definitions": { + "id": { + "type": "string", + "description": "A static id. It should never change for a given value, even if the value itself is changing. For example, all versions of the same primitive should have the same id. If possible, it should be a UUID generated in any way, but if there is an existing id available, it can be reused." + }, + "hash_id": { + "type": "string", + "description": "An UUIDv5 id computed by using UUID namespace \"8614b2cc-89ef-498e-9254-833233b3959b\" and JSON-serialized contents of the document without the \"id\" field for UUID name." + }, + "version": { + "type": "string", + "description": "A string representing a version. Versions can be PEP 440 version strings or a SHA256 hexadecimal digest of value's content, if applicable. In the former case they are compared according to PEP 440 rules." + }, + "digest": { + "type": "string", + "description": "A SHA256 hexadecimal digest of value's content. For datasets is digest over all files. 
For primitives it is a digest of its \"id\" and \"installation\" metadata. For other JSON-compatible structures, it is generally a digest of the canonical JSON-serialization of the structure, without the \"digest\" field itself.", + "pattern": "^[a-fA-F0-9]{64}$" + }, + "schema": { + "type": "string", + "description": "A URI representing a metadata.datadrivendiscovery.org schema and version to which metadata conforms.", + "format": "uri" + }, + "description": { + "type": "string", + "description": "A natural language description in an unspecified language." + }, + "name": { + "type": "string", + "description": "A human readable name in an unspecified language or format." + }, + "other_names": { + "type": "array", + "description": "Any other names associated with the value.", + "items": { + "$ref": "#/definitions/name" + }, + "minItems": 1 + }, + "python_path": { + "type": "string", + "description": "A fully-qualified Python path to primitive's class under the \"d3m.primitives\" namespace.", + "pattern": "^d3m\\.primitives\\." + }, + "original_python_path": { + "type": "string", + "description": "A fully-qualified Python path to primitive's class inside installable package and not one under the \"d3m.primitives\" namespace." + }, + "dimension": { + "type": "object", + "description": "Metadata for the dimension (e.g., rows and columns).", + "properties": { + "name": { + "$ref": "#/definitions/name" + }, + "description": { + "$ref": "#/definitions/description" + }, + "semantic_types": { + "$ref": "#/definitions/semantic_types" + }, + "length": { + "type": "integer", + "description": "Number of elements in a given dimension (number of samples, number of columns, etc.)." + }, + "sampling_rate": { + "allOf": [{"$ref": "#/definitions/sampling_rate"}], + "description": "If values in the dimension are sampled, this value represents the sampling rate in seconds." + } + }, + "required": [ + "length" + ], + "additionalProperties": true + }, + "data_metafeatures": { + "type": "object", + "description": "Some data metafeatures can apply both at the container (dataset) or internal data levels (resource, table, column). In any case they apply and hold for the whole underlying structure. For example, if \"number_distinct_values\" is set at a dataset level, it means that all columns in the dataset have this number of distinct values. If it is set only for a target column, then only that column has this number of distinct values, classes.", + "properties": { + "number_of_attributes": { + "type": "integer", + "description": "The number of attributes in the data." + }, + "number_of_instances": { + "type": "integer", + "description": "The number of instances in the data." + }, + "dimensionality": { + "type": "number", + "description": "Number of attributes divided by the number of instances." + }, + "number_of_numeric_attributes": { + "type": "integer", + "description": "Number of numeric attributes, which are not also categorical." + }, + "ratio_of_numeric_attributes": { + "type": "number", + "description": "Ratio of number of numeric attributes to total number of attributes." + }, + "number_of_string_attributes": { + "type": "integer", + "description": "Number of string attributes, which are not also categorical." + }, + "ratio_of_string_attributes": { + "type": "number", + "description": "Ratio of number of string attributes to total number of attributes." + }, + "number_of_categorical_attributes": { + "type": "integer", + "description": "Number of categorical attributes." 
+ }, + "ratio_of_categorical_attributes": { + "type": "number", + "description": "Ratio of number of categorical attributes to total number of attributes." + }, + "number_of_other_attributes": { + "type": "integer", + "description": "Number of other (not numeric, not string, and not categorical) attributes." + }, + "ratio_of_other_attributes": { + "type": "number", + "description": "Ratio of number of other attributes to total number of attributes." + }, + "number_of_discrete_attributes": { + "type": "integer", + "description": "Number of discrete attributes. A discrete attribute is a numeric attribute with only integer values." + }, + "ratio_of_discrete_attributes": { + "type": "number", + "description": "Ratio of number of discrete attributes to total number of attributes. A discrete attribute is a numeric attribute with only integer values." + }, + "number_of_binary_attributes": { + "type": "integer", + "description": "Number of binary attributes. A binary attribute is a discrete attribute with exactly two values." + }, + "ratio_of_binary_attributes": { + "type": "number", + "description": "Ratio of number of binary attributes to total number of attributes. A binary attribute is a discrete attribute with exactly two values." + }, + "attribute_counts_by_structural_type": { + "type": "object", + "description": "A map between structural types as string and a count of attributes with that structural type.", + "additionalProperties": { + "type": "integer" + } + }, + "attribute_ratios_by_structural_type": { + "type": "object", + "description": "A map between structural types as string and a ratio of attributes with that structural type to all attributes.", + "additionalProperties": { + "type": "number" + } + }, + "attribute_counts_by_semantic_type": { + "type": "object", + "description": "A map between semantic types and a count of attributes with that semantic type. Attributes can have multiple semantic types.", + "additionalProperties": { + "type": "integer" + } + }, + "attribute_ratios_by_semantic_type": { + "type": "object", + "description": "A map between semantic types as string and a ratio of attributes with that semantic type to all attributes. Attributes can have multiple semantic types.", + "additionalProperties": { + "type": "number" + } + }, + "number_distinct_values": { + "type": "integer", + "description": "The number of distinct non-missing values for categorical or discrete values." + }, + "entropy_of_values": { + "type": "number", + "description": "The entropy of non-missing values. If values are not categorical or discrete, they are binned into \"number of all values\" ^ 1/3 bins." + }, + "value_counts_aggregate": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics of occurrence counts of non-missing values. If values are not categorical or discrete, they are binned into \"number of all values\" ^ 1/3 bins." + }, + "value_probabilities_aggregate": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics of probabilities of non-missing values. Probability of a value is defined as \"an occurrence count of a non-missing value\" / \"number of all non-missing values\". If values are not categorical or discrete, they are binned into \"number of all values\" ^ 1/3 bins." + }, + "values_aggregate": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics of numeric non-missing values." 
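As a concrete, hypothetical illustration of the counting metafeatures defined so far, a table with 150 instances and 8 attributes (5 numeric, 2 categorical, 1 string) could carry:

    data_metafeatures = {
        'number_of_instances': 150,
        'number_of_attributes': 8,
        'dimensionality': 8 / 150,
        'number_of_numeric_attributes': 5,
        'ratio_of_numeric_attributes': 5 / 8,
        'number_of_categorical_attributes': 2,
        'ratio_of_categorical_attributes': 2 / 8,
        'number_of_string_attributes': 1,
        'ratio_of_string_attributes': 1 / 8,
    }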
+ }, + "number_distinct_values_of_categorical_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the number of distinct non-missing values in each categorical attributes." + }, + "number_distinct_values_of_numeric_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the number of distinct non-missing values in each numeric attributes." + }, + "number_distinct_values_of_discrete_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the number of distinct non-missing values in each discrete attributes." + }, + "mean_of_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the mean of numeric attributes." + }, + "standard_deviation_of_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the standard deviation of numeric attributes." + }, + "kurtosis_of_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the kurtosis of numeric attributes." + }, + "skew_of_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the skew of numeric attributes." + }, + "entropy_of_categorical_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the entropy of categorical attributes." + }, + "entropy_of_numeric_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the entropy of numeric attributes." + }, + "entropy_of_discrete_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the entropy of discrete attributes." + }, + "entropy_of_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the entropy of all (categorical and numeric) attributes." + }, + "joint_entropy_of_categorical_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the joint entropy of every categorical attribute with a given target." + }, + "joint_entropy_of_numeric_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the joint entropy of every numeric attribute with a given target." + }, + "joint_entropy_of_discrete_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the joint entropy of every discrete attribute with a given target." + }, + "joint_entropy_of_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the joint entropy of every (categorical and numeric) attribute with a given target." + }, + "mutual_information_of_categorical_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the mutual information of every categorical attribute with a given target." + }, + "mutual_information_of_numeric_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the mutual information of every numeric attribute with a given target." 
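The entropy-based fields in this block are Shannon entropies over value distributions. A small sketch for a single categorical column with value counts 6, 3 and 1; the schema does not fix the logarithm base, natural log is assumed here:

    import math

    value_counts = [6, 3, 1]
    total = sum(value_counts)
    probabilities = [count / total for count in value_counts]
    entropy_of_values = -sum(p * math.log(p) for p in probabilities)  # roughly 0.898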
+ }, + "mutual_information_of_discrete_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the mutual information of every discrete attribute with a given target." + }, + "mutual_information_of_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the mutual information of every (categorical and numeric) attribute with a given target." + }, + "pearson_correlation_of_numeric_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the pearson correlation between all pairs of numeric attributes. If set on a target column, it represents aggregate statistics about the pearson correlation of every numeric attribute with that target." + }, + "spearman_correlation_of_numeric_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the spearman correlation between all pairs of numeric attributes. If set on a target column, it represents aggregate statistics about the spearman correlation of every numeric attribute with that target." + }, + "canonical_correlation_of_numeric_attributes": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about the canonical correlation between all pairs of numeric attributes. If set on a target column, it represents aggregate statistics about the canonical correlation of every numeric attribute with that target." + }, + "equivalent_number_of_categorical_attributes": { + "type": "number", + "description": "Number of categorical attributes needed to optimally describe the target (under the assumption of independence among attributes). Equals target's \"entropy_of_values\" divided by \"mutual_information_of_categorical_attributes.mean\"." + }, + "equivalent_number_of_numeric_attributes": { + "type": "number", + "description": "Number of numeric attributes needed to optimally describe the target (under the assumption of independence among attributes). Equals target's \"entropy_of_values\" divided by \"mutual_information_of_numeric_attributes.mean\"." + }, + "equivalent_number_of_discrete_attributes": { + "type": "number", + "description": "Number of discrete attributes needed to optimally describe the target (under the assumption of independence among attributes). Equals target's \"entropy_of_values\" divided by \"mutual_information_of_discrete_attributes.mean\"." + }, + "equivalent_number_of_attributes": { + "type": "number", + "description": "Number of all (categorical and numeric) attributes needed to optimally describe the target (under the assumption of independence among attributes). Equals target's \"entropy_of_values\" divided by \"mutual_information_of_attributes.mean\"." + }, + "categorical_noise_to_signal_ratio": { + "type": "number", + "description": "An estimate of the amount of irrelevant information in the categorical attributes regarding the target. Equals (\"entropy_of_categorical_attributes.mean\" - \"mutual_information_of_categorical_attributes.mean\") divided by \"mutual_information_of_categorical_attributes.mean\"." + }, + "numeric_noise_to_signal_ratio": { + "type": "number", + "description": "An estimate of the amount of irrelevant information in the numeric attributes regarding the target. Equals (\"entropy_of_numeric_attributes.mean\" - \"mutual_information_of_numeric_attributes.mean\") divided by \"mutual_information_of_numeric_attributes.mean\"." 
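The "equivalent number" and "noise to signal" fields here are plain arithmetic over the aggregate means, as their descriptions state; a sketch with hypothetical values:

    entropy_of_values = 1.2             # target's entropy_of_values
    mutual_information_mean = 0.3       # mutual_information_of_attributes.mean
    entropy_of_attributes_mean = 0.9    # entropy_of_attributes.mean

    equivalent_number_of_attributes = entropy_of_values / mutual_information_mean
    # -> 4.0: about four independent attributes would be needed to describe the target.

    noise_to_signal_ratio = (entropy_of_attributes_mean - mutual_information_mean) / mutual_information_mean
    # -> 2.0: twice as much irrelevant as relevant information in the attributes.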
+ }, + "discrete_noise_to_signal_ratio": { + "type": "number", + "description": "An estimate of the amount of irrelevant information in the discrete attributes regarding the target. Equals (\"entropy_of_discrete_attributes.mean\" - \"mutual_information_of_discrete_attributes.mean\") divided by \"mutual_information_of_discrete_attributes.mean\"." + }, + "noise_to_signal_ratio": { + "type": "number", + "description": "An estimate of the amount of irrelevant information in all (categorical and numeric) attributes regarding the target. Equals (\"entropy_of_attributes.mean\" - \"mutual_information_of_attributes.mean\") divided by \"mutual_information_of_attributes.mean\"." + }, + "number_of_missing_values": { + "type": "integer", + "description": "Number of missing values." + }, + "ratio_of_missing_values": { + "type": "number", + "description": "Ratio of number of missing values to number of all values." + }, + "number_of_present_values": { + "type": "integer", + "description": "Number of present values." + }, + "ratio_of_present_values": { + "type": "number", + "description": "Ratio of number of present values to number of all values." + }, + "number_of_numeric_values": { + "type": "integer", + "description": "Number of values that are strictly integers or floats. The value NaN is not counted." + }, + "ratio_of_numeric_values": { + "type": "number", + "description": "Ratio of number of values that are strictly integers or floats to number of all values. The value NaN is not counted." + }, + "number_of_positive_numeric_values": { + "type": "integer", + "description": "Number of positive values." + }, + "ratio_of_positive_numeric_values": { + "type": "number", + "description": "Ratio of number of positive values to number of all values." + }, + "number_of_negative_numeric_values": { + "type": "integer", + "description": "Number of negative values." + }, + "ratio_of_negative_numeric_values": { + "type": "number", + "description": "Ratio of number of negative values to number of all values." + }, + "number_of_numeric_values_equal_0": { + "type": "integer", + "description": "Number of 0 or 0.0 values." + }, + "ratio_of_numeric_values_equal_0": { + "type": "number", + "description": "Ratio of number of 0 or 0.0 values to number of all values." + }, + "number_of_numeric_values_equal_1": { + "type": "integer", + "description": "Number of 1 or 1.0 values." + }, + "ratio_of_numeric_values_equal_1": { + "type": "number", + "description": "Ratio of number of 1 or 1.0 values to number of all values." + }, + "number_of_numeric_values_equal_-1": { + "type": "integer", + "description": "Number of -1 and -1.0." + }, + "ratio_of_numeric_values_equal_-1": { + "type": "number", + "description": "Ratio of number of -1 and -1.0 to number of all values." + }, + "number_of_outlier_numeric_values": { + "allOf": [{"$ref": "#/definitions/outliers"}], + "description": "Outliers of numeric values." + }, + "number_of_instances_with_missing_values": { + "type": "integer", + "description": "Number of instances with missing values in one or more attributes." + }, + "ratio_of_instances_with_missing_values": { + "type": "number", + "description": "Ratio of number of instances with missing values in one or more attributes to number of all instances." + }, + "number_of_instances_with_present_values": { + "type": "integer", + "description": "Number of instances with present values in one or more attributes." 
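A hypothetical filling-in of the presence and numeric-value counters above for one column of six values (None and NaN are treated as missing here, which is an assumption, not something the schema spells out):

    import math

    column_values = [3.0, None, 7.5, math.nan, 0.0, 1.0]

    counts = {
        'number_of_missing_values': 2,           # None and NaN
        'ratio_of_missing_values': 2 / 6,
        'number_of_present_values': 4,
        'ratio_of_present_values': 4 / 6,
        'number_of_numeric_values': 4,           # NaN is not counted, per the definition above
        'number_of_positive_numeric_values': 3,  # 3.0, 7.5, 1.0
        'number_of_numeric_values_equal_0': 1,
        'number_of_numeric_values_equal_1': 1,
    }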
+          },
+          "ratio_of_instances_with_present_values": {
+            "type": "number",
+            "description": "Ratio of number of instances with present values in one or more attributes to number of all instances."
+          },
+          "natural_language_of_attribute": {
+            "type": "array",
+            "description": "Natural language detection that contains pairs of language code and count.",
+            "items": {
+              "type": "object",
+              "properties": {
+                "code": {
+                  "type": "string",
+                  "description": "ISO 639-1 language code, e.g., \"en\", \"es\", \"zh\"."
+                },
+                "count": {
+                  "type": "integer",
+                  "description": "Number of values in an attribute with the given language code."
+                }
+              },
+              "required": [
+                "code",
+                "count"
+              ],
+              "additionalProperties": true
+            },
+            "minItems": 1
+          },
+          "length_of_string_values": {
+            "allOf": [{"$ref": "#/definitions/aggregate"}],
+            "description": "Aggregate statistics about the length of string values."
+          },
+          "token_count_in_string_values": {
+            "allOf": [{"$ref": "#/definitions/aggregate"}],
+            "description": "Aggregate statistics about the number of tokens per string value. Tokens are split by the space character."
+          },
+          "numeric_char_density": {
+            "allOf": [{"$ref": "#/definitions/aggregate"}],
+            "description": "Aggregate statistics about numeric character density of string values. Density is defined to be the number of characters that satisfy \"isdigit\" divided by the number of characters in the string."
+          },
+          "number_of_values_containing_numeric_char": {
+            "type": "integer",
+            "description": "Number of string values that contain at least one numeric character."
+          },
+          "ratio_of_values_containing_numeric_char": {
+            "type": "number",
+            "description": "Ratio of number of string values that contain at least one numeric character to number of all string values."
+          },
+          "number_of_tokens": {
+            "type": "integer",
+            "description": "Number of tokens in all string values. Tokens are split by the space character."
+          },
+          "number_of_tokens_containing_numeric_char": {
+            "type": "integer",
+            "description": "Number of tokens in all string values that contain at least one numeric character."
+          },
+          "ratio_of_tokens_containing_numeric_char": {
+            "type": "number",
+            "description": "Ratio of number of tokens in all string values that contain at least one numeric character to number of tokens in all string values."
+          },
+          "number_of_tokens_split_by_punctuation": {
+            "type": "integer",
+            "description": "Number of tokens in all string values. Tokens are split by \"string.punctuation\"."
+          },
+          "number_of_tokens_split_by_punctuation_containing_numeric_char": {
+            "type": "integer",
+            "description": "Number of tokens in all string values that contain at least one numeric character. Tokens are split by \"string.punctuation\"."
+          },
+          "ratio_of_tokens_split_by_punctuation_containing_numeric_char": {
+            "type": "number",
+            "description": "Ratio of number of tokens in all string values that contain at least one numeric character to number of tokens in all string values split by punctuation."
+          },
+          "number_of_values_with_leading_spaces": {
+            "type": "integer",
+            "description": "Number of string values with leading whitespaces."
+          },
+          "ratio_of_values_with_leading_spaces": {
+            "type": "number",
+            "description": "Ratio of number of string values with leading whitespaces to number of all string values."
+          },
+          "number_of_values_with_trailing_spaces": {
+            "type": "integer",
+            "description": "Number of string values with trailing whitespaces."
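To make the token-based counters above concrete, a hypothetical column of three string values, with tokens split on the space character as the definitions state:

    values = ['room 12b', 'hall A', 'room 7']

    tokens = [token for value in values for token in value.split(' ')]
    # ['room', '12b', 'hall', 'A', 'room', '7']

    number_of_tokens = len(tokens)                                             # 6
    number_of_tokens_containing_numeric_char = sum(
        any(character.isdigit() for character in token) for token in tokens)  # 2 ('12b' and '7')
    ratio_of_tokens_containing_numeric_char = (
        number_of_tokens_containing_numeric_char / number_of_tokens)          # 1/3
    number_of_values_containing_numeric_char = 2                              # 'room 12b' and 'room 7'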
+ }, + "ratio_of_values_with_trailing_spaces": { + "type": "number", + "description": "Ratio of number of string values with trailing whitespaces to number of all string values." + }, + "number_of_distinct_values": { + "type": "integer", + "description": "Number of distinct values. Missing values are ignored." + }, + "ratio_of_distinct_values": { + "type": "number", + "description": "Ratio of number of distinct values to number of all values. Missing values are ignored." + }, + "number_of_distinct_tokens": { + "type": "integer", + "description": "Number of distinct tokens in all string values. Tokens are split by the space character. Missing values are ignored." + }, + "ratio_of_distinct_tokens": { + "type": "number", + "description": "Ratio of number of distinct tokens in all string values to number of tokens in all string values. Tokens are split by the space character. Missing values are ignored." + }, + "number_of_distinct_tokens_split_by_punctuation": { + "type": "integer", + "description": "Number of distinct tokens in all string values. Tokens are split by \"string.punctions\". Missing values are ignored." + }, + "ratio_of_distinct_tokens_split_by_punctuation": { + "type": "number", + "description": "Ratio of number of distinct tokens in all string values to number of tokens in all string values. Tokens are split by \"string.punctions\". Missing values are ignored." + }, + "most_common_tokens": { + "type": "array", + "description": "Most common tokens and their counts and ratio. Tokens are split by the space character.", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "Token string value." + }, + "count": { + "type": "integer", + "description": "Number of occurrences of this token in all string values." + }, + "ratio": { + "type": "number", + "description": "Ratio of number of occurrences of this token in all string values to number of tokens in all string tokens." + } + }, + "required": [ + "token", + "count" + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "most_common_alphanumeric_tokens": { + "type": "array", + "description": "Most common alphanumeric tokens and their counts and ratio. A token is alphanumeric if \"isalnum\" returns \"True\". Tokens are split by the space character.", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "Token string value." + }, + "count": { + "type": "integer", + "description": "Number of occurrences of this token in all string values." + }, + "ratio": { + "type": "number", + "description": "Ratio of number of occurrences of this token in all string values to number of tokens in all string tokens." + } + }, + "required": [ + "token", + "count" + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "most_common_numeric_tokens": { + "type": "array", + "description": "Most common numeric tokens and their counts and ratio. Tokens are split by the space character.", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "Token string value." + }, + "count": { + "type": "integer", + "description": "Number of occurrences of this token in all string values." + }, + "ratio": { + "type": "number", + "description": "Ratio of number of occurrences of this token in all string values to number of tokens in all string tokens." 
+ } + }, + "required": [ + "token", + "count" + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "most_common_tokens_split_by_punctuation": { + "type": "array", + "description": "Most common tokens and their counts and ratio. Tokens are split by \"string.punctions\".", + "items": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "Token string value." + }, + "count": { + "type": "integer", + "description": "Number of occurrences of this token in all string values." + }, + "ratio": { + "type": "number", + "description": "Ratio of number of occurrences of this token in all string values to number of tokens in all string tokens." + } + }, + "required": [ + "token", + "count" + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "most_common_punctuations": { + "type": "array", + "description": "The most common punctuations and their counts. Punctuations are defined by \"string.punctions\".", + "items": { + "type": "object", + "properties": { + "punctuation": { + "type": "string", + "description": "Punctuation string value." + }, + "count": { + "type": "integer", + "description": "Number of occurrence of this punctuation in all string values." + }, + "ratio": { + "type": "number", + "description": "Ratio of number of occurrences of this punctuation in all string values to number of characters in all string values." + }, + "punctuation_density_aggregate": { + "allOf": [{"$ref": "#/definitions/aggregate"}], + "description": "Aggregate statistics about punctuation density of string values for this punctuation. Punctuation density is the ratio of number of occurrences of this punctuation in the value to the number of characters in the value." + }, + "punctuation_density_outliers": { + "allOf": [{"$ref": "#/definitions/outliers"}], + "description": "Outliers of punctuation density of string values for this punctuation. Punctuation density is the ratio of number of occurrences of this punctuation in the value to the number of characters in the value." + } + }, + "required": [ + "punctuation", + "count" + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "most_common_raw_values": { + "type": "array", + "description": "Most common values and their counts and ratio.", + "items": { + "type": "object", + "properties": { + "value": { + "type": "string", + "description": "Value in its raw string format." + }, + "count": { + "type": "integer", + "description": "Number of occurrences of this value in all values." + }, + "ratio": { + "type": "number", + "description": "Ratio of number of occurrences of this value in all values to number of all values." + } + }, + "required": [ + "value", + "count" + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "default_accuracy": { + "type": "number", + "description": "The predictive accuracy obtained by always predicting the majority class." + }, + "pca": { + "type": "object", + "description": "The results of principal component analysis on the data using default hyper-parameters.", + "properties": { + "explained_variance_ratio_component_1": { + "type": "number", + "description": "The explained variance ratio of component 1." + }, + "explained_variance_ratio_component_2": { + "type": "number", + "description": "The explained variance ratio of component 2." + }, + "explained_variance_ratio_component_3": { + "type": "number", + "description": "The explained variance ratio of component 3." 
+          },
+          "eigenvalue_component_1": {
+            "type": "number",
+            "description": "The eigenvalue for component 1."
+          },
+          "eigenvalue_component_2": {
+            "type": "number",
+            "description": "The eigenvalue for component 2."
+          },
+          "eigenvalue_component_3": {
+            "type": "number",
+            "description": "The eigenvalue for component 3."
+          },
+          "determinant_of_covariance": {
+            "type": "number",
+            "description": "The determinant of the covariance matrix."
+          },
+          "primitive": {
+            "allOf": [{"$ref": "#/definitions/primitive_reference"}],
+            "description": "A primitive used to compute these metafeatures."
+          },
+          "random_seed": {
+            "type": "integer",
+            "description": "Random seed used, if a primitive accepts a random seed."
+          }
+        },
+        "required": [
+          "primitive"
+        ],
+        "additionalProperties": true
+      },
+      "oner": {
+        "type": "object",
+        "description": "The results of training Weka's OneR algorithm (or equivalent implementation) on the data using default hyper-parameters.",
+        "properties": {
+          "accuracy": {
+            "type": "number",
+            "description": "The predictive accuracy. Determines how much information is contained in the most predictive attribute."
+          },
+          "primitive": {
+            "allOf": [{"$ref": "#/definitions/primitive_reference"}],
+            "description": "A primitive used to compute these metafeatures."
+          },
+          "random_seed": {
+            "type": "integer",
+            "description": "Random seed used, if a primitive accepts a random seed."
+          }
+        },
+        "required": [
+          "primitive"
+        ],
+        "additionalProperties": true
+      },
+      "random_tree": {
+        "type": "object",
+        "description": "The results of training decision trees of various depths with random splits and other hyper-parameters set to defaults.",
+        "properties": {
+          "depth_1_error_rate": {
+            "type": "number",
+            "description": "The error rate resulting from training a depth 1 decision tree with a random split."
+          },
+          "depth_1_kappa": {
+            "type": "number",
+            "description": "The kappa resulting from training a depth 1 decision tree with a random split."
+          },
+          "depth_1_auc": {
+            "type": "number",
+            "description": "The auc resulting from training a depth 1 decision tree with a random split."
+          },
+          "depth_2_error_rate": {
+            "type": "number",
+            "description": "The error rate resulting from training a depth 2 decision tree with a random split."
+          },
+          "depth_2_kappa": {
+            "type": "number",
+            "description": "The kappa resulting from training a depth 2 decision tree with a random split."
+          },
+          "depth_2_auc": {
+            "type": "number",
+            "description": "The auc resulting from training a depth 2 decision tree with a random split."
+          },
+          "depth_3_error_rate": {
+            "type": "number",
+            "description": "The error rate resulting from training a depth 3 decision tree with a random split."
+          },
+          "depth_3_kappa": {
+            "type": "number",
+            "description": "The kappa resulting from training a depth 3 decision tree with a random split."
+          },
+          "depth_3_auc": {
+            "type": "number",
+            "description": "The auc resulting from training a depth 3 decision tree with a random split."
+          },
+          "primitive": {
+            "allOf": [{"$ref": "#/definitions/primitive_reference"}],
+            "description": "A primitive used to compute these metafeatures."
+          },
+          "random_seed": {
+            "type": "integer",
+            "description": "Random seed used, if a primitive accepts a random seed."
+ } + }, + "required": [ + "primitive" + ], + "additionalProperties": true + }, + "decision_stump": { + "type": "object", + "description": "The results of training a depth 1 decision tree on the data with the best split based on entropy and other hyper-parameters set to defaults.", + "properties": { + "error_rate": { + "type": "number", + "description": "The error rate resulting from training a depth 1 decision tree with the best split based on entropy." + }, + "kappa": { + "type": "number", + "description": "The kappa resulting from training a depth 1 decision tree with the best split based on entropy." + }, + "auc": { + "type": "number", + "description": "The auc resulting from training a depth 1 decision tree with the best split based on entropy." + }, + "primitive": { + "allOf": [{"$ref": "#/definitions/primitive_reference"}], + "description": "A primitive used to compute these metafeatures." + }, + "random_seed": { + "type": "integer", + "description": "Random seed used, if a primitive accepts a random seed." + } + }, + "required": [ + "primitive" + ], + "additionalProperties": true + }, + "naive_bayes": { + "type": "object", + "description": "The results of training a naive bayes classifier on the data using default hyper-parameters.", + "properties": { + "error_rate": { + "type": "number", + "description": "The error rate resulting from training a naive bayes classifier on the data." + }, + "kappa": { + "type": "number", + "description": "The kappa resulting from training a naive bayes classifier on the data." + }, + "auc": { + "type": "number", + "description": "The auc resulting from training a naive bayes classifier on the data." + }, + "primitive": { + "allOf": [{"$ref": "#/definitions/primitive_reference"}], + "description": "A primitive used to compute these metafeatures." + }, + "random_seed": { + "type": "integer", + "description": "Random seed used, if a primitive accepts a random seed." + } + }, + "required": [ + "primitive" + ], + "additionalProperties": true + }, + "linear_discriminant_analysis": { + "type": "object", + "description": "The results of doing linear discriminant analysis classification on the data using default hyper-parameters.", + "properties": { + "error_rate": { + "type": "number", + "description": "The error rate resulting from doing linear discriminant analysis classification on the data." + }, + "kappa": { + "type": "number", + "description": "The kappa resulting from doing linear discriminant analysis classification on the data." + }, + "auc": { + "type": "number", + "description": "The auc resulting from doing linear discriminant analysis classification on the data." + }, + "primitive": { + "allOf": [{"$ref": "#/definitions/primitive_reference"}], + "description": "A primitive used to compute these metafeatures." + }, + "random_seed": { + "type": "integer", + "description": "Random seed used, if a primitive accepts a random seed." + } + }, + "required": [ + "primitive" + ], + "additionalProperties": true + }, + "knn_1_neighbor": { + "type": "object", + "description": "The results of training a knn classifier on the data with k=1 and other hyper-parameters set to defaults.", + "properties": { + "error_rate": { + "type": "number", + "description": "The error rate resulting from training a knn classifier on the data with k=1." + }, + "kappa": { + "type": "number", + "description": "The kappa resulting from training a knn classifier on the data with k=1." 
+ }, + "auc": { + "type": "number", + "description": "The auc resulting from training a knn classifier on the data with k=1." + }, + "primitive": { + "allOf": [{"$ref": "#/definitions/primitive_reference"}], + "description": "A primitive used to compute these metafeatures." + }, + "random_seed": { + "type": "integer", + "description": "Random seed used, if a primitive accepts a random seed." + } + }, + "required": [ + "primitive" + ], + "additionalProperties": true + }, + "c45_decision_tree": { + "type": "object", + "description": "The results of training a C4.5 decision tree (or equivalent implementation) on the data using default hyper-parameters.", + "properties": { + "error_rate": { + "type": "number", + "description": "The error rate resulting from training a C4.5 decision tree on the data." + }, + "kappa": { + "type": "number", + "description": "The kappa resulting from training a C4.5 decision tree on the data." + }, + "auc": { + "type": "number", + "description": "The auc resulting from training a C4.5 decision tree on the data." + }, + "primitive": { + "allOf": [{"$ref": "#/definitions/primitive_reference"}], + "description": "A primitive used to compute these metafeatures." + }, + "random_seed": { + "type": "integer", + "description": "Random seed used, if a primitive accepts a random seed." + } + }, + "required": [ + "primitive" + ], + "additionalProperties": true + }, + "rep_tree": { + "type": "object", + "description": "The results of training a decision tree using reduced-error pruning (implementation equivalent to Weka's REPTree) on the data using default hyper-parameters.", + "properties": { + "depth_1_error_rate": { + "type": "number", + "description": "The error rate resulting from training a decision tree using reduced-error pruning on the data with tree depth 1." + }, + "depth_1_kappa": { + "type": "number", + "description": "The kappa resulting from training a decision tree using reduced-error pruning on the data with tree depth 1." + }, + "depth_1_auc": { + "type": "number", + "description": "The auc resulting from training a decision tree using reduced-error pruning on the data with tree depth 1." + }, + "depth_2_error_rate": { + "type": "number", + "description": "The error rate resulting from training a decision tree using reduced-error pruning on the data with tree depth 2." + }, + "depth_2_kappa": { + "type": "number", + "description": "The kappa resulting from training a decision tree using reduced-error pruning on the data with tree depth 2." + }, + "depth_2_auc": { + "type": "number", + "description": "The auc resulting from training a decision tree using reduced-error pruning on the data with tree depth 2." + }, + "depth_3_error_rate": { + "type": "number", + "description": "The error rate resulting from training a decision tree using reduced-error pruning on the data with tree depth 3." + }, + "depth_3_kappa": { + "type": "number", + "description": "The kappa resulting from training a decision tree using reduced-error pruning on the data with tree depth 3." + }, + "depth_3_auc": { + "type": "number", + "description": "The auc resulting from training a decision tree using reduced-error pruning on the data with tree depth 3." + }, + "primitive": { + "allOf": [{"$ref": "#/definitions/primitive_reference"}], + "description": "A primitive used to compute these metafeatures." + }, + "random_seed": { + "type": "integer", + "description": "Random seed used, if a primitive accepts a random seed." 
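All of the landmarking blocks in this family (decision_stump, naive_bayes, linear_discriminant_analysis, knn_1_neighbor, c45_decision_tree, rep_tree) share the same shape: one or more scores plus a reference to the primitive that produced them and the random seed used. A hypothetical naive_bayes entry, read against the default_accuracy baseline defined earlier:

    # default_accuracy = 0.60 would mean the majority class alone is right 60% of the time.
    naive_bayes_metafeature = {
        'error_rate': 0.18,   # i.e. 82% accuracy, well above the majority-class baseline
        'kappa': 0.61,
        'auc': 0.88,
        'random_seed': 0,
        # 'primitive' (a primitive_reference defined elsewhere in this schema) is required
        # by the schema but is omitted here rather than guessed.
    }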
+ } + }, + "required": [ + "primitive" + ], + "additionalProperties": true + }, + "jrip": { + "type": "object", + "description": "The results of training a propositional rule learner (implementation equivalent to Weka's JRip), Repeated Incremental Pruning to Produce Error Reduction (RIPPER), which was proposed by William W. Cohen as an optimized version of IREP.", + "properties": { + "error_rate": { + "type": "number", + "description": "The error rate resulting from training a propositional rule learner." + }, + "kappa": { + "type": "number", + "description": "The kappa rate resulting from training a propositional rule learner." + }, + "auc": { + "type": "number", + "description": "The auc resulting from training a propositional rule learner." + }, + "primitive": { + "allOf": [{"$ref": "#/definitions/primitive_reference"}], + "description": "A primitive used to compute these metafeatures." + }, + "random_seed": { + "type": "integer", + "description": "Random seed used, if a primitive accepts a random seed." + } + }, + "required": [ + "primitive" + ], + "additionalProperties": true + }, + "naive_bayes_tree": { + "type": "object", + "description": "A decision tree with naive bayes classifiers at the leaves.", + "properties": { + "error_rate": { + "type": "number", + "description": "The error rate resulting from training with the naive bayes tree algorithm." + }, + "kappa": { + "type": "number", + "description": "The kappa rate resulting from training with the naive bayes tree algorithm." + }, + "auc": { + "type": "number", + "description": "The auc resulting from training with the naive bayes tree algorithm." + }, + "primitive": { + "allOf": [{"$ref": "#/definitions/primitive_reference"}], + "description": "A primitive used to compute these metafeatures." + }, + "random_seed": { + "type": "integer", + "description": "Random seed used, if a primitive accepts a random seed." + } + }, + "required": [ + "primitive" + ], + "additionalProperties": true + } + }, + "additionalProperties": true + }, + "docker_image": { + "description": "A reference to a docker image, including a name and a digest.", + "type": "object", + "properties": { + "image_name": { + "type": "string", + "description": "Docker image name including a label, and optionally prefixed with a registry." + }, + "image_digest": { + "type": "string", + "description": "Docker image digest.", + "pattern": "^sha256:[a-fA-F0-9]{64}$" + } + }, + "required": [ + "image_name", + "image_digest" + ], + "additionalProperties": true + }, + "installation": { + "type": "array", + "description": "Installation instructions for a primitive. Everything listed has to be installed, in order listed, for a primitive to work.", + "items": { + "type": "object", + "oneOf": [ + { + "properties": { + "type": { + "type": "string", + "enum": ["PIP"], + "description": "A Python package." + }, + "package": { + "type": "string", + "description": "Python package name." + }, + "version": { + "allOf": [{"$ref": "#/definitions/version"}], + "description": "Exact version string." + }, + "registry": { + "type": "string" + } + }, + "required": [ + "package", + "type", + "version" + ] + }, + { + "properties": { + "type": { + "type": "string", + "enum": ["PIP"], + "description": "A Python package. It should be installed with pip's \"--editable\" argument enabled." + }, + "package_uri": { + "type": "string", + "description": "Python package's canonical URI for installation with an exact version of the package, ideally git commit hash. 
If it is a git URI, \"#egg=package_name\" URI suffix is required." + } + }, + "required": [ + "package_uri", + "type" + ] + }, + { + "allOf": [ + { + "properties": { + "type": { + "type": "string", + "enum": ["DOCKER"], + "description": "A Docker image." + }, + "key": { + "type": "string", + "description": "When this Docker image runs, its address should be exposed to the primitive under this key." + } + }, + "required": [ + "type", + "key" + ] + }, + { + "$ref": "#/definitions/docker_image" + } + ] + }, + { + "properties": { + "type": { + "type": "string", + "enum": ["UBUNTU"], + "description": "A system package." + }, + "package": { + "type": "string", + "description": "Ubuntu package name." + }, + "version": { + "type": "string", + "description": "Exact version string. While the version is required it is not required to install exactly this version of the package with a primitive because generally it is hard to get a hold of an old version to install (old packages get removed or moved to an archive). Knowing a version author of a primitive used can help with debugging to maybe understand why a primitive is misbehaving." + } + }, + "required": [ + "package", + "type", + "version" + ] + }, + { + "properties": { + "type": { + "type": "string", + "enum": ["FILE"], + "description": "A file to be downloaded and then provided as a volume to the primitive during its run. Download should be equivalent to the example: \"curl https://example.com/file > /path/to/volume_file\"." + }, + "key": { + "type": "string", + "description": "A downloaded file path should be exposed to the primitive under this key." + }, + "file_uri": { + "type": "string", + "description": "Where to download the file from.", + "format": "uri" + }, + "file_digest": { + "type": "string", + "description": "A SHA256 hexadecimal digest of the file.", + "pattern": "^[a-fA-F0-9]{64}$" + } + }, + "required": [ + "key", + "type", + "file_uri", + "file_digest" + ] + }, + { + "properties": { + "type": { + "type": "string", + "enum": ["TGZ"], + "description": "A gzipped tar file to be downloaded, extracted to a directory, which is then provided as a volume to the primitive during its run. Extraction should be equivalent to the example: \"curl https://example.com/file.tgz | tar -xz -C /path/to/volume_dir\"." + }, + "key": { + "type": "string", + "description": "An extracted directory path should be exposed to the primitive under this key." 
+ }, + "file_uri": { + "type": "string", + "description": "Where to download the file from.", + "format": "uri" + }, + "file_digest": { + "type": "string", + "description": "A SHA256 hexadecimal digest of the file.", + "pattern": "^[a-fA-F0-9]{64}$" + } + }, + "required": [ + "key", + "type", + "file_uri", + "file_digest" + ] + } + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "primitive_code": { + "type": "object", + "description": "Metadata describing the primitive's code.", + "properties": { + "class_type_arguments": { + "type": "object", + "description": "A map between type variables in primitive interfaces and their specified types for this primitive.", + "additionalProperties": { + "$ref": "#/definitions/structural_type" + } + }, + "interfaces_version": { + "description": "Version of d3m package in use by the primitive.", + "allOf": [{"$ref": "#/definitions/version"}] + }, + "interfaces": { + "type": "array", + "description": "A list of Python primitive interface classes used by the primitive in method resolution order.", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "params": { + "type": "object", + "description": "A map between primitive's parameter names and their types.", + "additionalProperties": { + "$ref": "#/definitions/structural_type" + } + }, + "hyperparams": { + "$ref": "#/definitions/hyperparams_configuration" + }, + "arguments": { + "type": "object", + "description": "A map describing all arguments which the primitive as a whole accepts, mapping the name of the argument to its description.", + "additionalProperties": { + "type": "object", + "properties": { + "type": { + "$ref": "#/definitions/structural_type" + }, + "kind": { + "type": "string", + "oneOf": [ + {"enum": ["RUNTIME"], "description": "Arguments which are meaningful only for a runtime executing a pipeline."}, + {"enum": ["PIPELINE"], "description": "Arguments which can be fulfilled by other primitives in a pipeline."}, + {"enum": ["HYPERPARAMETER"], "description": "Arguments which are overriding a hyper-parameter value for a method call."} + ] + }, + "default": { + "allOf": [{"$ref": "#/definitions/python_value"}], + "description": "A default value. Omitted if an argument has no default value." + } + }, + "required": [ + "type", + "kind" + ], + "additionalProperties": true + } + }, + "class_methods": { + "type": "object", + "description": "A map between primitive's class method names and their descriptions.", + "additionalProperties": { + "type": "object", + "properties": { + "description": { + "$ref": "#/definitions/description" + }, + "arguments": { + "type": "object", + "additionalProperties": { + "type": "object", + "properties": { + "type": { + "$ref": "#/definitions/structural_type" + }, + "default": { + "allOf": [{"$ref": "#/definitions/python_value"}], + "description": "A default value. Omitted if an argument has no default value." 
+ } + }, + "additionalProperties": true, + "required": [ + "type" + ] + } + }, + "returns": { + "$ref": "#/definitions/structural_type" + } + }, + "required": [ + "returns" + ], + "additionalProperties": true + } + }, + "instance_methods": { + "type": "object", + "description": "A map between primitive's instance method names and their descriptions.", + "additionalProperties": { + "type": "object", + "properties": { + "kind": { + "type": "string", + "oneOf": [ + {"enum": ["PRODUCE"], "description": "Methods which outputs can be inputs to another primitive."}, + {"enum": ["OTHER"], "description": "Methods used by the runtime."} + ] + }, + "description": { + "$ref": "#/definitions/description" + }, + "arguments": { + "type": "array", + "description": "A list of argument names this method accepts. Their description can be found in primitive's \"arguments\" map.", + "items": { + "type": "string" + } + }, + "returns": { + "$ref": "#/definitions/structural_type" + }, + "singleton": { + "type": "boolean", + "description": "Is a produce method a singleton produce method?" + }, + "inputs_across_samples": { + "type": "array", + "description": "List of inputs a produce method uses across samples and not sample by sample.", + "items": { + "type": "string" + } + } + }, + "required": [ + "kind", + "arguments", + "returns" + ], + "additionalProperties": true + } + }, + "class_attributes": { + "type": "object", + "description": "A map between primitive's class attribute names and their types.", + "additionalProperties": { + "$ref": "#/definitions/structural_type" + } + }, + "instance_attributes": { + "type": "object", + "description": "A map between primitive's instance attribute names and their types.", + "additionalProperties": { + "$ref": "#/definitions/structural_type" + } + } + }, + "required": [ + "class_type_arguments", + "interfaces_version", + "interfaces" + ], + "additionalProperties": true + }, + "hyperparams_configuration": { + "type": "object", + "description": "A map describing the hyper-parameter configuration of the primitive, mapping the name of the hyper-parameter to its description.", + "additionalProperties": { + "$ref": "#/definitions/hyperparameter" + } + }, + "hyperparameter": { + "type": "object", + "description": "Description of a hyper-parameter.", + "properties": { + "type": { + "allOf": [{"$ref": "#/definitions/python_type"}], + "description": "A Python type of the hyper-parameter description itself." + }, + "default": { + "allOf": [{"$ref": "#/definitions/python_value"}], + "description": "A default value." 
+ }, + "structural_type": { + "$ref": "#/definitions/structural_type" + }, + "semantic_types": { + "$ref": "#/definitions/semantic_types" + }, + "description": { + "$ref": "#/definitions/description" + }, + "lower": { + "$ref": "#/definitions/python_value" + }, + "upper": { + "$ref": "#/definitions/python_value" + }, + "upper_inclusive": { + "type": "boolean" + }, + "q": { + "type": "number" + }, + "mu": { + "type": "number" + }, + "sigma": { + "type": "number" + }, + "values": { + "type": "array", + "items": { + "$ref": "#/definitions/python_value" + } + }, + "configuration": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/hyperparameter" + } + }, + "primitive_families": { + "type": "array", + "items": { + "type": "string" + } + }, + "algorithm_types": { + "type": "array", + "items": { + "type": "string" + } + }, + "choices": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/hyperparams_configuration" + } + }, + "elements": { + "anyOf": [ + { + "$ref": "#/definitions/hyperparameter" + }, + { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/hyperparameter" + } + } + ] + }, + "is_configuration": { + "type": "boolean" + }, + "min_size": { + "type": "integer" + }, + "max_size": { + "type": "integer" + } + }, + "required": [ + "type", + "default", + "structural_type", + "semantic_types" + ], + "additionalProperties": true + }, + "structural_type": { + "$ref": "#/definitions/python_type" + }, + "media_types": { + "type": "array", + "description": "Media type of the value in its extended form defining encoding, e.g., \"text/plain; charset=utf-8\".", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "sampling_rate": { + "type": "number", + "description": "Sampling rate (frequency) is the number of samples per second." + }, + "time_granularity": { + "type": "object", + "properties": { + "value": { + "type": "number" + }, + "unit": { + "enum": [ + "SECONDS", + "MINUTES", + "DAYS", + "WEEKS", + "MONTHS", + "YEARS", + "UNSPECIFIED" + ] + } + }, + "required": [ + "value", + "unit" + ], + "additionalProperties": true + }, + "stored_size": { + "type": "integer", + "description": "Size in bytes when or if stored to disk." + }, + "approximate_stored_size": { + "type": "integer", + "description": "Approximate size in bytes when or if stored to disk." + }, + "semantic_types": { + "type": "array", + "description": "A list of canonical URIs defining semantic types. 
Some commonly used URIs are listed as possible values here, but you can use any URI representing a semantic type.", + "items": { + "anyOf": [ + {"enum": ["http://schema.org/ImageObject"], "description": "Value is an image."}, + {"enum": ["http://schema.org/VideoObject"], "description": "Value is a video."}, + {"enum": ["http://schema.org/AudioObject"], "description": "Value is an audio clip."}, + {"enum": ["http://schema.org/Text"], "description": "Value is text/string."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/Speech"], "description": "Value is an audio clip of human speech."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/Graph"], "description": "Value is a graph structure or a node list of a graph structure."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/EdgeList"], "description": "Value is an edge list of a graph structure."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/Table"], "description": "Value is tabular data."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/Timeseries"], "description": "Value is time-series data."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/UnspecifiedStructure"], "description": "Value has unspecified structure."}, + {"enum": ["http://schema.org/Boolean"], "description": "Value represents a boolean."}, + {"enum": ["http://schema.org/Integer"], "description": "Value represents an integer."}, + {"enum": ["http://schema.org/Float"], "description": "Value represents a float."}, + {"enum": ["http://schema.org/DateTime"], "description": "Value represents a timestamp."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/FloatVector"], "description": "Value represents a vector of floats.", "parents": ["http://schema.org/DataType"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/JSON"], "description": "Value represents a JSON object.", "parents": ["http://schema.org/DataType"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/GeoJSON"], "description": "Value represents a GeoJSON object.", "parents": ["https://metadata.datadrivendiscovery.org/types/JSON"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/CategoricalData"], "description": "Value represents categorical data."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/OrdinalData"], "description": "Value represents ordinal data."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"], "description": "A column can have a role in a table."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/PrimaryKey"], "description": "Value serves as a primary key.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/PrimaryMultiKey"], "description": "Value serves as a primary key without uniqueness constraint to allow the same row to be repeated multiple times.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/UniqueKey"], "description": "Value serves as an unique key, i.e., it satisfies the uniqueness constraint among other values.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/SuggestedGroupingKey"], "description": "Value serves as a potential grouping key to group rows (samples) together. 
Used in time-series datasets containing multiple time-series to hint how to identify individual time-series. If there are multiple columns with this semantic type the relation between them is unspecified, they can be used individually or in combination.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/GroupingKey"], "description": "Value serves as an active grouping key to group rows (samples) together. Used in time-series datasets containing multiple time-series to identify individual time-series. Each column with this semantic type should be used individually and if multiple columns with this semantic type exist, each column represent a different grouping.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/Attribute"], "description": "Value serves as an attribute (input feature) to fit on or be used for analysis.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/ConstructedAttribute"], "description": "Value serves as a constructed attribute (input feature). This is set by primitives when constructing attributes. It should not be used for fitting.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/SuggestedTarget"], "description": "Value serves as a potential target variable for a problem. This is a property of input data.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/RedactedTarget"], "description": "Value is redacted, but would otherwise be a target variable for a problem. This is a property of input data.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/Target"], "description": "Value serves as a target variable for a problem.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/PredictedTarget"], "description": "Value serves as a predict target variable for a problem. This is set by primitives when predicting targets.", "parents": ["https://metadata.datadrivendiscovery.org/types/Target"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"], "description": "Value serves as a true target variable for a problem. This is set by a runtime based on problem description.", "parents": ["https://metadata.datadrivendiscovery.org/types/Target"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/Score"], "description": "Value is a prediction score computed by comparing predicted and true target.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/Confidence"], "description": "Value serves as a confidence of a predicted target variable. \"confidence_for\" metadata can be used to reference for which target column(s) this column is confidence for.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/Rank"], "description": "Value serves as a rank of a predicted target variable. 
\"rank_for\" metadata can be used to reference for which target column(s) this column is rank for.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/SuggestedPrivilegedData"], "description": "Value serves as a potential privileged (available during fitting but not producing) attribute.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/RedactedPrivilegedData"], "description": "Value is redacted, but would otherwise be a privileged attribute.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/PrivilegedData"], "description": "Value serves as a privileged (available during fitting but not producing) attribute.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/EdgeSource"], "description": "Value serves as a source of a graph edge.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/DirectedEdgeSource"], "description": "Value serves as a source of a directed graph edge.", "parents": ["https://metadata.datadrivendiscovery.org/types/EdgeSource"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/UndirectedEdgeSource"], "description": "Value serves as a source of a undirected graph edge.", "parents": ["https://metadata.datadrivendiscovery.org/types/EdgeSource"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/SimpleEdgeSource"], "description": "Value serves as a source of a simple graph edge.", "parents": ["https://metadata.datadrivendiscovery.org/types/EdgeSource"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/MultiEdgeSource"], "description": "Value serves as a source of a multigraph edge.", "parents": ["https://metadata.datadrivendiscovery.org/types/EdgeSource"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/EdgeTarget"], "description": "Value serves as a target of a graph edge.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/DirectedEdgeTarget"], "description": "Value serves as a target of a directed graph edge.", "parents": ["https://metadata.datadrivendiscovery.org/types/EdgeTarget"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/UndirectedEdgeTarget"], "description": "Value serves as a target of a undirected graph edge.", "parents": ["https://metadata.datadrivendiscovery.org/types/EdgeTarget"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/SimpleEdgeTarget"], "description": "Value serves as a target of a simple graph edge.", "parents": ["https://metadata.datadrivendiscovery.org/types/EdgeTarget"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/MultiEdgeTarget"], "description": "Value serves as a target of a multigraph edge.", "parents": ["https://metadata.datadrivendiscovery.org/types/EdgeTarget"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/Time"], "description": "Value represents time.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/Location"], "description": "Value represents a location.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": 
["https://metadata.datadrivendiscovery.org/types/Boundary"], "description": "Value represents a boundary.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/Interval"], "description": "Value represents an interval as a pair of start and end.", "parents": ["https://metadata.datadrivendiscovery.org/types/Boundary"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/IntervalStart"], "description": "Value represents a start of an interval.", "parents": ["https://metadata.datadrivendiscovery.org/types/Boundary"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/IntervalEnd"], "description": "Value represents an end of an interval.", "parents": ["https://metadata.datadrivendiscovery.org/types/Boundary"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/BoundingPolygon"], "description": "Value represents a bounding polygon as a series of (X, Y) coordinate pairs of vertices in counter-clockwise order.", "parents": ["https://metadata.datadrivendiscovery.org/types/Boundary"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/InstanceWeight"], "description": "Value serves as a weight for an instance.", "parents": ["https://metadata.datadrivendiscovery.org/types/ColumnRole"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/UnknownType"], "description": "It is not known what the value represents."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/FileName"], "description": "Value is a filename."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/DimensionType"], "description": "Value represents a dimension."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/DatasetResource"], "description": "Value is a dataset resource.", "parents": ["https://metadata.datadrivendiscovery.org/types/DimensionType"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/TabularRow"], "description": "Value is a row in tabular data.", "parents": ["https://metadata.datadrivendiscovery.org/types/DimensionType"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/TabularColumn"], "description": "Value is a column in tabular data.", "parents": ["https://metadata.datadrivendiscovery.org/types/DimensionType"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/MissingData"], "description": "Value is missing."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/InvalidData"], "description": "Value is present, but is invalid."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/HyperParameter"], "description": "Value is a hyper-parameter."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/TuningParameter"], "description": "Hyper-parameter is a tuning parameter of the primitive.", "parents": ["https://metadata.datadrivendiscovery.org/types/HyperParameter"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/ControlParameter"], "description": "Hyper-parameter is a control parameter of the primitive.", "parents": ["https://metadata.datadrivendiscovery.org/types/HyperParameter"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter"], "description": "Hyper-parameter is a parameter which controls the use of resources by the primitive.", "parents": ["https://metadata.datadrivendiscovery.org/types/HyperParameter"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/CPUResourcesUseParameter"], "description": "Hyper-parameter is a parameter which controls 
the use of CPU resources (cores) by the primitive.", "parents": ["https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/MetafeatureParameter"], "description": "Hyper-parameter controls which meta-feature is computed by the primitive.", "parents": ["https://metadata.datadrivendiscovery.org/types/HyperParameter"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/ChoiceParameter"], "description": "Hyper-parameter is selecting one choice among multiple hyper-parameters space choices.", "parents": ["https://metadata.datadrivendiscovery.org/types/HyperParameter"]}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint"], "description": "Resource is a dataset entry point."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/FilesCollection"], "description": "Resource is a files collection."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/TokenizableIntoNumericAndAlphaTokens"], "description": "Value can be tokenized into pure numeric tokens (satisfies \"isdigit\") and pure alphabetic tokens(satisfies \"isalpha\"). E.g., value \"123abc456\" can be tokenized into (\"123\", \"abc\", \"456\")."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/TokenizableByPunctuation"], "description": "Value can be tokenized by splitting on punctuation. E.g., value \"ab_cd;12\" can be tokenized into (\"ab\", \"cd\", \"12\")."}, + {"enum": ["https://metadata.datadrivendiscovery.org/types/AmericanPhoneNumber"], "description": "Value can be recognized as an American style phone number, e.g., \"(310)822-1511\" and \"1-310-822-1511\"."}, + {"enum": ["http://schema.org/email"], "description": "Value is an email address."}, + {"enum": ["http://schema.org/URL"], "description": "Value represents a URL."}, + {"enum": ["http://schema.org/address"], "description": "Value is an address, broadly defined."}, + {"enum": ["http://schema.org/State"], "description": "Value is a state, could be US or foreign."}, + {"enum": ["http://schema.org/City"], "description": "Value is a city, could be US or foreign."}, + {"enum": ["http://schema.org/Country"], "description": "Value is a country."}, + {"enum": ["http://schema.org/addressCountry"], "description": "Value is a country code."}, + {"enum": ["http://schema.org/postalCode"], "description": "Value is a US postal code."}, + {"enum": ["http://schema.org/latitude"], "description": "Value represents a latitude."}, + {"enum": ["http://schema.org/longitude"], "description": "Value represents a longitude."}, + { + "type": "string", + "description": "A URI not listed among commonly used URIs. Please feel encouraged to open a merge request adding semantic types you are using so that also others can learn about them.", + "format": "uri" + } + ] + } + }, + "location_uris": { + "type": "array", + "description": "A list of URIs where the value is stored.", + "items": { + "type": "string", + "format": "uri" + } + }, + "location_base_uris": { + "type": "array", + "description": "A list of URIs which can be used as a base to determine where the value is stored.", + "items": { + "type": "string", + "format": "uri" + } + }, + "source": { + "type": "object", + "description": "Information about the source. 
Author and other information how the value came to be.", + "properties": { + "name": { + "$ref": "#/definitions/name" + }, + "contact": { + "type": "string", + "description": "An URI to contact the source.", + "format": "uri" + }, + "uris": { + "type": "array", + "description": "A list of URIs where the value is coming from, e.g., website with a dataset, or source code for a primitive.", + "items": { + "type": "string", + "format": "uri" + } + }, + "published": { + "allOf": [{"$ref": "#/definitions/timestamp"}], + "description": "A timestamp when was the value made available." + }, + "license": { + "type": "string", + "description": "License under which the value is available." + }, + "citation": { + "type": "string", + "description": "Citation of the source." + }, + "human_subjects_research": { + "type": "boolean", + "description": "Does value contain human subjects data or not." + }, + "redacted": { + "type": "boolean", + "description": "Has the value been redacted." + }, + "from": { + "type": "object", + "oneOf": [ + { + "properties": { + "type": { + "type": "string", + "enum": ["REDACTED"], + "description": "The value has been redacted from the referenced value." + }, + "dataset": { + "$ref": "#/definitions/dataset_reference" + } + }, + "required": [ + "dataset", + "type" + ] + }, + { + "properties": { + "type": { + "type": "string", + "enum": ["REDACTED"], + "description": "The value has been redacted from the referenced value." + }, + "problem": { + "$ref": "#/definitions/problem_reference" + } + }, + "required": [ + "problem", + "type" + ] + }, + { + "properties": { + "type": { + "type": "string", + "enum": ["PIPELINE"], + "description": "The pipeline has been derived from another pipeline or pipelines." + }, + "pipelines": { + "type": "array", + "description": "A list of pipelines used to derive the pipeline.", + "items": { + "$ref": "#/definitions/pipeline_reference" + }, + "minItems": 1 + } + }, + "required": [ + "pipelines", + "type" + ] + } + ], + "additionalProperties": true + } + }, + "additionalProperties": true + }, + "keywords": { + "type": "array", + "description": "A list of keywords. Strings in an unspecified language and vocabulary.", + "items": { + "type": "string" + } + }, + "foreign_key": { + "type": "object", + "description": "Columns in a table in a dataset resource can reference other resources.", + "oneOf": [ + { + "properties": { + "type": { + "type": "string", + "enum": ["COLUMN"], + "description": "The foreign key is referencing a column in a table in a dataset resource." + }, + "resource_id": { + "$ref": "#/definitions/resource_id" + }, + "column_index": { + "$ref": "#/definitions/column_index" + } + }, + "required": [ + "type", + "resource_id", + "column_index" + ] + }, + { + "properties": { + "type": { + "type": "string", + "enum": ["COLUMN"], + "description": "The foreign key is referencing a column in a table in a dataset resource." + }, + "resource_id": { + "$ref": "#/definitions/resource_id" + }, + "column_name": { + "$ref": "#/definitions/column_name" + } + }, + "required": [ + "type", + "resource_id", + "column_name" + ] + }, + { + "properties": { + "type": { + "type": "string", + "enum": ["NODE_ATTRIBUTE"], + "description": "The foreign key is referencing a node attribute in a dataset resource, a graph." 
+ }, + "resource_id": { + "$ref": "#/definitions/resource_id" + }, + "node_attribute": { + "$ref": "#/definitions/column_name" + } + }, + "required": [ + "type", + "resource_id", + "node_attribute" + ] + }, + { + "properties": { + "type": { + "type": "string", + "enum": ["EDGE_ATTRIBUTE"], + "description": "The foreign key is referencing an edge attribute in a dataset resource, a graph." + }, + "resource_id": { + "$ref": "#/definitions/resource_id" + }, + "edge_attribute": { + "$ref": "#/definitions/column_name" + } + }, + "required": [ + "type", + "resource_id", + "edge_attribute" + ] + }, + { + "properties": { + "type": { + "type": "string", + "enum": ["RESOURCE"], + "description": "The foreign key is referencing another dataset resource. The value is resource ID." + } + }, + "required": [ + "type" + ] + } + ] + }, + "boundary_for": { + "type": "object", + "description": "A column in a table can be a boundary for another column in the same table or a table in another dataset resource.", + "oneOf": [ + { + "properties": { + "resource_id": { + "$ref": "#/definitions/resource_id" + }, + "column_index": { + "$ref": "#/definitions/column_index" + } + }, + "required": [ + "column_index" + ] + }, + { + "properties": { + "resource_id": { + "$ref": "#/definitions/resource_id" + }, + "column_name": { + "$ref": "#/definitions/column_name" + } + }, + "required": [ + "column_name" + ] + } + ] + }, + "confidence_for": { + "type": "object", + "description": "A column in a table can be a confidence for other columns in the same table or a table in another dataset resource.", + "oneOf": [ + { + "properties": { + "resource_id": { + "$ref": "#/definitions/resource_id" + }, + "column_indices": { + "type": "array", + "items": { + "$ref": "#/definitions/column_index" + }, + "minItems": 1 + } + }, + "required": [ + "column_indices" + ] + }, + { + "properties": { + "resource_id": { + "$ref": "#/definitions/resource_id" + }, + "column_names": { + "type": "array", + "items": { + "$ref": "#/definitions/column_name" + }, + "minItems": 1 + } + }, + "required": [ + "column_names" + ] + } + ] + }, + "rank_for": { + "type": "object", + "description": "A column in a table can be a rank for other columns in the same table or a table in another dataset resource.", + "oneOf": [ + { + "properties": { + "resource_id": { + "$ref": "#/definitions/resource_id" + }, + "column_indices": { + "type": "array", + "items": { + "$ref": "#/definitions/column_index" + }, + "minItems": 1 + } + }, + "required": [ + "column_indices" + ] + }, + { + "properties": { + "resource_id": { + "$ref": "#/definitions/resource_id" + }, + "column_names": { + "type": "array", + "items": { + "$ref": "#/definitions/column_name" + }, + "minItems": 1 + } + }, + "required": [ + "column_names" + ] + } + ] + }, + "algorithm_types": { + "type": "array", + "description": "Algorithm type describes the underlying implementation of the primitive. 
It uses controlled, standardized, but open vocabulary which means that if types which would best describe your primitive are missing, please feel encouraged to open a merge request adding them.", + "items": { + "oneOf": [ + {"enum": ["RULE_BASED_FILTER"]}, + + {"enum": ["DUPLICATION_VALIDATION"]}, + {"enum": ["CONTINUITY_VALIDATION"]}, + {"enum": ["HP_FILTER"], "description": "https://en.wikipedia.org/wiki/Hodrick–Prescott_filter"}, + {"enum": ["BK_FILTER"]}, + {"enum": ["TEMPORAL_REGULARIZED_MATRIX_FACTORIZATION"]}, + {"enum": ["ANGLE_BASE_OUTLIER_DETECTION"]}, + {"enum": ["HISTOGRAM_BASED_OUTLIER_DETECTION"]}, + {"enum": ["ISOLATION_FOREST"]}, + {"enum": ["SUBSPACE_OUTLIER_DETECTION"]}, + {"enum": ["AUTOCORRELATION"], "description": "https://en.wikipedia.org/wiki/Autocorrelation"}, + {"enum": ["CATEGORICAL_TO_BINARY"]}, + {"enum": ["DISCRETE_COSINE_TRANSFORM"], "description": "https://en.wikipedia.org/wiki/Discrete_cosine_transform"}, + {"enum": ["FAST_FOURIER_TRANSFORM"], "description": "https://en.wikipedia.org/wiki/Fast_Fourier_transform"}, + {"enum": ["HOLT_SMOOTHING"], "description": "https://medium.com/datadriveninvestor/how-to-build-exponential-smoothing-models-using-python-simple-exponential-smoothing-holt-and-da371189e1a1"}, + {"enum": ["HOLT_WINTERS_EXPONENTIAL_SMOOTHING"], "description": "https://medium.com/datadriveninvestor/how-to-build-exponential-smoothing-models-using-python-simple-exponential-smoothing-holt-and-da371189e1a1"}, + {"enum": ["MATRIX_PROFILE"], "description": "https://en.wikipedia.org/wiki/Matrix_profile"}, + {"enum": ["MEAN_AVERAGE_TRANSFORM"], "description": "https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rolling.html"}, + {"enum": ["MOVING_AVERAGE_TRANSFORM"], "description": "https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rolling.html"}, + {"enum": ["NON_NEGATIVE_MATRIX_FACTORIZATION"], "description":"https://en.wikipedia.org/wiki/Non-negative_matrix_factorization"}, + {"enum": ["PYOD_COF"]}, + {"enum": ["SIMPLE_EXPONENTIAL_SMOOTHING"], "description": "https://medium.com/datadriveninvestor/how-to-build-exponential-smoothing-models-using-python-simple-exponential-smoothing-holt-and-da371189e1a1"}, + {"enum": ["SUM_CODING"]}, + {"enum": ["TIME_INTERVAL_TRANSFORM"], "description": "https://en.wikipedia.org/wiki/Time_interval_transform"}, + {"enum": ["VARIATIONAL_AUTO_ENCODER"],"description":"https://www.jeremyjordan.me/variational-autoencoders/"}, + {"enum": ["ACCURACY_SCORE"], "description": "https://en.wikipedia.org/wiki/Accuracy_and_precision"}, + {"enum": ["ADABOOST"], "description": "https://en.wikipedia.org/wiki/AdaBoost"}, + {"enum": ["ADAPTIVE_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Adaptive_algorithm"}, + {"enum": ["AGGREGATE_FUNCTION"], "description": "https://en.wikipedia.org/wiki/Aggregate_function"}, + {"enum": ["ALMEIDA_PINEDA_RECURRENT_BACKPROPAGATION"], "description": "https://en.wikipedia.org/wiki/Almeida%E2%80%93Pineda_recurrent_backpropagation"}, + {"enum": ["ALOPEX"], "description": "https://en.wikipedia.org/wiki/ALOPEX"}, + {"enum": ["ALTERNATING_DECISION_TREE"], "description": "https://en.wikipedia.org/wiki/Alternating_decision_tree"}, + {"enum": ["ANT_COLONY_OPTIMIZATION"], "description": "https://en.wikipedia.org/wiki/Ant_colony_optimization_algorithms"}, + {"enum": ["APPROXIMATE_DATA_AUGMENTATION"], "description": "Augmenting data approximately using data that has the best matching score."}, + {"enum": ["ARRAY_CONCATENATION"]}, + {"enum": ["ARRAY_SLICING"], 
"description": "https://en.wikipedia.org/wiki/Array_slicing"}, + {"enum": ["ASSOCIATION_RULE_LEARNING"], "description": "https://en.wikipedia.org/wiki/Association_rule_learning"}, + {"enum": ["ASSOCIATIVE_NEURAL_NETWORK"]}, + {"enum": ["ATTRACTOR_NETWORK"], "description": "https://en.wikipedia.org/wiki/Attractor_network"}, + {"enum": ["AUDIO_MIXING"], "description": "https://en.wikipedia.org/wiki/Audio_mixing_(recorded_music)"}, + {"enum": ["AUDIO_STREAM_MANIPULATION"], "description": "https://en.wikipedia.org/wiki/Audio_signal_processing"}, + {"enum": ["AUGMENTED_LAGRANGIAN_METHOD"], "description": "https://en.wikipedia.org/wiki/Augmented_Lagrangian_method"}, + {"enum": ["AUTOENCODER"], "description": "https://en.wikipedia.org/wiki/Autoencoder"}, + {"enum": ["AUTOREGRESSIVE_INTEGRATED_MOVING_AVERAGE"], "description": "https://en.wikipedia.org/wiki/Autoregressive_integrated_moving_average"}, + {"enum": ["BACKWARD_DIFFERENCE_CODING"], "description": "https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/#backward"}, + {"enum": ["BAG_OF_WORDS_MODEL"], "description": "https://en.wikipedia.org/wiki/Bag-of-words_model"}, + {"enum": ["BATCH_NORMALIZATION"]}, + {"enum": ["BAYESIAN_LINEAR_REGRESSION"], "description": "https://en.wikipedia.org/wiki/Bayesian_linear_regression"}, + {"enum": ["BAYESIAN_MODEL_AVERAGING"], "description": "https://en.wikipedia.org/wiki/Bootstrap_aggregating"}, + {"enum": ["BAYESIAN_NETWORK"], "description": "https://en.wikipedia.org/wiki/Bayesian_network"}, + {"enum": ["BAYESIAN_OPTIMIZATION"]}, + {"enum": ["BELIEF_PROPAGATION"], "description": "https://en.wikipedia.org/wiki/Belief_propagation"}, + {"enum": ["BERT"], "description": "https://arxiv.org/abs/1810.04805"}, + {"enum": ["BINARY_CLASSIFICATION"], "description": "https://en.wikipedia.org/wiki/Binary_classification"}, + {"enum": ["BIRCH"], "description": "https://en.wikipedia.org/wiki/Bayesian_optimization"}, + {"enum": ["BOLTZMANN_MACHINE"], "description": "https://en.wikipedia.org/wiki/BIRCH"}, + {"enum": ["BOOSTING"], "description": "https://en.wikipedia.org/wiki/Boltzmann_machine"}, + {"enum": ["BOOTSTRAP_AGGREGATING"], "description": "https://en.wikipedia.org/wiki/Boosting_(machine_learning)"}, + {"enum": ["BOOTSTRAPPING"], "description": "https://en.wikipedia.org/wiki/Bootstrapping_(statistics)"}, + {"enum": ["BRANCH_AND_BOUND"], "description": "https://en.wikipedia.org/wiki/Branch_and_bound"}, + {"enum": ["BREADTH_FIRST_SEARCH"], "description": "https://en.wikipedia.org/wiki/Breadth-first_search"}, + {"enum": ["BRIER_SCORE"], "description": "https://en.wikipedia.org/wiki/Brier_score"}, + {"enum": ["BROOKS_IYENGAR"], "description": "https://en.wikipedia.org/wiki/Brooks%E2%80%93Iyengar_algorithm"}, + {"enum": ["BROWNBOOST"], "description": "https://en.wikipedia.org/wiki/BrownBoost"}, + {"enum": ["C45"], "description": "https://en.wikipedia.org/wiki/C4.5_algorithm"}, + {"enum": ["C50"]}, + {"enum": ["CANONICAL_CORRELATION_ANALYSIS"], "description": "https://en.wikipedia.org/wiki/Canonical_correlation"}, + {"enum": ["CASCADE_CORRELATION_NETWORK"]}, + {"enum": ["CASE_BASED_REASONING"], "description": "https://en.wikipedia.org/wiki/Case-based_reasoning"}, + {"enum": ["CATEGORY_ENCODER"]}, + {"enum": ["CAUSAL_ANALYSIS"], "description": "https://en.wikipedia.org/wiki/Causal_analysis"}, + {"enum": ["CLASSIFIER_CHAINS"], "description": "https://en.wikipedia.org/wiki/Classifier_chains"}, + {"enum": ["CN2"], "description": "https://en.wikipedia.org/wiki/CN2_algorithm"}, + 
{"enum": ["COBWEB"], "description": "https://en.wikipedia.org/wiki/Cobweb_(clustering)"}, + {"enum": ["COEFFICIENT_OF_DETERMINATION"], "description": "https://en.wikipedia.org/wiki/Coefficient_of_determination"}, + {"enum":["COLUMN_FILTER"], "description": "https://en.wikipedia.org/wiki/Column_filter"}, + {"enum": ["COLOR_SPACE_CONVERSION"], "description": "https://en.wikipedia.org/wiki/Color_space"}, + {"enum": ["COMMITTEE_MACHINE"], "description": "https://en.wikipedia.org/wiki/Committee_machine"}, + {"enum": ["COMPOSITIONAL_PATTERN_PRODUCING_NETWORK"], "description": "https://en.wikipedia.org/wiki/Compositional_pattern-producing_network"}, + {"enum": ["COMPUTER_ALGEBRA"], "description": "https://en.wikipedia.org/wiki/Computer_algebra"}, + {"enum": ["CONDITIONAL_RANDOM_FIELD"], "description": "https://en.wikipedia.org/wiki/Conditional_random_field"}, + {"enum": ["CONTEXTUAL_BANDIT"]}, + {"enum": ["CONVOLUTIONAL_NEURAL_NETWORK"], "description": "https://en.wikipedia.org/wiki/Convolutional_neural_network"}, + {"enum": ["CONVOLUTIONAL_NEURAL_NETWORK_LAYER"], "description": "https://en.wikipedia.org/wiki/Convolutional_neural_network#Convolutional_layer"}, + {"enum": ["COORDINATE_DESCENT"], "description": "https://en.wikipedia.org/wiki/Coordinate_descent"}, + {"enum": ["CORRELATION_CLUSTERING"], "description": "https://en.wikipedia.org/wiki/Correlation_clustering"}, + {"enum": ["CORTICAL_LEARNING"]}, + {"enum": ["COTRAINING"], "description": "https://en.wikipedia.org/wiki/Co-training"}, + {"enum": ["CROSS_ENTROPY"], "description": "https://en.wikipedia.org/wiki/Cross_entropy"}, + {"enum": ["CROSS_ENTROPY_METHOD"], "description": "https://en.wikipedia.org/wiki/Cross-entropy_method"}, + {"enum": ["CROSS_VALIDATION"], "description": "https://en.wikipedia.org/wiki/Cross-validation_(statistics)"}, + {"enum": ["CULTURAL_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Cultural_algorithm"}, + {"enum": ["DATA_CONVERSION"], "description": "https://en.wikipedia.org/wiki/Data_conversion"}, + {"enum": ["DATA_DENORMALIZATION"], "description": "https://en.wikipedia.org/wiki/Denormalization"}, + {"enum": ["DATA_MAPPING"], "description": "https://en.wikipedia.org/wiki/Data_mapping"}, + {"enum": ["DATA_NORMALIZATION"], "description": "https://en.wikipedia.org/wiki/Database_normalization"}, + {"enum": ["DATA_PROFILING"], "description": "https://en.wikipedia.org/wiki/Data_profiling"}, + {"enum": ["DATA_RETRIEVAL"], "description": "Obtaining additional data for augmentation"}, + {"enum": ["DATA_SPLITTING"], "description": "https://en.wikipedia.org/wiki/Training,_test,_and_validation_sets"}, + {"enum": ["DATA_STREAM_CLUSTERING"], "description": "https://en.wikipedia.org/wiki/Data_stream_clustering"}, + {"enum": ["DATA_STREAM_MINING"], "description": "https://en.wikipedia.org/wiki/Data_stream_mining"}, + {"enum": ["DATA_STRUCTURE_ALIGNMENT"], "description": "https://en.wikipedia.org/wiki/Data_structure_alignment"}, + {"enum": ["DBSCAN"], "description": "https://en.wikipedia.org/wiki/DBSCAN"}, + {"enum": ["DECISION_STUMP"], "description": "https://en.wikipedia.org/wiki/Decision_stump"}, + {"enum": ["DECISION_TREE"], "description": "https://en.wikipedia.org/wiki/Decision_tree"}, + {"enum": ["DEEP_BELIEF_NETWORK"], "description": "https://en.wikipedia.org/wiki/Deep_belief_network"}, + {"enum": ["DEEP_FEATURE_SYNTHESIS"], "description": "https://groups.csail.mit.edu/EVO-DesignOpt/groupWebSite/uploads/Site/DSAA_DSM_2015.pdf"}, + {"enum": ["DEEPLOG"], "description": "https://en.wikipedia.org/wiki/Deeplog"}, + 
{"enum": ["DEEP_NEURAL_NETWORK"], "description": "https://en.wikipedia.org/wiki/Deep_learning#Deep_neural_networks"}, + {"enum": ["DEINTERLACING"], "description": "https://en.wikipedia.org/wiki/Deinterlacing"}, + {"enum": ["DENSE_NEURAL_NETWORK_LAYER"]}, + {"enum": ["DISCRETIZATION"], "description": "https://en.wikipedia.org/wiki/Discretization"}, + {"enum": ["DPLL"], "description": "https://en.wikipedia.org/wiki/DPLL_algorithm"}, + {"enum": ["DROPOUT"], "description": "https://en.wikipedia.org/wiki/Dropout_(neural_networks)"}, + {"enum": ["DYNAMIC_NEURAL_NETWORK"]}, + {"enum": ["DYNAMIC_TIME_WARPING"], "description": "https://en.wikipedia.org/wiki/Dynamic_time_warping"}, + {"enum": ["EAGER_LEARNING"], "description": "https://en.wikipedia.org/wiki/Eager_learning"}, + {"enum": ["ECHO_STATE_NETWORK"], "description": "https://en.wikipedia.org/wiki/Echo_state_network"}, + {"enum": ["ECLAT"]}, + {"enum": ["EDGERANK"], "description": "https://en.wikipedia.org/wiki/EdgeRank"}, + {"enum": ["ELASTIC_NET_REGULARIZATION"], "description": "https://en.wikipedia.org/wiki/Elastic_net_regularization"}, + {"enum": ["ENCODE_BINARY"], "description": "https://en.wikipedia.org/wiki/Binary_code"}, + {"enum": ["ENCODE_ONE_HOT"], "description": "https://en.wikipedia.org/wiki/One-hot"}, + {"enum": ["ENCODE_ORDINAL"]}, + {"enum": ["ENCODE_UNARY"], "description": "https://en.wikipedia.org/wiki/Unary_numeral_system"}, + {"enum": ["EQUI_JOIN"], "description": "https://en.wikipedia.org/wiki/Join_(SQL)#Equi-join"}, + {"enum": ["ENSEMBLE_LEARNING"], "description": "https://en.wikipedia.org/wiki/Ensemble_learning"}, + {"enum": ["EVOLUTIONARY_ACQUISITION_OF_NEURAL_TOPOLOGIES"], "description": "https://en.wikipedia.org/wiki/Evolutionary_acquisition_of_neural_topologies"}, + {"enum": ["EVOLUTIONARY_MULTIMODAL_OPTIMIZATION"], "description": "https://en.wikipedia.org/wiki/Evolutionary_multimodal_optimization"}, + {"enum": ["EXPECTATION_MAXIMIZATION_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm"}, + {"enum": ["EXTENSION_NEURAL_NETWORK"], "description": "https://en.wikipedia.org/wiki/Extension_neural_network"}, + {"enum": ["EXTREME_LEARNING_MACHINE"], "description": "https://en.wikipedia.org/wiki/Extreme_learning_machine"}, + {"enum": ["F1_SCORE"], "description": "https://en.wikipedia.org/wiki/F1_score"}, + {"enum": ["FALSE_NEAREST_NEIGHBOR"], "description": "https://en.wikipedia.org/wiki/False_nearest_neighbor_algorithm"}, + {"enum": ["FASTICA"], "description": "https://en.wikipedia.org/wiki/FastICA"}, + {"enum": ["FEATURE_SCALING"], "description": "https://en.wikipedia.org/wiki/Feature_scaling"}, + {"enum": ["FEEDFORWARD_NEURAL_NETWORK"], "description": "https://en.wikipedia.org/wiki/Feedforward_neural_network"}, + {"enum": ["FELLEGI_SUNTER_ALGORITHM"]}, + {"enum": ["FILE_MANIPULATION"], "description": "https://en.wikipedia.org/wiki/Computer_file"}, + {"enum": ["FISHER_KERNEL"], "description": "https://en.wikipedia.org/wiki/Fisher_kernel"}, + {"enum": ["FLATTEN_NEURAL_NETWORK_LAYER"]}, + {"enum": ["FORWARD_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Forward_algorithm"}, + {"enum": ["FORWARD_BACKWARD_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Forward%E2%80%93backward_algorithm"}, + {"enum": ["FORWARD_DIFFERENCE_CODING"], "description": "https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/#forward"}, + {"enum": ["FRANK_WOLFE_ALGORITHM"], "description": 
"https://en.wikipedia.org/wiki/Frank%E2%80%93Wolfe_algorithm"}, + {"enum": ["FREQUENCY_TRANSFORM"], "description": "https://en.wikipedia.org/wiki/Frequency_domain"}, + {"enum": ["FUZZY_CLUSTERING"], "description": "https://en.wikipedia.org/wiki/Fuzzy_clustering"}, + {"enum": ["GAUSSIAN_BLUR"], "description": "https://en.wikipedia.org/wiki/Gaussian_blur"}, + {"enum": ["GAUSSIAN_PROCESS"], "description": "https://en.wikipedia.org/wiki/Gaussian_process"}, + {"enum": ["GENERALIZED_HEBBIAN_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Generalized_Hebbian_Algorithm"}, + {"enum": ["GENERATIVE_TOPOGRAPHIC_MAP"], "description": "https://en.wikipedia.org/wiki/Generative_topographic_map"}, + {"enum": ["GENETIC_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Genetic_algorithm"}, + {"enum": ["GENETIC_ALGORITHM_FOR_RULE_SET_PRODUCTION"], "description": "https://en.wikipedia.org/wiki/Genetic_Algorithm_for_Rule_Set_Production"}, + {"enum": ["GENETIC_PROGRAMMING"], "description": "https://en.wikipedia.org/wiki/Genetic_programming"}, + {"enum": ["GENETIC_SCALE_RECURRENT_NEURAL_NETWORK"]}, + {"enum": ["GLOVE"], "description": "https://en.wikipedia.org/wiki/GloVe_(machine_learning)"}, + {"enum": ["GRADIENT_BOOSTING"], "description": "https://en.wikipedia.org/wiki/Gradient_boosting"}, + {"enum": ["GRADIENT_DESCENT"], "description": "https://en.wikipedia.org/wiki/Gradient_descent"}, + {"enum": ["GRAPHICAL_LASSO"], "description": "https://en.wikipedia.org/wiki/Graphical_lasso"}, + {"enum": ["GROWING_SELF_ORGANIZING_MAP"], "description": "https://en.wikipedia.org/wiki/Growing_self-organizing_map"}, + {"enum": ["HARD_CLUSTERING"]}, + {"enum": ["HASHING"], "description": "https://en.wikipedia.org/wiki/Hash_function"}, + {"enum": ["HELMERT_CODING"], "description": "https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/#HELMERT"}, + {"enum": ["HEURISTIC"], "description": "https://en.wikipedia.org/wiki/Heuristic"}, + {"enum": ["HIDDEN_MARKOV_MODEL"], "description": "https://en.wikipedia.org/wiki/Hidden_Markov_model"}, + {"enum": ["HIDDEN_SEMI_MARKOV_MODEL"], "description": "https://en.wikipedia.org/wiki/Hidden_semi-Markov_model"}, + {"enum": ["HIERARCHICAL_CLUSTERING"], "description": "https://en.wikipedia.org/wiki/Hierarchical_clustering"}, + {"enum": ["HIERARCHICAL_TEMPORAL_MEMORY"], "description": "https://en.wikipedia.org/wiki/Hierarchical_temporal_memory"}, + {"enum": ["HIGHER_ORDER_SINGULAR_VALUE_DECOMPOSITION"], "description": "https://en.wikipedia.org/wiki/Higher-order_singular_value_decomposition"}, + {"enum": ["HOLDOUT"], "description": "https://en.wikipedia.org/wiki/Cross-validation_(statistics)#Holdout_method"}, + {"enum": ["HOLOGRAPHIC_ASSOCIATIVE_MEMORY"], "description": "https://en.wikipedia.org/wiki/Holographic_associative_memory"}, + {"enum": ["HOPFIELD_NETWORK"], "description": "https://en.wikipedia.org/wiki/Hopfield_network"}, + {"enum": ["HOSHEN_KOPELMAN_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Hoshen%E2%80%93Kopelman_algorithm"}, + {"enum": ["HYPER_BASIS_FUNCTION_NETWORK"], "description": "https://en.wikipedia.org/wiki/Hyper_basis_function_network"}, + {"enum": ["HYPERNEAT"], "description": "https://en.wikipedia.org/wiki/HyperNEAT"}, + {"enum": ["ID3"], "description": "https://en.wikipedia.org/wiki/ID3"}, + {"enum": ["IDENTITY_FUNCTION"], "description": "https://en.wikipedia.org/wiki/Identity_function"}, + {"enum": ["IMAGE_CROPPING"], "description": "https://en.wikipedia.org/wiki/Cropping_(image)"}, + {"enum": 
["IMAGE_PADDING"]}, + {"enum": ["IMAGE_ROTATION"]}, + {"enum": ["IMAGE_SCALING"], "description": "https://en.wikipedia.org/wiki/Image_scaling"}, + {"enum": ["IMAGE_TRANSFORM"]}, + {"enum": ["IMAGENET"], "description": "https://en.wikipedia.org/wiki/ImageNet"}, + {"enum": ["IMPUTATION"], "description": "https://en.wikipedia.org/wiki/Imputation_(statistics)"}, + {"enum": ["INDEPENDENT_COMPONENT_ANALYSIS"], "description": "https://en.wikipedia.org/wiki/Independent_component_analysis"}, + {"enum": ["INFORMATION_ENTROPY"], "description": "https://en.wikipedia.org/wiki/Entropy_(information_theory)"}, + {"enum": ["INFORMATION_FUZZY_NETWORKS"], "description": "https://en.wikipedia.org/wiki/Information_fuzzy_networks"}, + {"enum": ["INFORMATION_THEORETIC_METAFEATURE_EXTRACTION"]}, + {"enum": ["INSTANCE_BASED_LEARNING"], "description": "https://en.wikipedia.org/wiki/Instance-based_learning"}, + {"enum": ["INSTANTANEOUSLY_TRAINED_NEURAL_NETWORKS"], "description": "https://en.wikipedia.org/wiki/Instantaneously_trained_neural_networks"}, + {"enum": ["ISOMAP"], "description": "https://en.wikipedia.org/wiki/Isomap"}, + {"enum": ["ITERATIVE_LABELING"], "description": "Algorithms iteratively label unlabeled examples for semi-supervised learning."}, + {"enum": ["IVECTOR_EXTRACTION"], "description": "I-vector extration. Dehak, Najim & Kenny, Patrick & Dehak, R & Dumouchel, Pierre & Ouellet, Pierre. (2011). Front-End Factor Analysis for Speaker Verification. Audio, Speech, and Language Processing, IEEE Transactions on. 19. 788 - 798. 10.1109/TASL.2010.2064307."}, + {"enum": ["JACCARD_INDEX"], "description": "https://en.wikipedia.org/wiki/Jaccard_index"}, + {"enum": ["JUNCTION_TREE_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Junction_tree_algorithm"}, + {"enum": ["K_FOLD"], "description": "https://en.wikipedia.org/wiki/Cross-validation_(statistics)#k-fold_cross-validation"}, + {"enum": ["K_MEANS_CLUSTERING"], "description": "https://en.wikipedia.org/wiki/K-means_clustering"}, + {"enum": ["K_MEANS_PLUS_PLUS"], "description": "https://en.wikipedia.org/wiki/K-means%2B%2B"}, + {"enum": ["K_NEAREST_NEIGHBORS"], "description": "https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm"}, + {"enum": ["K_Q_FLATS"], "description": "https://en.wikipedia.org/wiki/K_q-flats"}, + {"enum": ["K_SVD"], "description": "https://en.wikipedia.org/wiki/K-SVD"}, + {"enum": ["KERNEL_ADAPTIVE_FILTER"], "description": "https://en.wikipedia.org/wiki/Kernel_adaptive_filter"}, + {"enum": ["KERNEL_INDEPENDENT_COMPONENT_ANALYSIS"], "description": "https://en.wikipedia.org/wiki/Kernel-independent_component_analysis"}, + {"enum": ["KERNEL_METHOD"], "description": "https://en.wikipedia.org/wiki/Kernel_method"}, + {"enum": ["KERNEL_PERCEPTRON"], "description": "https://en.wikipedia.org/wiki/Kernel_perceptron"}, + {"enum": ["KERNEL_PRINCIPAL_COMPONENT_ANALYSIS"], "description": "https://en.wikipedia.org/wiki/Kernel_principal_component_analysis"}, + {"enum": ["KERNEL_RANDOM_FOREST"], "description": "https://en.wikipedia.org/wiki/Random_forest#Kernel_random_forest"}, + {"enum": ["LANDMARKING_METAFEATURE_EXTRACTION"]}, + {"enum": ["LARGE_MARGIN_NEAREST_NEIGHBOR"], "description": "https://en.wikipedia.org/wiki/Large_margin_nearest_neighbor"}, + {"enum": ["LASSO"], "description": "https://en.wikipedia.org/wiki/Lasso_(statistics)"}, + {"enum": ["LATENT_DIRICHLET_ALLOCATION"], "description": "https://en.wikipedia.org/wiki/Latent_Dirichlet_allocation"}, + {"enum": ["LATENT_SEMANTIC_ANALYSIS"], "description": 
"https://en.wikipedia.org/wiki/Latent_semantic_analysis"}, + {"enum": ["LEARNING_USING_PRIVILEGED_INFORMATION"], "description": "Algorithm can leverage privileged information available in training data but absent in test data."}, + {"enum": ["LEARNING_VECTOR_QUANTIZATION"], "description": "https://en.wikipedia.org/wiki/Learning_vector_quantization"}, + {"enum": ["LEAST_SQUARES_SUPPORT_VECTOR_MACHINE"], "description": "https://en.wikipedia.org/wiki/Least_squares_support_vector_machine"}, + {"enum": ["LEAVE_ONE_OUT"], "description": "https://en.wikipedia.org/wiki/Cross-validation_(statistics)#Leave-one-out_cross-validation"}, + {"enum": ["LIGHTGBM"]}, + {"enum": ["LIMITED_MEMORY_BFGS"], "description": "https://en.wikipedia.org/wiki/Limited-memory_BFGS"}, + {"enum": ["LINDE_BUZO_GRAY_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Linde%E2%80%93Buzo%E2%80%93Gray_algorithm"}, + {"enum": ["LINEAR_DISCRIMINANT_ANALYSIS"], "description": "https://en.wikipedia.org/wiki/Linear_discriminant_analysis"}, + {"enum": ["LINEAR_FILTER"], "description": "https://en.wikipedia.org/wiki/Linear_filter"}, + {"enum": ["LINEAR_REGRESSION"], "description": "https://en.wikipedia.org/wiki/Linear_regression"}, + {"enum": ["LOBPCG"], "description": "https://en.wikipedia.org/wiki/LOBPCG"}, + {"enum": ["LOCAL_OUTLIER_FACTOR"], "description": "https://en.wikipedia.org/wiki/Local_outlier_factor"}, + {"enum": ["LOCAL_SEARCH"], "description": "https://en.wikipedia.org/wiki/Local_search_(optimization)"}, + {"enum": ["LOGISTIC_MODEL_TREE"], "description": "https://en.wikipedia.org/wiki/Logistic_model_tree"}, + {"enum": ["LOGISTIC_REGRESSION"], "description": "https://en.wikipedia.org/wiki/Logistic_regression"}, + {"enum": ["LOGITBOOST"], "description": "https://en.wikipedia.org/wiki/LogitBoost"}, + {"enum": ["LONG_SHORT_TERM_MEMORY"], "description": "https://en.wikipedia.org/wiki/Long_short-term_memory"}, + {"enum": ["LOW_RANK_MATRIX_APPROXIMATIONS"], "description": "https://en.wikipedia.org/wiki/Low-rank_matrix_approximations"}, + {"enum": ["LPBOOST"], "description": "https://en.wikipedia.org/wiki/LPBoost"}, + {"enum": ["MAP"], "description": "https://en.wikipedia.org/wiki/Map_(higher-order_function)"}, + {"enum": ["MARGIN_CLASSIFIER"], "description": "https://en.wikipedia.org/wiki/Margin_classifier"}, + {"enum": ["MARGIN_INFUSED_RELAXED_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Margin-infused_relaxed_algorithm"}, + {"enum": ["MARKOV_CHAIN"], "description": "https://en.wikipedia.org/wiki/Markov_chain"}, + {"enum": ["MARKOV_CHAIN_MONTE_CARLO"], "description": "https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo"}, + {"enum": ["MARKOV_DECISION_PROCESS"], "description": "https://en.wikipedia.org/wiki/Markov_decision_process"}, + {"enum": ["MARKOV_LOGIC_NETWORK"], "description": "https://en.wikipedia.org/wiki/Markov_logic_network"}, + {"enum": ["MARKOV_MODEL"], "description": "https://en.wikipedia.org/wiki/Markov_model"}, + {"enum": ["MARKOV_RANDOM_FIELD"], "description": "https://en.wikipedia.org/wiki/Markov_random_field"}, + {"enum": ["MAX_POOLING_NEURAL_NETWORK_LAYER"]}, + {"enum": ["MEAN_ABSOLUTE_ERROR"], "description": "https://en.wikipedia.org/wiki/Mean_absolute_error"}, + {"enum": ["MEAN_SHIFT"], "description": "https://en.wikipedia.org/wiki/Mean_shift"}, + {"enum": ["MEAN_SQUARED_ERROR"], "description": " https://en.wikipedia.org/wiki/Mean_squared_error"}, + {"enum": ["MEMETIC_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Memetic_algorithm"}, + {"enum": 
["MEMORY_PREDICTION_FRAMEWORK"], "description": "https://en.wikipedia.org/wiki/Memory-prediction_framework"}, + {"enum": ["MERSENNE_TWISTER"], "description": "https://en.wikipedia.org/wiki/Mersenne_Twister"}, + {"enum": ["MFCC_FEATURE_EXTRACTION"], "description": "The HTK Book, http://www.dsic.upv.es/docs/posgrado/20/RES/materialesDocentes/alejandroViewgraphs/htkbook.pdf"}, + {"enum": ["MIN_CONFLICTS_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Min-conflicts_algorithm"}, + {"enum": ["MINIMUM_REDUNDANCY_FEATURE_SELECTION"], "description": "https://en.wikipedia.org/wiki/Minimum_redundancy_feature_selection"}, + {"enum": ["MINMAX_SCALER"]}, + {"enum": ["MM_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/MM_algorithm"}, + {"enum": ["MODEL_BASED_METAFEATURE_EXTRACTION"]}, + {"enum": ["MODULAR_NEURAL_NETWORK"], "description": "https://en.wikipedia.org/wiki/Modular_neural_network"}, + {"enum": ["MOMENTUM_CONTRAST"], "description": "Momentum Contrast for Unsupervised Visual Representation Learning, https://arxiv.org/pdf/1911.05722.pdf, He et al. FAIR"}, + {"enum": ["MONTE_CARLO_TREE_SEARCH"], "description": "https://en.wikipedia.org/wiki/Monte_Carlo_tree_search"}, + {"enum": ["MORAVEC_CORNER_DETECTION_ALGORITHM"]}, + {"enum": ["MOTION_COMPENSATION"], "description": "https://en.wikipedia.org/wiki/Motion_compensation"}, + {"enum": ["MULTI_ARMED_BANDIT"], "description": "https://en.wikipedia.org/wiki/Multi-armed_bandit"}, + {"enum": ["MULTICLASS_CLASSIFICATION"], "description": "https://en.wikipedia.org/wiki/Multiclass_classification"}, + {"enum": ["MULTILABEL_CLASSIFICATION"], "description": "https://en.wikipedia.org/wiki/Multi-label_classification"}, + {"enum": ["MULTILAYER_PERCEPTRON"], "description": "https://en.wikipedia.org/wiki/Multilayer_perceptron"}, + {"enum": ["MULTINOMIAL_LOGISTIC_REGRESSION"], "description": "https://en.wikipedia.org/wiki/Multinomial_logistic_regression"}, + {"enum": ["MULTINOMIAL_NAIVE_BAYES"], "description": "http://scikit-learn.org/stable/modules/naive_bayes.html#multinomial-naive-bayes, https://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html"}, + {"enum": ["MULTIPLICATIVE_WEIGHT_UPDATE_METHOD"], "description": "https://en.wikipedia.org/wiki/Multiplicative_weight_update_method"}, + {"enum": ["MULTIVARIATE_REGRESSION"], "description": "https://en.wikipedia.org/wiki/Multi-label_classification"}, + {"enum": ["MUTUAL_INFORMATION"], "description": "https://en.wikipedia.org/wiki/Mutual_information"}, + {"enum": ["N_GRAM"], "description": "https://en.wikipedia.org/wiki/N-gram"}, + {"enum": ["NAIVE_BAYES_CLASSIFIER"], "description": "https://en.wikipedia.org/wiki/Naive_Bayes_classifier"}, + {"enum": ["NEAREST_CENTROID_CLASSIFIER"], "description": "https://en.wikipedia.org/wiki/Nearest_centroid_classifier"}, + {"enum": ["NEIGHBOURHOOD_COMPONENTS_ANALYSIS"], "description": "https://en.wikipedia.org/wiki/Neighbourhood_components_analysis"}, + {"enum": ["NEURAL_NETWORK_BACKPROPAGATION"], "description": "https://en.wikipedia.org/wiki/Backpropagation"}, + {"enum": ["NEURO_FUZZY_NETWORK"], "description": "https://en.wikipedia.org/wiki/Neuro-fuzzy"}, + {"enum": ["NEUROEVOLUTION_OF_AUGMENTED_TOPOLOGIES"], "description": "https://en.wikipedia.org/wiki/Neuroevolution_of_augmenting_topologies"}, + {"enum": ["NOISE_REDUCTION"], "description": "https://en.wikipedia.org/wiki/Noise_reduction"}, + {"enum": ["NONOVERLAPPING_COMMUNITY_DETECTION"]}, + {"enum": ["NORMAL_DISTRIBUTION"], "description": 
"https://en.wikipedia.org/wiki/Normal_distribution"}, + {"enum": ["NUMERICAL_METHOD"], "description": "https://en.wikipedia.org/wiki/Numerical_method"}, + {"enum": ["ONE_RULE"]}, + {"enum": ["ONE_SHOT_ASSOCIATIVE_MEMORY"]}, + {"enum": ["ONE_SHOT_LEARNING"], "description": "https://en.wikipedia.org/wiki/One-shot_learning"}, + {"enum": ["OPTICS_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/OPTICS_algorithm"}, + {"enum": ["OPTIMISTIC_KNOWLEDGE_GRADIENT"], "description": "https://en.wikipedia.org/wiki/Optimistic_knowledge_gradient"}, + {"enum": ["ORTHOGONAL_POLYNOMIAL_CODING"], "description": "https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/#ORTHOGONAL"}, + {"enum": ["OVERLAPPING_CLUSTERING"]}, + {"enum": ["OVERLAPPING_COMMUNITY_DETECTION"]}, + {"enum": ["PACHINKO_ALLOCATION"], "description": "https://en.wikipedia.org/wiki/Pachinko_allocation"}, + {"enum": ["PAGERANK"], "description": "https://en.wikipedia.org/wiki/PageRank"}, + {"enum": ["PARAMETRIC_TRAJECTORY_MODELING"], "description": "Gish, H. and Ng, K., 1996, October. Parametric trajectory models for speech recognition. In Spoken Language, 1996. ICSLP 96. Proceedings., Fourth International Conference on (Vol. 1, pp. 466-469). IEEE."}, + {"enum": ["PARTIAL_LEAST_SQUARES_REGRESSION"], "description": "https://en.wikipedia.org/wiki/Partial_least_squares_regression"}, + {"enum": ["PARTICLE_SWARM_OPTIMIZATION"], "description": "https://en.wikipedia.org/wiki/Particle_swarm_optimization"}, + {"enum": ["PASSIVE_AGGRESSIVE"], "description": "http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf"}, + {"enum": ["PERCEPTRON"], "description": "https://en.wikipedia.org/wiki/Perceptron"}, + {"enum": ["PHYSICAL_NEURAL_NETWORK"], "description": "https://en.wikipedia.org/wiki/Physical_neural_network"}, + {"enum": ["PIXELATION"], "description": "https://en.wikipedia.org/wiki/Pixelation"}, + {"enum": ["POLYNOMIAL_NEURAL_NETWORK"]}, + {"enum": ["POLYNOMIAL_REGRESSION"], "description": "https://en.wikipedia.org/wiki/Polynomial_regression"}, + {"enum": ["POPULATION_BASED_INCREMENTAL_LEARNING"], "description": "https://en.wikipedia.org/wiki/Population-based_incremental_learning"}, + {"enum": ["PREFRONTAL_CORTEX_BASAL_GANGLIA_WORKING_MEMORY"], "description": "https://en.wikipedia.org/wiki/Prefrontal_cortex_basal_ganglia_working_memory"}, + {"enum": ["PRINCIPAL_COMPONENT_ANALYSIS"], "description": "https://en.wikipedia.org/wiki/Principal_component_analysis"}, + {"enum": ["PROBABILISTIC_DATA_CLEANING"]}, + {"enum": ["PROBABILISTIC_LATENT_SEMANTIC_ANALYSIS"], "description": "https://en.wikipedia.org/wiki/Probabilistic_latent_semantic_analysis"}, + {"enum": ["PROBABILISTIC_NEURAL_NETWORK"], "description": "https://en.wikipedia.org/wiki/Probabilistic_neural_network"}, + {"enum": ["PRUNING"], "description": "https://en.wikipedia.org/wiki/Pruning_(decision_trees)"}, + {"enum": ["PSIPRED"], "description": "https://en.wikipedia.org/wiki/PSIPRED"}, + {"enum": ["Q_LEARNING"], "description": "https://en.wikipedia.org/wiki/Q-learning"}, + {"enum": ["QUADRATIC_DISCRIMINANT_ANALYSIS"], "description": "https://en.wikipedia.org/wiki/Quadratic_classifier#Quadratic_discriminant_analysis"}, + {"enum": ["QUANTUM_NEURAL_NETWORK"], "description": "https://en.wikipedia.org/wiki/Quantum_neural_network"}, + {"enum": ["QUICKPROP"], "description": "https://en.wikipedia.org/wiki/Quickprop"}, + {"enum": ["RADIAL_BASIS_FUNCTION_NETWORK"], "description": "https://en.wikipedia.org/wiki/Radial_basis_function_network"}, + 
{"enum": ["RANDOM_FOREST"], "description": "https://en.wikipedia.org/wiki/Random_forest"}, + {"enum": ["RANDOM_GRAPH"], "description": "https://en.wikipedia.org/wiki/Random_graph"}, + {"enum": ["RANDOM_PROJECTION"], "description": "https://en.wikipedia.org/wiki/Random_projection"}, + {"enum": ["RANDOM_SUBSPACE_METHOD"], "description": "https://en.wikipedia.org/wiki/Random_subspace_method"}, + {"enum": ["RANDOM_WALK"], "description": "https://en.wikipedia.org/wiki/Random_walk"}, + {"enum": ["RANDOMIZED_WEIGHTED_MAJORITY_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Randomized_weighted_majority_algorithm"}, + {"enum": ["RANKBRAIN"], "description": "https://en.wikipedia.org/wiki/RankBrain"}, + {"enum": ["RANKING_SVM"], "description": "https://en.wikipedia.org/wiki/Ranking_SVM"}, + {"enum": ["RAPIDLY_EXPLORING_RANDOM_TREE"], "description": "https://en.wikipedia.org/wiki/Rapidly-exploring_random_tree"}, + {"enum": ["RECEIVER_OPERATING_CHARACTERISTIC"], "description": "https://en.wikipedia.org/wiki/Receiver_operating_characteristic"}, + {"enum": ["RECURRENT_NEURAL_NETWORK"], "description": "https://en.wikipedia.org/wiki/Recurrent_neural_network"}, + {"enum": ["RECURSIVE_LEAST_SQUARES"], "description": "https://en.wikipedia.org/wiki/Recursive_least_squares_filter"}, + {"enum": ["RECURSIVE_PARTITIONING"], "description": "https://en.wikipedia.org/wiki/Recursive_partitioning"}, + {"enum": ["REGULARIZATION_BY_SPECTRAL_FILTERING"], "description": "https://en.wikipedia.org/wiki/Regularization_by_spectral_filtering"}, + {"enum": ["REGULARIZED_LEAST_SQUARES"], "description": "https://en.wikipedia.org/wiki/Regularized_least_squares"}, + {"enum": ["REGULATORY_FEEDBACK_NETWORK"], "description": "https://en.wikipedia.org/wiki/Regulatory_feedback_network"}, + {"enum": ["REINFORCE_ALGORITHM"]}, + {"enum": ["REJECTION_SAMPLING"], "description": "https://en.wikipedia.org/wiki/Rejection_sampling"}, + {"enum": ["RELATIONAL_ALGEBRA"], "description": "https://en.wikipedia.org/wiki/Relational_algebra"}, + {"enum": ["RELATIONAL_DATA_MINING"], "description": "https://en.wikipedia.org/wiki/Relational_data_mining"}, + {"enum": ["RELIEF"], "description": "https://en.wikipedia.org/wiki/Relief_(feature_selection)"}, + {"enum": ["RESTRICTED_BOLTZMANN_MACHINE"], "description": "https://en.wikipedia.org/wiki/Restricted_Boltzmann_machine"}, + {"enum": ["RETINANET"], "description": "https://arxiv.org/abs/1708.02002"}, + {"enum": ["REVERSE_HELMERT_CODING"], "description": "https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/#reverse"}, + {"enum": ["REVERSE_MONTE_CARLO"], "description": "https://en.wikipedia.org/wiki/Reverse_Monte_Carlo"}, + {"enum": ["RIPPER"], "description": "https://en.wikipedia.org/wiki/Repeated_incremental_pruning_to_produce_error_reduction_(RIPPER)"}, + {"enum": ["ROBUST_PRINCIPAL_COMPONENT_ANALYSIS"], "description": "https://en.wikipedia.org/wiki/Robust_principal_component_analysis"}, + {"enum": ["RPROP"], "description": "https://en.wikipedia.org/wiki/Rprop"}, + {"enum": ["RULE_BASED_MACHINE_LEARNING"], "description": "https://en.wikipedia.org/wiki/Rule-based_machine_learning"}, + {"enum": ["SAMPLE_MERGING"]}, + {"enum": ["SAMPLE_SELECTION"]}, + {"enum": ["SELF_ORGANIZING_MAP"], "description": "https://en.wikipedia.org/wiki/Self-organizing_map"}, + {"enum": ["SEMIDEFINITE_EMBEDDING"], "description": "https://en.wikipedia.org/wiki/Semidefinite_embedding"}, + {"enum": ["SIGNAL_DITHERING"], "description": "https://en.wikipedia.org/wiki/Dither"}, + 
{"enum": ["SIGNAL_ENERGY"], "description": "https://en.wikipedia.org/wiki/Energy_(signal_processing)"}, + {"enum": ["SIGNAL_TO_NOISE_RATIO"], "description": "https://en.wikipedia.org/wiki/Signal-to-noise_ratio"}, + {"enum": ["SIMULATED_ANNEALING"], "description": "https://en.wikipedia.org/wiki/Simulated_annealing"}, + {"enum": ["SINGULAR_VALUE_DECOMPOSITION"], "description": "https://en.wikipedia.org/wiki/Singular-value_decomposition"}, + {"enum": ["SMOOTHED_ANALYSIS"], "description": "https://en.wikipedia.org/wiki/Smoothed_analysis"}, + {"enum": ["SOFT_CLUSTERING"], "description": "https://en.wikipedia.org/wiki/Fuzzy_clustering"}, + {"enum": ["SOFTMAX_FUNCTION"], "description": "https://en.wikipedia.org/wiki/Softmax_function"}, + {"enum": ["SPARSE_DICTIONARY_LEARNING"], "description": "https://en.wikipedia.org/wiki/Sparse_dictionary_learning"}, + {"enum": ["SPARSE_PCA"], "description": "https://en.wikipedia.org/wiki/Sparse_PCA"}, + {"enum": ["SPECTRAL_CLUSTERING"], "description": "https://en.wikipedia.org/wiki/Spectral_clustering"}, + {"enum": ["SPIKE_AND_SLAB_VARIABLE_SELECTION"], "description": "https://en.wikipedia.org/wiki/Spike-and-slab_variable_selection"}, + {"enum": ["SPIKING_NEURAL_NETWORKS"], "description": "https://en.wikipedia.org/wiki/Spiking_neural_network"}, + {"enum": ["SPRUCE"], "description": "https://gitlab.com/zinkov/spruce/blob/master/README.md"}, + {"enum": ["STATISTICAL_METAFEATURE_EXTRACTION"]}, + {"enum": ["STATISTICAL_MOMENT_ANALYSIS"], "description": "https://en.wikipedia.org/wiki/Moment_(mathematics)"}, + {"enum": ["STOCHASTIC_CHAINS_WITH_MEMORY_OF_VARIABLE_LENGTH"], "description": "https://en.wikipedia.org/wiki/Stochastic_chains_with_memory_of_variable_length"}, + {"enum": ["STOCHASTIC_GRADIENT_DESCENT"], "description": "https://en.wikipedia.org/wiki/Stochastic_gradient_descent"}, + {"enum": ["STOCHASTIC_NEURAL_NETWORK"], "description": "https://en.wikipedia.org/wiki/Stochastic_neural_network"}, + {"enum": ["STRICT_PARTITIONING_CLUSTERING"]}, + {"enum": ["STRICT_PARTITIONING_CLUSTERING_WITH_OUTLIERS"]}, + {"enum": ["STRUCTURED_KNN"], "description": "https://en.wikipedia.org/wiki/Structured_kNN"}, + {"enum": ["STRUCTURED_SPARSITY_REGULARIZATION"], "description": "https://en.wikipedia.org/wiki/Structured_sparsity_regularization"}, + {"enum": ["STRUCTURED_SUPPORT_VECTOR_MACHINE"], "description": "https://en.wikipedia.org/wiki/Structured_support_vector_machine"}, + {"enum": ["SUBSPACE_CLUSTERING"], "description": "https://en.wikipedia.org/wiki/Clustering_high-dimensional_data#Subspace_clustering"}, + {"enum": ["SUM_CODING"]}, + {"enum": ["SUPER_RECURSIVE_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Super-recursive_algorithm"}, + {"enum": ["SUPPORT_VECTOR_MACHINE"], "description": "https://en.wikipedia.org/wiki/Support_vector_machine"}, + {"enum": ["SYMBOLIC_REGRESSION"], "description": "https://en.wikipedia.org/wiki/Symbolic_regression"}, + {"enum": ["T_DISTRIBUTED_STOCHASTIC_NEIGHBOR_EMBEDDING"], "description": "https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding"}, + {"enum": ["TELEMANOM"]}, + {"enum": ["TFIDF"], "description": "https://en.wikipedia.org/wiki/Tf-idf"}, + {"enum": ["TIKHONOV_REGULARIZATION"], "description": "https://en.wikipedia.org/wiki/Tikhonov_regularization"}, + {"enum": ["TIME_DELAY_NEURAL_NETWORK"], "description": "https://en.wikipedia.org/wiki/Time_delay_neural_network"}, + {"enum": ["TRUNCATED_NEWTON_METHOD"], "description": "https://en.wikipedia.org/wiki/Truncated_Newton_method"}, + {"enum": 
["TRUNCATED_NORMAL_DISTRIBUTION"], "description": "https://en.wikipedia.org/wiki/Truncated_normal_distribution"}, + {"enum": ["UNIFORM_DISTRIBUTION"], "description": "https://en.wikipedia.org/wiki/Uniform_distribution_(continuous)"}, + {"enum": ["UNIFORM_TIME_SERIES_SEGMENTATION"], "description": "Time-series segmentation into fixed-sized segments (windows, frames)"}, + {"enum": ["UNIT_WEIGHTED_REGRESSION"], "description": "https://en.wikipedia.org/wiki/Unit-weighted_regression"}, + {"enum": ["UNIVARIATE_REGRESSION"], "description": "https://en.wikipedia.org/wiki/Multi-label_classification"}, + {"enum": ["UNIVERSAL_PORTFOLIO_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Universal_portfolio_algorithm"}, + {"enum": ["VARIABLE_ORDER_MARKOV_MODEL"], "description": "https://en.wikipedia.org/wiki/Variable-order_Markov_model"}, + {"enum": ["VARIATIONAL_BAYESIAN_METHODS"], "description": "https://en.wikipedia.org/wiki/Variational_Bayesian_methods"}, + {"enum": ["VARIATIONAL_MESSAGE_PASSING"], "description": "https://en.wikipedia.org/wiki/Variational_message_passing"}, + {"enum": ["VECTOR_AUTOREGRESSION"], "description": "https://en.wikipedia.org/wiki/Vector_autoregression"}, + {"enum": ["VECTOR_QUANTIZATION"], "description": "https://en.wikipedia.org/wiki/Vector_quantization"}, + {"enum": ["VECTORIZATION"], "description": "https://en.wikipedia.org/wiki/Vectorization"}, + {"enum": ["VERSION_SPACE_LEARNING"], "description": "https://en.wikipedia.org/wiki/Version_space_learning"}, + {"enum": ["WAKE_SLEEP_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Wake-sleep_algorithm"}, + {"enum": ["WEIGHTED_MAJORITY_ALGORITHM"], "description": "https://en.wikipedia.org/wiki/Weighted_majority_algorithm_(machine_learning)"}, + {"enum": ["WINNOW"], "description": "https://en.wikipedia.org/wiki/Winnow_(algorithm)"}, + {"enum": ["WORD2VEC"], "description": "https://en.wikipedia.org/wiki/Word2vec"} + ] + }, + "minItems": 1 + }, + "primitive_family": { + "description": "Primitive family describes the high-level purpose/nature of the primitive. Only one value per primitive is possible. 
Consider splitting a primitive into multiple primitives if this represents a problem for you.", + "oneOf": [ + {"enum": ["REINFORCEMENT"], "description": "Reinforcement Module"}, + {"enum": ["ANOMALY_DETECTION"], "description": "TODS algorithms"}, + {"enum": ["CLASSIFICATION"], "description": "https://en.wikipedia.org/wiki/Statistical_classification"}, + {"enum": ["CLUSTERING"], "description": "https://en.wikipedia.org/wiki/Cluster_analysis"}, + {"enum": ["COLLABORATIVE_FILTERING"], "description": "https://en.wikipedia.org/wiki/Collaborative_filtering"}, + {"enum": ["COMMUNITY_DETECTION"], "description": "https://en.wikipedia.org/wiki/Community_search"}, + {"enum": ["DATA_AUGMENTATION"], "description": "Adding value to base data by adding information derived from internal and external sources."}, + {"enum": ["DATA_CLEANING"], "description": "https://en.wikipedia.org/wiki/Data_cleansing"}, + {"enum": ["DATA_COMPRESSION"], "description": "https://en.wikipedia.org/wiki/Data_compression"}, + {"enum": ["DATA_GENERATION"], "description": "https://en.wikipedia.org/wiki/Data_generating_process"}, + {"enum": ["DATA_PREPROCESSING"], "description": "https://en.wikipedia.org/wiki/Data_pre-processing"}, + {"enum": ["DATA_TRANSFORMATION"], "description": "https://en.wikipedia.org/wiki/Data_transformation"}, + {"enum": ["DATA_VALIDATION"], "description": "https://en.wikipedia.org/wiki/Data_validation"}, + {"enum": ["DATA_WRANGLING"], "description": "https://en.wikipedia.org/wiki/Data_wrangling"}, + {"enum": ["DIGITAL_IMAGE_PROCESSING"], "description": "https://en.wikipedia.org/wiki/Digital_image_processing"}, + {"enum": ["DIGITAL_SIGNAL_PROCESSING"], "description": "https://en.wikipedia.org/wiki/Digital_signal_processing"}, + {"enum": ["DIMENSIONALITY_REDUCTION"], "description": "https://en.wikipedia.org/wiki/Dimensionality_reduction"}, + {"enum": ["EVALUATION"], "description": "Primitives providing validation/evaluation, like cross-validation."}, + {"enum": ["FEATURE_CONSTRUCTION"], "description": "A primitive which creates new features."}, + {"enum": ["FEATURE_EXTRACTION"], "description": "https://en.wikipedia.org/wiki/Feature_extraction"}, + {"enum": ["FEATURE_SELECTION"], "description": "https://en.wikipedia.org/wiki/Feature_selection"}, + {"enum": ["GRAPH_CLUSTERING"]}, + {"enum": ["GRAPH_MATCHING"], "description": "https://en.wikipedia.org/wiki/Graph_matching"}, + {"enum": ["LAYER"], "description": "A primitive which is a neural network layer used in construction of a neural network."}, + {"enum": ["LEARNER"], "description": "A primitive which is a learner/model."}, + {"enum": ["LINK_PREDICTION"]}, + {"enum": ["LOSS_FUNCTION"], "description": "Primitives can take a loss function as an argument. 
This family of primitives provide such loss functions and they can be passed as an argument to other primitives."}, + {"enum": ["METALEARNING"], "description": "https://en.wikipedia.org/wiki/Meta_learning_(computer_science)"}, + {"enum": ["NATURAL_LANGUAGE_PROCESSING"], "description": "https://en.wikipedia.org/wiki/Natural_language_processing"}, + {"enum": ["NORMALIZATION"]}, + {"enum": ["OBJECT_DETECTION"], "description": "https://en.wikipedia.org/wiki/Object_detection"}, + {"enum": ["OPERATOR"], "description": "A simple mathematical operator."}, + {"enum": ["REGRESSION"], "description": "A primitive which can be used to address regression problems."}, + {"enum": ["SEMISUPERVISED_CLASSIFICATION"]}, + {"enum": ["SEMISUPERVISED_REGRESSION"]}, + {"enum": ["SIMILARITY_MODELING"], "description": "A primitive which attempts to learn or infer a measure of similarity or dissimilarity between pairs of instances."}, + {"enum": ["TIME_SERIES_CLASSIFICATION"], "description": "A primitive which can be used to address classification problems of time-series."}, + {"enum": ["TIME_SERIES_EMBEDDING"], "description": "A fixed-length representation of variable-length time series." }, + {"enum": ["TIME_SERIES_FORECASTING"]}, + {"enum": ["TIME_SERIES_SEGMENTATION"], "description": "A primitive which segments an input time-series into a sequence of discrete segments in order to reveal the underlying properties of its source. https://en.wikipedia.org/wiki/Time-series_segmentation."}, + {"enum": ["VERTEX_CLASSIFICATION"]}, + {"enum": ["VERTEX_NOMINATION"]}, + {"enum": ["VIDEO_PROCESSING"], "description": "https://en.wikipedia.org/wiki/Video_processing"}, + {"enum": ["SCHEMA_DISCOVERY"]}, + {"enum": ["REMOTE_SENSING"]} + ] + }, + "preconditions": { + "type": "array", + "description": "A set of requirements for the data given as an input to this primitive. For example, a primitive may not be able to handle data with missing values.", + "items": { + "oneOf": [ + {"enum": ["NO_MISSING_VALUES"], "description": "The primitive cannot handle missing values."}, + {"enum": ["NO_CATEGORICAL_VALUES"], "description": "The primitive cannot handle categorical values."}, + {"enum": ["NO_NEGATIVE_VALUES"], "description": "The primitive cannot handle negative values."}, + {"enum": ["NO_CONTINUOUS_VALUES"], "description": "The primitive cannot handle continuous values."}, + {"enum": ["NO_JAGGED_VALUES"], "description": "The primitive cannot handle values where different elements of data have different dimensions. Both numpy arrays and pandas support only fixed dimension sizes, but a list of lists could have some sub-lists of a different length to others, or a numpy array of objects where objects are numpy arrays of different sizes."}, + {"enum": ["NO_NESTED_VALUES"], "description": "The primitive cannot handle values where a container value contains nested other values with dimensions. E.g., a Pandas DataFrame having numpy arrays as values. Not just container types have dimensions."} + ] + }, + "minItems": 1 + }, + "effects": { + "type": "array", + "description": "A set of postconditions obtained by the data processed by this primitive. 
For example, a primitive may remove missing values.", + "items": { + "oneOf":[ + {"enum": ["NO_MISSING_VALUES"], "description": "The primitive removes missing values (e.g., imputation)."}, + {"enum": ["NO_CATEGORICAL_VALUES"], "description": "The primitive removes categorical columns (e.g., label encoder)."}, + {"enum": ["NO_NEGATIVE_VALUES"], "description": "The primitive produces only non-negative values."}, + {"enum": ["NO_CONTINUOUS_VALUES"], "description": "The data produced by this primitive is discretized."}, + {"enum": ["NO_JAGGED_VALUES"], "description": "The primitive produces values with fixed dimension sizes across all elements."}, + {"enum": ["NO_NESTED_VALUES"], "description": "The primitive produces values where a container value does not contain any other nested values with dimensions."} + ] + }, + "minItems": 1 + }, + "hyperparams_to_tune": { + "type": "array", + "description": "A list containing the significant hyper-parameter names of a primitive that should be tuned (for prioritizing hyper-parameter tuning). For instance, if a primitive has 10 hyper-parameters, this metadata may be used to specify the two or three that affect the results the most.", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "outliers": { + "type": "array", + "description": "Number of outliers n sigma away from mean for some list of numbers.", + "items": { + "type": "object", + "properties": { + "n": { + "type": "integer" + }, + "count": { + "type": "integer" + } + }, + "required": [ + "n", + "count" + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "aggregate": { + "type": "object", + "description": "Aggregate metadata about some list of numbers.", + "properties": { + "name": { + "$ref": "#/definitions/name" + }, + "description": { + "$ref": "#/definitions/description" + }, + "count": { + "type": "integer", + "description": "A count of values in the list." + }, + "min": { + "type": "number", + "description": "Minimum value of the list." + }, + "max": { + "type": "number", + "description": "Maximum value of the list." + }, + "mean": { + "type": "number", + "description": "Mean value of the list." + }, + "median": { + "type": "number", + "description": "Median value of the list." + }, + "std": { + "type": "number", + "description": "Unbiased standard deviation value of the list." + }, + "quartile_1": { + "type": "number", + "description": "The 25th percentile value of the list." + }, + "quartile_3": { + "type": "number", + "description": "The 75th percentile value of the list." + }, + "kurtosis": { + "type": "number", + "description": "The unbiased kurtosis of the distribution using Fisher’s definition of kurtosis (kurtosis of normal == 0.0). Normalized by N-1." + }, + "skewness": { + "type": "number", + "description": "The unbiased skew of the distribution." + } + }, + "additionalProperties": true + }, + "python_value": { + "description": "A Python value. Schema allows a value of any type, even not JSON-compatible." + }, + "python_type": { + "description": "A Python type.", + "format": "python-type" + }, + "supported_media_types": { + "allOf": [{"$ref": "#/definitions/media_types"}], + "description": "Which media types a primitive knows how to manipulate." 
+ }, + "timestamp": { + "type": "string", + "description": "A timestamp.", + "anyOf": [ + {"format": "date-time"}, + {"format": "date"} + ] + }, + "problem": { + "type": "object", + "properties": { + "task_keywords": { + "type": "array", + "description": "Keywords describing the task.", + "items": { + "oneOf": [ + {"enum": ["ANOMALY_DETECTION"]}, + {"enum": ["CLASSIFICATION"], "description": "https://en.wikipedia.org/wiki/Statistical_classification"}, + {"enum": ["REGRESSION"], "description": "https://en.wikipedia.org/wiki/Regression_analysis"}, + {"enum": ["CLUSTERING"], "description": "https://en.wikipedia.org/wiki/Cluster_analysis"}, + {"enum": ["LINK_PREDICTION"]}, + {"enum": ["VERTEX_NOMINATION"]}, + {"enum": ["VERTEX_CLASSIFICATION"]}, + {"enum": ["COMMUNITY_DETECTION"], "description": "https://en.wikipedia.org/wiki/Community_search"}, + {"enum": ["GRAPH_MATCHING"], "description": "https://en.wikipedia.org/wiki/Graph_matching"}, + {"enum": ["FORECASTING"]}, + {"enum": ["COLLABORATIVE_FILTERING"], "description": "https://en.wikipedia.org/wiki/Collaborative_filtering"}, + {"enum": ["OBJECT_DETECTION"], "description": "https://en.wikipedia.org/wiki/Object_detection"}, + {"enum": ["SEMISUPERVISED"]}, + {"enum": ["BINARY"]}, + {"enum": ["MULTICLASS"]}, + {"enum": ["MULTILABEL"]}, + {"enum": ["UNIVARIATE"]}, + {"enum": ["MULTIVARIATE"]}, + {"enum": ["OVERLAPPING"]}, + {"enum": ["NONOVERLAPPING"]}, + {"enum": ["TABULAR"]}, + {"enum": ["RELATIONAL"]}, + {"enum": ["NESTED"]}, + {"enum": ["IMAGE"]}, + {"enum": ["AUDIO"]}, + {"enum": ["VIDEO"]}, + {"enum": ["SPEECH"]}, + {"enum": ["TEXT"]}, + {"enum": ["GRAPH"]}, + {"enum": ["MULTIGRAPH"]}, + {"enum": ["TIME_SERIES"]}, + {"enum": ["GROUPED"]}, + {"enum": ["GEOSPATIAL"]}, + {"enum": ["REMOTE_SENSING"], "description": "https://en.wikipedia.org/wiki/Remote_sensing"}, + {"enum": ["LUPI"]}, + {"enum": ["MISSING_METADATA"]} + ] + }, + "minItems": 1 + }, + "performance_metrics": { + "type": "array", + "description": "For which performance metrics to optimize for?", + "items": { + "$ref": "#/definitions/performance_metric" + }, + "minItems": 1 + } + }, + "additionalProperties": true + }, + "problem_inputs": { + "type": "array", + "description": "A list describing input datasets for the problem and associated targets. This list should match the list of inputs to a solution pipeline, in order.", + "items": { + "type": "object", + "description": "A description of an input dataset.", + "properties": { + "dataset_id": { + "allOf": [{"$ref": "#/definitions/id"}], + "description": "An ID of a dataset associated with this input, among known or available datasets. Information which datasets precisely (version, digest, etc.) are inputs should be available elsewhere, e.g., in a pipeline run description, while this ID serves to map problem inputs to those datasets." + }, + "targets": { + "allOf": [{"$ref": "#/definitions/targets"}], + "description": "A list of targets used for this problem from this dataset." 
+ }, + "forecasting_horizon": { + "type": "object", + "description": "In time series forecasting, the problem description can contain additional information about the horizon of forecast.", + "properties": { + "resource_id": { + "$ref": "#/definitions/resource_id" + }, + "column_index": { + "$ref": "#/definitions/column_index" + }, + "column_name": { + "$ref": "#/definitions/column_name" + }, + "horizon_value": { + "type": "number", + "description": "The maximum number of time steps in future the predictions will need to be made, in units of \"time_granularity\" of the referenced column." + } + }, + "required": [ + "resource_id", + "column_index", + "column_name", + "horizon_value" + ], + "additionalProperties": true + }, + "privileged_data": { + "type": "array", + "description": "A list of privileged data columns related to unavailable attributes during testing. These columns do not have data available in the test split of a dataset.", + "items": { + "type": "object", + "properties": { + "privileged_data_index": { + "type": "integer", + "description": "An index of the privileged data column in this list of privileged data columns, 0-based." + }, + "resource_id": { + "$ref": "#/definitions/resource_id" + }, + "column_index": { + "$ref": "#/definitions/column_index" + }, + "column_name": { + "$ref": "#/definitions/column_name" + } + }, + "required": [ + "privileged_data_index", + "resource_id", + "column_index", + "column_name" + ], + "additionalProperties": true + }, + "minItems": 1 + } + }, + "required": [ + "dataset_id" + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "data_augmentation": { + "type": "array", + "description": "Information about internal or external sources of data that can be used to address the challenge of data augmentation.", + "items": { + "type": "object", + "properties": { + "domain": { + "allOf": [{"$ref": "#/definitions/keywords"}], + "description": "The application domain(s) of the source (e.g., government, census, economics)." + }, + "keywords": { + "allOf": [{"$ref": "#/definitions/keywords"}], + "description": "Additional tags that help narrow the search (e.g., housing, household income)." + } + }, + "additionalProperties": true + }, + "minItems": 1 + }, + "resource_id": { + "type": "string" + }, + "column_index": { + "type": "integer", + "description": "An index of the column, 0-based." + }, + "column_name": { + "allOf": [{"$ref": "#/definitions/name"}], + "description": "A name of the column. There are no restrictions on the content, length, it can contain whitespace, and names do not even have to be unique." + }, + "data_reference": { + "type": "string", + "description": "Data reference is a string which identifies an output of a step or a pipeline input and forms a data-flow connection between data available and an input to a step.", + "examples": [ + "steps.0.produce", + "inputs.1" + ] + }, + "context": { + "description": "Context in which a pipeline was run.", + "oneOf": [ + {"enum": ["PRETRAINING"], "description": "Pipeline was run during building/training of the system itself, e.g., during metalearning."}, + {"enum": ["TESTING"], "description": "Pipeline was run during development or testing of the system itself, e.g., during debugging. 
This is also a default context."}, + {"enum": ["EVALUATION"], "description": "Pipeline was run during evaluation of the system itself, e.g., blind evaluation."}, + {"enum": ["PRODUCTION"], "description": "Pipeline was run during regular (production) operation of the system."} + ] + }, + "users": { + "type": "array", + "description": "A list of users associated with the value.", + "items": { + "type": "object", + "properties": { + "id": { + "allOf": [{"$ref": "#/definitions/id"}], + "description": "Globally unique ID for this user. It can be opaque, but it should identify the same user across sessions. Consider using UUID variant 5 with namespace set to the name of your system and name to an ID in your system's database." + }, + "reason": { + "allOf": [{"$ref": "#/definitions/description"}], + "description": "A natural language description of what the user did to be on the list, e.g., \"Picked a pipeline from a list of pipelines.\"." + }, + "rationale": { + "allOf": [{"$ref": "#/definitions/description"}], + "description": "A natural language description by the user of what the user did, e.g., \"I picked a pipeline because it looks short in comparison with others.\"." + } + }, + "required": [ + "id" + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "container_argument": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "A regular container type output from another step or pipeline's input.", + "enum": ["CONTAINER"] + }, + "data": { + "$ref": "#/definitions/data_reference" + } + }, + "required": [ + "type", + "data" + ], + "additionalProperties": true + }, + "container_arguments": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "A list of regular container type outputs from another steps or pipeline's inputs.", + "enum": ["CONTAINER"] + }, + "data": { + "type": "array", + "items": { + "$ref": "#/definitions/data_reference" + }, + "minItems": 1 + } + }, + "required": [ + "type", + "data" + ], + "additionalProperties": true + }, + "data_argument": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "A singleton output from another step in a pipeline. This means that container's sole element is passed as an argument to the primitive instead of the whole container value.", + "enum": ["DATA"] + }, + "data": { + "$ref": "#/definitions/data_reference" + } + }, + "required": [ + "type", + "data" + ], + "additionalProperties": true + }, + "data_arguments": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "A list of singleton outputs from other steps in a pipeline.", + "enum": ["DATA"] + }, + "data": { + "type": "array", + "items": { + "$ref": "#/definitions/data_reference" + }, + "minItems": 1 + } + }, + "required": [ + "type", + "data" + ], + "additionalProperties": true + }, + "primitive_argument": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "A primitive instance to be passed as a hyper-parameter. A primitive should be part of a pipeline and is identified by its step.", + "enum": ["PRIMITIVE"] + }, + "data": { + "type": "integer", + "description": "0-based index identifying a step of which primitive is used as a value." + } + }, + "required": [ + "type", + "data" + ], + "additionalProperties": true + }, + "primitive_arguments": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "A list of primitive instances to be passed as a hyper-parameter. 
Primitives should be part of a pipeline and are identified by their step.", + "enum": ["PRIMITIVE"] + }, + "data": { + "type": "array", + "items": { + "type": "integer", + "description": "0-based index identifying a step of which primitive is used as a value." + }, + "minItems": 1 + } + }, + "required": [ + "type", + "data" + ], + "additionalProperties": true + }, + "value_argument": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "A constant value of a hyper-parameter. Each hyper-parameter class knows how to convert its value to a JSON-compatible structure and back.", + "enum": ["VALUE"] + }, + "data": { + "description": "Hyper-parameter value as converted to a JSON-compatible structure by a hyper-parameter class." + } + }, + "required": [ + "type", + "data" + ], + "additionalProperties": true + }, + "arguments": { + "type": "object", + "description": "A mapping between primitive's arguments and their values. Primitive's arguments are passed in turn to primitive's methods which need them. Only those which are specified as kind \"PIPELINE\" in primitive's metadata can be specified here.", + "additionalProperties": false, + "patternProperties": { + "^[A-Za-z][A-Za-z_0-9]*$": { + "oneOf": [ + { + "$ref": "#/definitions/container_argument" + }, + { + "$ref": "#/definitions/container_arguments" + }, + { + "$ref": "#/definitions/data_argument" + } + ] + } + } + }, + "hyperparams": { + "type": "object", + "description": "A mapping between primitive's hyper-parameters and their values.", + "additionalProperties": false, + "patternProperties": { + "^[A-Za-z][A-Za-z_0-9]*([.][A-Za-z][A-Za-z_0-9]*)*$": { + "oneOf": [ + { + "$ref": "#/definitions/container_argument" + }, + { + "$ref": "#/definitions/data_argument" + }, + { + "$ref": "#/definitions/primitive_argument" + }, + { + "$ref": "#/definitions/value_argument" + }, + { + "$ref": "#/definitions/data_arguments" + }, + { + "$ref": "#/definitions/primitive_arguments" + } + ] + } + } + }, + "pipeline_inputs": { + "type": "array", + "description": "Inputs to a pipeline. The order of inputs matter. Inputs are referenced by steps using a data reference.", + "items": { + "type": "object", + "properties": { + "name": { + "$ref": "#/definitions/name" + } + }, + "additionalProperties": true + } + }, + "pipeline_outputs": { + "type": "array", + "description": "Outputs from a pipeline. The order of outputs matter. Each output references an output of a step and in this way makes that step output a pipeline output as well.", + "items": { + "type": "object", + "properties": { + "name": { + "$ref": "#/definitions/name" + }, + "data": { + "$ref": "#/definitions/data_reference" + } + }, + "required": [ + "data" + ], + "additionalProperties": true + } + }, + "pipeline_steps": { + "type": "array", + "description": "Steps defining pipeline's logic.", + "items": { + "type": "object", + "oneOf": [ + { + "properties": { + "type": { + "type": "string", + "description": "A step which runs a primitive.", + "enum": ["PRIMITIVE"] + }, + "primitive": { + "$ref": "#/definitions/primitive_reference" + }, + "arguments": { + "allOf": [{"$ref": "#/definitions/arguments"}], + "description": "Arguments to a primitive as a whole. Not all arguments defined by a primitive have to be specified here. Furthermore, only those which are specified as kind \"PIPELINE\" in primitive's metadata can be specified. Constructor arguments should not be specified here, because they can be automatically created from other information." 
+ }, + "outputs": { + "type": "array", + "description": "A list of produce method names of this primitive which are outputs of this step.", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The name of the primitive's produce method which returns output data available by this primitive step." + } + }, + "required": [ + "id" + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "hyperparams": { + "allOf": [{"$ref": "#/definitions/hyperparams"}], + "description": "Only those hyper-parameters which should be fixed as part of the pipeline should be specified here, e.g., control hyper-parameters. Any hyper-parameter specified here should not be further modified (e.g., tuned). Author of a pipeline decides which hyper-parameters are which, probably based on their semantic type." + }, + "users": { + "$ref": "#/definitions/users" + } + }, + "required": [ + "type", + "primitive" + ] + }, + { + "properties": { + "type": { + "type": "string", + "description": "A step which runs another pipeline.", + "enum": ["SUBPIPELINE"] + }, + "pipeline": { + "allOf": [{"$ref": "#/definitions/pipeline_or_pipeline_reference"}], + "description": "A pipeline to run at this step, of pipelines known to the system." + }, + "inputs": { + "type": "array", + "description": "Mapping between data references available in the context of the outer pipeline to inputs of sub-pipeline, in order.", + "items": { + "type": "object", + "properties": { + "data": { + "allOf": [{"$ref": "#/definitions/data_reference"}], + "description": "Data reference, probably of an output of a step or outer pipeline input, mapped to sub-pipeline's inputs in order." + } + }, + "required": [ + "data" + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "outputs": { + "type": "array", + "description": "Mapping between outputs of a sub-pipeline to names under which they should be exposed as outputs of this step, in order. For example: [{\"id\": \"predictions\"}] would map the first output of a sub-pipeline to a data reference \"steps.X.predictions\" where \"X\" is the step number of a given sub-pipeline step.", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The name used in constructing the step's output data reference. If not provided, this output is skipped." + } + }, + "required": [ + "id" + ], + "additionalProperties": true + }, + "minItems": 1 + } + }, + "required": [ + "type", + "pipeline", + "inputs", + "outputs" + ] + }, + { + "properties": { + "type": { + "type": "string", + "description": "This step is used to represent a pipeline template which can be used to generate full pipelines. Not to be used in the metalearning context. Additional properties to further specify the placeholder constraints are allowed.", + "enum": ["PLACEHOLDER"] + }, + "inputs": { + "type": "array", + "description": "Mapping between data references available in the context of the outer pipeline which can be used as inputs to resulting sub-pipeline, in order. Resulting sub-pipeline does not have to use all the inputs, but it cannot use any other inputs.", + "items": { + "type": "object", + "properties": { + "data": { + "allOf": [{"$ref": "#/definitions/data_reference"}], + "description": "Data reference, probably of an output of a step or outer pipeline input, mapped to resulting sub-pipeline's inputs in order." 
+ } + }, + "required": [ + "data" + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "outputs": { + "type": "array", + "description": "Mapping between outputs of a resulting sub-pipeline to names under which they should be exposed as outputs of this step, in order. For example: [{\"id\": \"predictions\"}] would map the first output of a resulting sub-pipeline to a data reference \"steps.X.predictions\" where \"X\" is the step number of a given placeholder step.", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The name used in constructing the step's output data reference." + } + }, + "required": [ + "id" + ], + "additionalProperties": true + }, + "minItems": 1 + } + }, + "required": [ + "type", + "inputs", + "outputs" + ] + } + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "model_features": { + "type": "array", + "description": "A set of features supported by an underlying model of a primitive.", + "items": { + "enum": [ + "BINARY", + "MULTICLASS", + "MULTILABEL", + "UNIVARIATE", + "MULTIVARIATE", + "OVERLAPPING", + "NONOVERLAPPING" + ] + }, + "minItems": 1 + }, + "primitive_reference": { + "type": "object", + "properties": { + "id": { + "$ref": "#/definitions/id" + }, + "version": { + "$ref": "#/definitions/version" + }, + "python_path": { + "$ref": "#/definitions/python_path" + }, + "name": { + "$ref": "#/definitions/name" + }, + "digest": { + "$ref": "#/definitions/digest" + } + }, + "required": [ + "id", + "version", + "python_path", + "name" + ], + "additionalProperties": true + }, + "file_columns": { + "type": "array", + "description": "When the value is referencing a file with columns (e.g., a CSV file), columns metadata might be known in advance.", + "items": { + "type": "object", + "properties": { + "column_index": { + "$ref": "#/definitions/column_index" + }, + "column_name": { + "$ref": "#/definitions/column_name" + }, + "description": { + "$ref": "#/definitions/description" + }, + "semantic_types": { + "$ref": "#/definitions/semantic_types" + }, + "foreign_key": { + "$ref": "#/definitions/foreign_key" + }, + "boundary_for" : { + "$ref": "#/definitions/boundary_for" + }, + "time_granularity" : { + "$ref": "#/definitions/time_granularity" + } + }, + "required": [ + "column_index", + "column_name" + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "file_columns_count": { + "type": "integer", + "description": "When the value is referencing a file with columns (e.g., a CSV file), number of columns might be known in advance." + }, + "document_reference": { + "description": "A reference to another document.", + "type": "object", + "properties": { + "id": { + "$ref": "#/definitions/id" + }, + "digest": { + "$ref": "#/definitions/digest" + } + }, + "required": [ + "id" + ], + "additionalProperties": true + }, + "pipeline_run_reference": { + "description": "A reference to a pipeline run.", + "type": "object", + "properties": { + "id": { + "$ref": "#/definitions/hash_id" + } + }, + "required": [ + "id" + ], + "additionalProperties": true + }, + "problem_reference": { + "allOf": [{"$ref": "#/definitions/document_reference"}], + "description": "A reference to a problem." + }, + "dataset_reference": { + "allOf": [{"$ref": "#/definitions/document_reference"}], + "description": "A reference to a dataset." + }, + "pipeline_reference": { + "allOf": [{"$ref": "#/definitions/document_reference"}], + "description": "A reference to a pipeline." 
+ }, + "problem_or_problem_reference": { + "anyOf": [ + { + "$ref": "#/definitions/problem_reference" + }, + { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/problem.json" + } + ] + }, + "dataset": { + "allOf": [{"$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/container.json"}], + "description": "A dataset." + }, + "dataset_or_dataset_reference": { + "anyOf": [ + { + "$ref": "#/definitions/dataset_reference" + }, + { + "$ref": "#/definitions/dataset" + } + ] + }, + "pipeline_or_pipeline_reference": { + "anyOf": [ + { + "$ref": "#/definitions/pipeline_reference" + }, + { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json" + } + ] + }, + "datasets": { + "description": "A list of input datasets. The order matters because it is mapped to pipeline inputs.", + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/dataset_or_dataset_reference" + } + }, + "status": { + "description": "Indicates whether a pipeline, or some portion of it, ran successfully. May include a message with more details about the status.", + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": ["SUCCESS", "FAILURE"] + }, + "message": { + "description": "Further information describing the status. Though not required, this is especially helpful in a FAILURE state. It can be or include a stacktrace.", + "type": "string" + } + }, + "required": [ + "state" + ], + "additionalProperties": true + }, + "logging": { + "description": "Python LogRecord entries recorded during a method call. See https://docs.python.org/3/library/logging.html#logging.LogRecord for more information.", + "type": "array", + "minItems": 0, + "items": { + "description": "A Python LogRecord entry. Other custom fields are allowed (Python LogRecord can be extended with custom fields).", + "type": "object", + "properties": { + "name": { + "description": "The name of the logger used to log the event represented by this LogRecord. 
Note that this name will always have this value, even though it may be emitted by a handler attached to a different (ancestor) logger.", + "type": "string" + }, + "msg": { + "description": "The non-interpolated event description message.", + "type": "string" + }, + "args": { + "description": "Arguments for message interpolation, when JSON-serializable.", + "type": ["object", "array"] + }, + "levelname": { + "description": "Level at which the logging call was made.", + "type": "string" + }, + "levelno": { + "description": "Level at which the logging call was made.", + "type": "integer" + }, + "pathname": { + "description": "The full pathname of the source file where the logging call was made.", + "type": "string" + }, + "filename": { + "description": "Just the filename of the source file where the logging call was made.", + "type": "string" + }, + "module": { + "description": "Python module name where the logging call was made.", + "type": "string" + }, + "exc_text": { + "description": "Python exception and formatted stack trace as text.", + "type": "string" + }, + "exc_type": { + "description": "Python exception type name.", + "type": "string" + }, + "stack_info": { + "description": "Formatted stack trace as text.", + "type": "string" + }, + "lineno": { + "description": "The line number in the source file where the logging call was made.", + "type": "integer" + }, + "funcName": { + "description": "The name of the function or method from which the logging call was made.", + "type": "string" + }, + "created": { + "type": "number" + }, + "msecs": { + "type": "number" + }, + "relativeCreated": { + "type": "number" + }, + "thread": { + "type": "integer" + }, + "threadName": { + "type": "string" + }, + "processName": { + "type": "string" + }, + "process": { + "type": "integer" + }, + "message": { + "description": "The interpolated event description message.", + "type": "string" + }, + "asctime": { + "type": "string" + } + }, + "required": [ + "name", + "msg", + "levelname", + "levelno", + "pathname", + "filename", + "module", + "lineno", + "funcName", + "created", + "msecs", + "relativeCreated", + "message", + "asctime" + ], + "additionalProperties": true + } + }, + "method_call": { + "description": "Information about a method called on the primitive.", + "oneOf": [ + { + "allOf": [ + { + "description": "Any method call except the constructor.", + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the Python method called.", + "not": { + "enum": ["__init__"] + } + }, + "arguments": { + "$ref": "#/definitions/arguments", + "description": "Pipeline arguments to methods are provided in a standard way, but methods can have additional runtime arguments or arguments overriding hyper-parameters for a call. Those are the values that have to be explicitly provided here." 
+ } + }, + "required": [ + "name" + ], + "additionalProperties": true + }, + { + "$ref": "#/definitions/method_call_base" + } + ] + }, + { + "allOf": [ + { + "description": "A constructor method call.", + "type": "object", + "properties": { + "name": { + "description": "Name of the Python method called.", + "type": "string", + "enum": ["__init__"] + } + }, + "required": [ + "name" + ], + "not": { + "description": "Arguments to constructor should not be provided, because they are provided by the runtime and are runtime specific (paths to volumes, etc.).", + "required": [ + "arguments" + ] + }, + "additionalProperties": true + }, + { + "$ref": "#/definitions/method_call_base" + } + ] + } + ] + }, + "method_call_base": { + "description": "General information about a single method call, common to all method calls.", + "type": "object", + "properties": { + "logging": { + "$ref": "#/definitions/logging" + }, + "metadata": { + "description": "If the method call returns a container type, we store its metadata.", + "anyOf": [ + { + "type": "object", + "description": "For \"CallResult\", we store metadata under \"value\" key.", + "properties": { + "value": { + "$ref": "#/definitions/metadata_values" + } + } + }, + { + "type": "object", + "description": "For \"MultiCallResult\", keys should match \"values\" names, which are primitive's produce method names", + "additionalProperties": false, + "patternProperties": { + "^produce[A-Za-z_0-9]*$": { + "$ref": "#/definitions/metadata_values" + } + } + } + ] + }, + "status": { + "$ref": "#/definitions/status" + }, + "start": { + "allOf": [{"$ref": "#/definitions/timestamp"}], + "description": "Absolute timestamp of the start of the method call." + }, + "end": { + "allOf": [{"$ref": "#/definitions/timestamp"}], + "description": "Absolute timestamp of the end of the method call." + }, + "calls": { + "description": "The number of additional times this method was called consecutively with the exactly same arguments in same runtime environment. When omitted, this method was called once. When used, the corresponding \"start\" timestamp is recorded before the first method call and the corresponding \"end\" timestamp is recorded after the final method call. This is an optimization allowing the combining of identical consecutive method calls into one record.", + "type": "integer", + "minimum": 1 + }, + "environment": { + "$ref": "#/definitions/runtime_environment" + } + }, + "required": [ + "status", + "start", + "end" + ], + "additionalProperties": true + }, + "metadata_values": { + "description": "This matches the output of \"Metadata.to_json_structure\" method.", + "type": "array", + "items": { + "type": "object", + "properties": { + "selector": { + "type": "array", + "items": { + "type": ["string", "integer"] + }, + "minItems": 0 + }, + "metadata": { + "description": "Metadata associated with the value at \"selector\".", + "type": "object" + } + } + }, + "minItems": 0 + }, + "runtime_environment": { + "description": "A description of the runtime environment, including engine versions, Docker images, compute resources, and benchmarks.", + "type": "object", + "properties": { + "id": { + "description": "A hash ID computed over the whole runtime environment document to allow for faster identification of same runtime environments.", + "allOf": [{"$ref": "#/definitions/hash_id"}] + }, + "worker_id": { + "description": "A globally unique identifier for the machine on which this pipeline run occurred. 
The idea is that the worker specifies the system inside which the pipeline is run so that multiple runs on the same system can be grouped together.", + "type": "string" + }, + "reference_engine_version": { + "description": "A version of the released d3m core package with the reference engine used to run this pipeline. Provide the version of the released d3m core package even if your engine is subclassing the reference engine. Alternatively, if you are not using a released d3m core package, provide a git commit hash of the d3m core package repository with the reference engine you used.", + "anyOf": [ + { + "$ref": "#/definitions/version" + }, + { + "$ref": "#/definitions/git_commit" + } + ] + }, + "engine_version": { + "description": "A version of your engine used to run this pipeline (or reference engine, if directly using it). This can be useful for the author of the pipeline run to record, but is less useful for others. For others, \"reference_engine_version\" is probably more useful.", + "anyOf": [ + { + "$ref": "#/definitions/version" + }, + { + "$ref": "#/definitions/git_commit" + } + ] + }, + "base_docker_image": { + "description": "If a pipeline is run inside a Docker container which is based on a public image or known base image, then this field should specify that Docker image. I.e., if your system is using a private Docker image but is extending a \"complete\" Docker image, then list the \"complete\" Docker image here.", + "allOf": [{"$ref": "#/definitions/docker_image"}] + }, + "docker_image": { + "description": "If a pipeline is run inside a Docker container, this field should specify the Docker image used to run this pipeline. This can be useful for the author of the pipeline run to record, but is less useful for others. For others, \"base_docker_image\" is probably more useful.", + "allOf": [{"$ref": "#/definitions/docker_image"}] + }, + "resources": { + "$ref": "#/definitions/compute_resources" + }, + "reference_benchmarks": { + "$ref": "#/definitions/reference_benchmarks" + } + }, + "required": [ + "id", + "worker_id" + ] + }, + "pipeline_run_steps": { + "description": "All of the steps invoked in the pipeline run. There is a one-to-one correspondence between this array and the steps in the pipeline.", + "type": "array", + "items": { + "type": "object", + "oneOf": [ + { + "properties": { + "type": { + "type": "string", + "description": "A primitive step.", + "enum": ["PRIMITIVE"] + }, + "hyperparams": { + "allOf": [{"$ref": "#/definitions/hyperparams"}], + "description": "Together with hyper-parameters listed as part of a pipeline they complete all values necessary to instantiate \"hyperparams\" constructor argument of the primitive. All hyper-parameter values have to be listed explicitly, even if the value matches the default value of a hyper-parameter." + }, + "random_seed": { + "description": "Random seed used, if the primitive accepts a random seed.", + "type": "integer" + }, + "method_calls": { + "description": "Information about the methods called on the primitive, in the order called.", + "type": "array", + "items": { + "$ref": "#/definitions/method_call" + }, + "minItems": 1 + }, + "status": { + "$ref": "#/definitions/status" + }, + "start": { + "allOf": [{"$ref": "#/definitions/timestamp"}], + "description": "Absolute timestamp of the start of the execution of the primitive. 
Execution of the primitive starts with the first method call but it can also include any preparation work not captured by method calls, so timestamp can be sooner than the first method call timestamp." + }, + "end": { + "allOf": [{"$ref": "#/definitions/timestamp"}], + "description": "Absolute timestamp of the end of the execution of the primitive. Execution of the primitive ends with the last method call but it can also include any cleanup work not captured by method calls, so timestamp can be later than the last method call timestamp." + }, + "environment": { + "allOf": [{"$ref": "#/definitions/runtime_environment"}], + "description": "Provided if this step was run in a different runtime environment than the runtime environment specified at a higher level." + } + }, + "$comment": "TODO: Make \"start\" and \"end\" required when the next version of this schema is released.", + "required": [ + "type", + "status" + ], + "not": { + "required": [ + "steps" + ] + } + }, + { + "properties": { + "type": { + "type": "string", + "description": "A sub-pipeline step.", + "enum": ["SUBPIPELINE"] + }, + "steps": { + "allOf": [{"$ref": "#/definitions/pipeline_run_steps"}], + "description": "Steps of a sub-pipeline, recursively." + }, + "status": { + "$ref": "#/definitions/status" + }, + "start": { + "allOf": [{"$ref": "#/definitions/timestamp"}], + "description": "Absolute timestamp of the start of the execution of the sub-pipeline. Execution of the sub-pipeline starts with the execution of the first primitive but it can also include any preparation work not captured by primitive, so timestamp can be sooner than the first primitive timestamp." + }, + "end": { + "allOf": [{"$ref": "#/definitions/timestamp"}], + "description": "Absolute timestamp of the end of the execution of the sub-pipeline. Execution of the sub-pipeline ends with the execution of the last primitive but it can also include any cleanup work not captured by primitive, so timestamp can be later than the last primitive timestamp." + }, + "environment": { + "allOf": [{"$ref": "#/definitions/runtime_environment"}], + "description": "Provided if this step was run in a different runtime environment than the runtime environment specified at a higher level." 
+ }, + "random_seed": { + "$ref": "#/definitions/pipeline_random_seed" + } + }, + "$comment": "TODO: Make \"start\", \"end\", and \"random_seed\" required when the next version of this schema is released.", + "required": [ + "type", + "status" + ], + "not": { + "required": [ + "hyperparams", + "random_seed", + "method_calls" + ] + } + } + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "performance_metric": { + "type": "object", + "properties": { + "metric": { + "type": "string" + }, + "params": { + "type": "object" + } + }, + "required": [ + "metric" + ], + "oneOf": [ + { + "properties": { + "metric": {"enum": ["ACCURACY"]} + } + }, + { + "properties": { + "metric": {"enum": ["PRECISION"]}, + "params": { + "type": "object", + "properties": { + "pos_label": { + "type": "string" + } + }, + "additionalProperties": true + } + } + }, + { + "properties": { + "metric": {"enum": ["RECALL"]}, + "params": { + "type": "object", + "properties": { + "pos_label": { + "type": "string" + } + }, + "additionalProperties": true + } + } + }, + { + "properties": { + "metric": {"enum": ["F1"]}, + "params": { + "type": "object", + "properties": { + "pos_label": { + "type": "string" + } + }, + "additionalProperties": true + } + } + }, + { + "properties": { + "metric": {"enum": ["F1_MICRO"]} + } + }, + { + "properties": { + "metric": {"enum": ["F1_MACRO"]} + } + }, + { + "properties": { + "metric": {"enum": ["ROC_AUC"]} + } + }, + { + "properties": { + "metric": {"enum": ["ROC_AUC_MICRO"]} + } + }, + { + "properties": { + "metric": {"enum": ["ROC_AUC_MACRO"]} + } + }, + { + "properties": { + "metric": {"enum": ["MEAN_SQUARED_ERROR"]} + } + }, + { + "properties": { + "metric": {"enum": ["ROOT_MEAN_SQUARED_ERROR"]} + } + }, + { + "properties": { + "metric": {"enum": ["MEAN_ABSOLUTE_ERROR"]} + } + }, + { + "properties": { + "metric": {"enum": ["R_SQUARED"]} + } + }, + { + "properties": { + "metric": {"enum": ["NORMALIZED_MUTUAL_INFORMATION"]} + } + }, + { + "properties": { + "metric": {"enum": ["JACCARD_SIMILARITY_SCORE"]}, + "params": { + "type": "object", + "properties": { + "pos_label": { + "type": "string" + } + }, + "additionalProperties": true + } + } + }, + { + "properties": { + "metric": {"enum": ["PRECISION_AT_TOP_K"]}, + "params": { + "type": "object", + "properties": { + "k": { + "type": "integer" + } + }, + "additionalProperties": true + } + } + }, + { + "properties": { + "metric": {"enum": ["OBJECT_DETECTION_AVERAGE_PRECISION"]} + } + }, + { + "properties": { + "metric": {"enum": ["HAMMING_LOSS"]} + } + }, + { + "properties": { + "metric": {"enum": ["HITS_AT_K"]}, + "params": { + "type": "object", + "properties": { + "k": { + "type": "integer" + } + }, + "additionalProperties": true + } + } + }, + { + "properties": { + "metric": {"enum": ["MEAN_RECIPROCAL_RANK"]} + } + } + ], + "additionalProperties": true + }, + "targets": { + "type": "array", + "items": { + "type": "object", + "properties": { + "target_index": { + "type": "integer", + "description": "An index of the target in this list of targets, 0-based." + }, + "resource_id": { + "$ref": "#/definitions/resource_id" + }, + "column_index": { + "$ref": "#/definitions/column_index" + }, + "column_name": { + "$ref": "#/definitions/column_name" + }, + "clusters_number": { + "type": "integer", + "description": "The number of clusters to be generated by the solution algorithm (if this information is known apriori)." 
+ } + }, + "required": [ + "target_index", + "resource_id", + "column_index", + "column_name" + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "scores": { + "description": "Scores should match the output of the scoring pipeline.", + "type": "array", + "items": { + "type": "object", + "properties": { + "metric": { + "description": "Description of a metric used. Generally it should match one from the problem description, but it can also be different.", + "anyOf": [ + { + "$ref": "#/definitions/performance_metric" + }, + { + "type": "object", + "properties": { + "metric": { + "description": "A custom metric name. Any custom metric name should match the metric name in the scoring pipeline output.", + "type": "string" + } + }, + "required": [ + "metric" + ], + "additionalProperties": true + } + ] + }, + "value": { + "description": "The value of the scoring metric.", + "type": "number" + }, + "normalized": { + "description": "The normalized value of the scoring metric. Value is from the [0, 1] interval, where higher is better.", + "type": "number" + } + }, + "required": [ + "metric", + "value" + ], + "additionalProperties": true + }, + "minItems": 1 + }, + "predictions": { + "description": "The predictions table generated from the pipeline, including the index column. This follows the MIT Lincoln Labs predictions format. There is a one-to-one correspondence between the header array and the values array.", + "type": "object", + "properties": { + "header": { + "description": "A list of predictions table's column names.", + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/column_name" + } + }, + "values": { + "description": "An array of predictions. Every element of this array is a column of values corresponding to one header element.", + "type": "array", + "minItems": 1, + "items": { + "description": "A single column of values.", + "type": "array", + "minItems": 1, + "items": { + "description": "A single prediction value." 
+ } + } + } + }, + "required": [ + "header", + "values" + ], + "additionalProperties": true + }, + "pipeline_run_results": { + "description": "The predictions of the pipeline and corresponding metric scores.", + "type": "object", + "properties": { + "scores": { + "$ref": "#/definitions/scores" + }, + "predictions": { + "$ref": "#/definitions/predictions" + } + }, + "additionalProperties": true + }, + "additional_pipeline": { + "description": "An auxiliary pipeline used for preparing data or scoring.", + "type": "object", + "properties": { + "pipeline": { + "$ref": "#/definitions/pipeline_or_pipeline_reference" + }, + "steps": { + "$ref": "#/definitions/pipeline_run_steps" + }, + "status": { + "$ref": "#/definitions/status" + }, + "start": { + "$ref": "#/definitions/pipeline_run_start" + }, + "end": { + "$ref": "#/definitions/pipeline_run_end" + }, + "random_seed": { + "$ref": "#/definitions/pipeline_random_seed" + }, + "environment": { + "$ref": "#/definitions/runtime_environment" + } + }, + "$comment": "TODO: Make \"start\", \"end\", and \"random_seed\" required when the next version of this schema is released.", + "required": [ + "pipeline", + "status" + ], + "additionalProperties": true + }, + "pipeline_run": { + "description": "How a pipeline was run and corresponding results.", + "type": "object", + "properties": { + "phase": { + "description": "A string representing the phase with which this pipeline run is associated.", + "anyOf": [ + { + "type": "string", + "enum": [ + "FIT", + "PRODUCE" + ] + }, + { + "type": "string", + "description": "Some other string representing the phase, for non-standard phases." + } + ] + }, + "is_standard_pipeline": { + "description": "Has been this pipeline run as a standard pipeline or not?", + "type": "boolean" + }, + "fold_group": { + "description": "Groups pipeline runs which belong together. E.g., they are part of the same cross-validation evaluation run.", + "type": "object", + "properties": { + "id": { + "$ref": "#/definitions/id" + }, + "fold": { + "description": "The cross-validation fold index. 
If not part of the cross-validation, this can be set to 0.", + "type": "integer", + "minimum": 0 + } + }, + "required": [ + "id", + "fold" + ], + "additionalProperties": true + }, + "data_preparation": { + "$ref": "#/definitions/additional_pipeline" + }, + "scoring": { + "allOf": [ + { + "$ref": "#/definitions/additional_pipeline" + }, + { + "properties": { + "datasets": { + "$ref": "#/definitions/datasets" + } + } + } + ] + }, + "results": { + "$ref": "#/definitions/pipeline_run_results" + } + }, + "oneOf": [ + { + "allOf": [ + { + "not": { + "required": [ + "data_preparation" + ] + } + }, + { + "not": { + "required": [ + "scoring" + ] + }, + "properties": { + "results": { + "not": { + "required": [ + "scores" + ] + } + } + } + } + ] + }, + { + "required": [ + "data_preparation" + ], + "not": { + "required": [ + "scoring" + ] + }, + "properties": { + "results": { + "not": { + "required": [ + "scores" + ] + } + } + } + }, + { + "properties": { + "scoring": { + "not": { + "required": [ + "datasets" + ] + } + } + }, + "required": [ + "data_preparation", + "scoring" + ] + }, + { + "properties": { + "scoring": { + "required": [ + "datasets" + ] + } + }, + "required": [ + "scoring" + ], + "not": { + "required": [ + "data_preparation" + ] + } + } + ], + "$comment": "TODO: Make \"is_standard_pipeline\" required when the next version of this schema is released.", + "required": [ + "phase" + ], + "additionalProperties": true + }, + "previous_pipeline_run": { + "allOf": [{"$ref": "#/definitions/pipeline_run_reference"}], + "description": "References a pipeline run that occurred immediately before this pipeline run. Used for reproducibility, for example a test run would reference the train run. If it is not provided, it indicates the first pipeline run." + }, + "compute_resources": { + "description": "Compute resources available.", + "type": "object", + "properties": { + "cpu": { + "description": "CPU devices on the worker. If possible, only those available ot the pipeline runtime, otherwise all.", + "type": "object", + "properties": { + "devices": { + "description": "An array of CPU devices.", + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "properties": { + "name": { + "description": "A physical CPU device name.", + "type": "string" + } + }, + "additionalProperties": true + } + }, + "physical_present": { + "description": "The number of physical CPU cores present on the worker, but not necessary fully available the pipeline runtime.", + "type": "integer", + "minimum": 1 + }, + "logical_present": { + "description": "The number of logical CPU cores present on the worker, but not necessary fully available the pipeline runtime.", + "type": "integer", + "minimum": 1 + }, + "configured_available": { + "description": "The amount of CPU resources available to the pipeline runtime in Kubernetes CPU units or equivalent. See https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu for more information.", + "type": "string" + }, + "constraints": { + "description": "Any constraints as found in the cgroups (e.g., inside of a resource limited Docker container).", + "type": "object" + } + }, + "additionalProperties": true + }, + "gpu": { + "description": "GPU devices on the worker. 
If possible, only those available ot the pipeline runtime, otherwise all.", + "type": "object", + "properties": { + "devices": { + "description": "An array of GPU devices.", + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "properties": { + "name": { + "description": "A GPU device name.", + "type": "string" + }, + "memory": { + "description": "The total GPU memory on this device, in bytes.", + "type": "integer", + "minimum": 1 + } + }, + "additionalProperties": true + } + }, + "total_memory": { + "description": "The total GPU memory over all devices, in bytes, but not necessary fully available the pipeline runtime.", + "type": "integer", + "minimum": 1 + }, + "configured_memory": { + "description": "The amount of GPU memory available to the pipeline runtime in Kubernetes memory units or equivalent. See https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-memory for more information.", + "type": "string" + }, + "constraints": { + "description": "Any constraints as found in the cgroups (e.g., inside of a resource limited Docker container).", + "type": "object" + } + }, + "additionalProperties": true + }, + "memory": { + "description": "Memory devices on the worker. If possible, only those available ot the pipeline runtime, otherwise all.", + "type": "object", + "properties": { + "devices": { + "description": "An array of memory devices.", + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "properties": { + "name": { + "description": "A physical memory device name.", + "type": "string" + }, + "memory": { + "description": "The amount of memory on this device, in bytes.", + "type": "integer", + "minimum": 1 + } + }, + "additionalProperties": true + } + }, + "total_memory": { + "description": "The total memory over all memory devices, in bytes, but not necessary fully available the pipeline runtime.", + "type": "integer", + "minimum": 1 + }, + "configured_memory": { + "description": "The amount of memory available to the pipeline runtime in Kubernetes memory units or equivalent. See https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-memory for more information.", + "type": "string" + }, + "constraints": { + "description": "Any constraints as found in the cgroups (e.g., inside of a resource limited Docker container).", + "type": "object" + } + }, + "additionalProperties": true + } + }, + "additionalProperties": true + }, + "reference_benchmarks": { + "description": "Reference benchmarks are pipeline runs of standard and optional additional benchmark pipelines which should be run on the worker during same or equivalent session so that this pipeline run can be expected to have the same timing characteristics. 
If it is known that worker configuration has not been changed between sessions, benchmark pipeline runs can be reused.", + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/pipeline_run_reference" + } + }, + "git_commit": { + "description": "A reference to a particular git commit hash.", + "type": "string", + "pattern": "^[a-fA-F0-9]{40}$" + }, + "pipeline_run_start": { + "description": "Absolute timestamp of the start of the run of the pipeline.", + "allOf": [{"$ref": "#/definitions/timestamp"}] + }, + "pipeline_run_end": { + "description": "Absolute timestamp of the end of the run of the pipeline.", + "allOf": [{"$ref": "#/definitions/timestamp"}] + }, + "pipeline_random_seed": { + "type": "integer", + "description": "The main random seed used to run the pipeline." + }, + "pure_primitive": { + "type": "boolean", + "description": "Does a primitive behave as a pure function. Are produced values always the same for same hyper-parameter values, arguments, random seed, and method calls made, including the order of method calls? Are there no side effects (mutations of state outside of primitive's internal state) when running the primitive? If primitive is connecting to Internet or some other resource not controlled by the runtime, then primitive is not pure. If primitive caches files during execution, then primitive is pure despite modifying more than primitive's internal state, given that caching is implemented so that it does not leak information between different runs of a primitive.", + "default": true + }, + "can_use_gpus": { + "type": "boolean", + "description": "Can a primitive use GPUs if available? Caller should control available GPUs to the primitive through \"CUDA_VISIBLE_DEVICES\" environment variable.", + "default": true + }, + "all_distinct_values": { + "description": "All possible distinct non-missing values in a categorical attribute.", + "type": "array", + "minItems": 1 + } + } +} diff --git a/d3m/d3m/metadata/schemas/v0/pipeline.json b/d3m/d3m/metadata/schemas/v0/pipeline.json new file mode 100644 index 0000000..8b5ffe6 --- /dev/null +++ b/d3m/d3m/metadata/schemas/v0/pipeline.json @@ -0,0 +1,56 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "id": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", + "title": "Pipeline description", + "description": "Schema for a description of a pipeline.", + "type": "object", + "properties": { + "schema": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/schema" + }, + "id": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/id" + }, + "digest": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/digest" + }, + "source": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/source" + }, + "created": { + "allOf": [{"$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/timestamp"}], + "description": "A timestamp when was the pipeline was created." 
+ }, + "name": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/name" + }, + "other_names": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/other_names" + }, + "description": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/description" + }, + "users": { + "allOf": [{"$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/users"}], + "description": "A list of users who are associated with the creation of this pipeline." + }, + "inputs": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/pipeline_inputs" + }, + "outputs": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/pipeline_outputs" + }, + "steps": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/pipeline_steps" + } + }, + "required": [ + "id", + "schema", + "created", + "inputs", + "outputs", + "steps" + ], + "additionalProperties": true +} diff --git a/d3m/d3m/metadata/schemas/v0/pipeline_run.json b/d3m/d3m/metadata/schemas/v0/pipeline_run.json new file mode 100644 index 0000000..135c3d5 --- /dev/null +++ b/d3m/d3m/metadata/schemas/v0/pipeline_run.json @@ -0,0 +1,66 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "id": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline_run.json", + "title": "Pipeline run description", + "description": "Schema for a description of one run of a pipeline. Pipeline outputs and scores are recorded. It includes references to input dataset(s), a problem, and a pipeline.", + "type": "object", + "properties": { + "schema": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/schema" + }, + "id": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/hash_id" + }, + "problem": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/problem_or_problem_reference" + }, + "datasets": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/datasets" + }, + "pipeline": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/pipeline_or_pipeline_reference" + }, + "steps": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/pipeline_run_steps" + }, + "status": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/status" + }, + "start": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/pipeline_run_start" + }, + "end": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/pipeline_run_end" + }, + "run": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/pipeline_run" + }, + "context": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/context" + }, + "previous_pipeline_run": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/previous_pipeline_run" + }, + "users": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/users" + }, + "environment": { + "$ref": 
"https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/runtime_environment" + }, + "random_seed": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/pipeline_random_seed" + } + }, + "$comment": "TODO: Make \"start\", \"end\", and \"random_seed\" required when the next version of this schema is released.", + "required": [ + "schema", + "id", + "datasets", + "pipeline", + "status", + "run", + "context", + "environment" + ], + "additionalProperties": true +} diff --git a/d3m/d3m/metadata/schemas/v0/primitive.json b/d3m/d3m/metadata/schemas/v0/primitive.json new file mode 100644 index 0000000..5003720 --- /dev/null +++ b/d3m/d3m/metadata/schemas/v0/primitive.json @@ -0,0 +1,94 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "id": "https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json", + "title": "Primitive metadata", + "description": "Schema for metadata for primitives.", + "type": "object", + "properties": { + "schema": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/schema" + }, + "id": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/id" + }, + "version": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/version" + }, + "digest": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/digest" + }, + "name": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/name" + }, + "other_names": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/other_names" + }, + "description": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/description" + }, + "python_path": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/python_path" + }, + "original_python_path": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/original_python_path" + }, + "keywords": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/keywords" + }, + "source": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/source" + }, + "installation": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/installation" + }, + "primitive_code": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/primitive_code" + }, + "structural_type": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/structural_type" + }, + "location_uris": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/location_uris" + }, + "algorithm_types": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/algorithm_types" + }, + "primitive_family": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/primitive_family" + }, + "preconditions": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/preconditions" + }, + "effects": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/effects" + }, + "hyperparams_to_tune": { + "$ref": 
"https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/hyperparams_to_tune" + }, + "supported_media_types": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/supported_media_types" + }, + "model_features": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/model_features" + }, + "pure_primitive": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/pure_primitive" + }, + "can_use_gpus": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/can_use_gpus" + } + }, + "required": [ + "algorithm_types", + "id", + "name", + "original_python_path", + "primitive_code", + "primitive_family", + "python_path", + "schema", + "structural_type", + "version" + ], + "additionalProperties": true +} diff --git a/d3m/d3m/metadata/schemas/v0/problem.json b/d3m/d3m/metadata/schemas/v0/problem.json new file mode 100644 index 0000000..d082e92 --- /dev/null +++ b/d3m/d3m/metadata/schemas/v0/problem.json @@ -0,0 +1,50 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "id": "https://metadata.datadrivendiscovery.org/schemas/v0/problem.json", + "title": "Problem description", + "description": "Schema for problem description.", + "type": "object", + "properties": { + "id": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/id" + }, + "version": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/version" + }, + "name": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/name" + }, + "other_names": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/other_names" + }, + "description": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/description" + }, + "digest": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/digest" + }, + "schema": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/schema" + }, + "source": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/source" + }, + "problem": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/problem" + }, + "inputs": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/problem_inputs" + }, + "data_augmentation": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/data_augmentation" + }, + "location_uris": { + "$ref": "https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json#/definitions/location_uris" + } + }, + "required": [ + "id", + "schema" + ], + "additionalProperties": true +} diff --git a/d3m/d3m/metrics.py b/d3m/d3m/metrics.py new file mode 100644 index 0000000..defe60c --- /dev/null +++ b/d3m/d3m/metrics.py @@ -0,0 +1,1100 @@ +import abc +import itertools +import typing + +import numpy # type: ignore +import pandas # type: ignore +from sklearn import metrics, preprocessing # type: ignore + +from d3m import container, exceptions, utils +from d3m.metadata import problem + +__ALL__ = ('class_map',) + +INDEX_COLUMN = 'd3mIndex' +CONFIDENCE_COLUMN = 'confidence' +RANK_COLUMN = 'rank' +EMPTY_VALUES = {numpy.nan, float('NaN'), ""} + +Truth = typing.TypeVar('Truth', 
bound=container.DataFrame) +Predictions = typing.TypeVar('Predictions', bound=container.DataFrame) +AllLabels = typing.TypeVar('AllLabels', bound=typing.Mapping[str, typing.Sequence]) + + +class Metric(metaclass=utils.AbstractMetaclass): + @abc.abstractmethod + def score(self, truth: Truth, predictions: Predictions) -> typing.Any: + raise NotImplementedError + + @classmethod + def align(cls, truth: Truth, predictions: Predictions) -> Predictions: + """ + Aligns columns and rows in ``predictions`` to match those in ``truth``. + + It requires that all index values in ``truth`` are present in ``predictions`` + and only those. It requires that any column name in ``truth`` is also + present in ``predictions``. Any additional columns present in ``predictions`` + are pushed to the right. + + Parameters + ---------- + truth: + Truth DataFrame. + predictions: + Predictions DataFrame. + + Returns + ------- + Predictions with aligned rows. + """ + + truth_columns_set = set(truth.columns) + predictions_columns_set = set(predictions.columns) + + if len(truth_columns_set) != len(truth.columns): + raise exceptions.InvalidArgumentValueError("Duplicate column names in predictions.") + if len(predictions_columns_set) != len(predictions.columns): + raise exceptions.InvalidArgumentValueError("Duplicate column names in predictions.") + + columns_diff = truth_columns_set - predictions_columns_set + if columns_diff: + raise exceptions.InvalidArgumentValueError(f"Not all columns which exist in truth exist in predictions: {sorted(columns_diff)}") + + if INDEX_COLUMN not in truth.columns: + raise exceptions.InvalidArgumentValueError(f"Index column '{INDEX_COLUMN}' is missing in truth.") + if INDEX_COLUMN not in predictions.columns: + raise exceptions.InvalidArgumentValueError(f"Index column '{INDEX_COLUMN}' is missing in predictions.") + + extra_predictions_columns = [column for column in predictions.columns if column not in truth_columns_set] + + # Reorder columns. + predictions = predictions.reindex(columns=list(truth.columns) + extra_predictions_columns) + + truth_index_set = set(truth.loc[:, INDEX_COLUMN]) + predictions_index_set = set(predictions.loc[:, INDEX_COLUMN]) + + if truth_index_set != predictions_index_set: + raise exceptions.InvalidArgumentValueError(f"Predictions and truth do not have the same set of index values.") + + truth_index_map: typing.Dict = {} + last_index = None + for i, index in enumerate(truth.loc[:, INDEX_COLUMN]): + if index in truth_index_map: + if last_index != index: + raise exceptions.InvalidArgumentValueError(f"Truth does not have all rows with same index value grouped together.") + else: + truth_index_map[index] = i + last_index = index + + predictions_index_order = [] + for index in predictions.loc[:, INDEX_COLUMN]: + predictions_index_order.append(truth_index_map[index]) + + # Reorder rows. + # TODO: How to not use a special column name? + # Currently it will fail if "__row_order__" already exists. We could set "allow_duplicates", but that would just hide + # the fact that we have a duplicated column. How can we then control over which one we really sort and which one we drop? 
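+        # A stable sort ("mergesort") keeps predictions rows that share the same index
+        # value in their original relative order while rows are reordered to match truth.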
+ predictions.insert(0, '__row_order__', predictions_index_order) + predictions.sort_values(['__row_order__'], axis=0, inplace=True, kind='mergesort') + predictions.drop('__row_order__', axis=1, inplace=True) + predictions.reset_index(drop=True, inplace=True) + + return predictions + + @classmethod + def get_target_columns(cls, dataframe: pandas.DataFrame) -> pandas.DataFrame: + """ + Returns only target columns present in ``dataframe``. + """ + + columns = list(dataframe.columns) + + index_columns = columns.count(INDEX_COLUMN) + if index_columns < 1: + raise exceptions.InvalidArgumentValueError(f"Index column '{INDEX_COLUMN}' is missing in predictions.") + elif index_columns > 1: + raise exceptions.InvalidArgumentValueError(f"Predictions contain multiple index columns '{INDEX_COLUMN}': {index_columns}") + + dataframe = dataframe.drop(columns=[INDEX_COLUMN]) + + confidence_columns = columns.count(CONFIDENCE_COLUMN) + if confidence_columns > 1: + raise exceptions.InvalidArgumentValueError(f"Predictions contain multiple confidence columns '{CONFIDENCE_COLUMN}': {confidence_columns}") + elif confidence_columns: + dataframe = dataframe.drop(columns=[CONFIDENCE_COLUMN]) + + rank_columns = columns.count(RANK_COLUMN) + if rank_columns > 1: + raise exceptions.InvalidArgumentValueError(f"Predictions contain multiple rank columns '{RANK_COLUMN}': {rank_columns}") + elif rank_columns: + dataframe = dataframe.drop(columns=[RANK_COLUMN]) + + if not len(dataframe.columns): + raise exceptions.InvalidArgumentValueError(f"Predictions do not contain any target columns.") + + return dataframe + + @classmethod + def get_index_column(cls, dataframe: pandas.DataFrame) -> pandas.DataFrame: + """ + Returns only index column present in ``dataframe``. + """ + + columns = list(dataframe.columns) + + index_columns = columns.count(INDEX_COLUMN) + if index_columns < 1: + raise exceptions.InvalidArgumentValueError(f"Index column '{INDEX_COLUMN}' is missing in predictions.") + elif index_columns > 1: + raise exceptions.InvalidArgumentValueError(f"Predictions contain multiple index columns '{INDEX_COLUMN}': {index_columns}") + + return dataframe.loc[:, [INDEX_COLUMN]] + + @classmethod + def get_confidence_column(cls, dataframe: pandas.DataFrame) -> pandas.DataFrame: + """ + Returns only confidence column present in ``dataframe``. + """ + + columns = list(dataframe.columns) + + confidence_columns = columns.count(CONFIDENCE_COLUMN) + if confidence_columns < 1: + raise exceptions.InvalidArgumentValueError(f"Confidence column '{CONFIDENCE_COLUMN}' is missing in predictions.") + elif confidence_columns > 1: + raise exceptions.InvalidArgumentValueError(f"Predictions contain multiple confidence columns '{CONFIDENCE_COLUMN}': {confidence_columns}") + + return dataframe.loc[:, [CONFIDENCE_COLUMN]] + + @classmethod + def get_rank_column(cls, dataframe: pandas.DataFrame) -> pandas.DataFrame: + """ + Returns only rank column present in ``dataframe``. 
+ """ + + columns = list(dataframe.columns) + + rank_columns = columns.count(RANK_COLUMN) + if rank_columns < 1: + raise exceptions.InvalidArgumentValueError(f"Rank column '{RANK_COLUMN}' is missing in predictions.") + elif rank_columns > 1: + raise exceptions.InvalidArgumentValueError(f"Predictions contain multiple rank columns '{RANK_COLUMN}': {rank_columns}") + + return dataframe.loc[:, [RANK_COLUMN]] + + @classmethod + def vectorize_columns(cls, dataframe: pandas.DataFrame) -> pandas.DataFrame: + """ + For every non-index column, convert all values in rows belonging to the + same index to one row with value being a tuple of values. The order of values + in a tuple follows the order of original rows and is preserved between columns. + """ + + columns_set = set(dataframe.columns) + + if len(columns_set) != len(dataframe.columns): + raise exceptions.InvalidArgumentValueError("Duplicate column names.") + + if INDEX_COLUMN not in dataframe.columns: + raise exceptions.InvalidArgumentValueError(f"Index column '{INDEX_COLUMN}' is missing.") + + columns_without_index = [column_name for column_name in dataframe.columns if column_name != INDEX_COLUMN] + + rows = {} + for index_value in dataframe.loc[:, INDEX_COLUMN].unique(): + rows[index_value] = { + # When we have multiple columns, some of them might not have values for all rows, + # and there are more rows because some other column needs them. In such case + # the column with less values should put an empty value in those extra rows + # (generally an empty string). + column_name: tuple(v for v in dataframe.loc[dataframe[INDEX_COLUMN] == index_value, column_name] if not cls.is_empty_value(v)) + for column_name in columns_without_index + } + + output = pandas.DataFrame.from_dict(rows, orient='index', columns=columns_without_index) + output.index.set_names([INDEX_COLUMN], inplace=True) + output.reset_index(inplace=True) + + return output + + @classmethod + def is_empty_value(cls, v: typing.Any) -> bool: + return v in EMPTY_VALUES or (isinstance(v, (float, numpy.float64, numpy.float32)) and numpy.isnan(v)) + + @classmethod + def one_hot_encode_target(cls, series: pandas.Series, all_labels: typing.Sequence) -> pandas.DataFrame: + """ + Returns one-hot-encoded dataframe where the columns are the labels of the target column, + which is provided as a series of tuples, where each tuple contains all labels of a + given sample. + """ + + mlb = preprocessing.MultiLabelBinarizer(all_labels) + encoded = mlb.fit_transform(series) + + return encoded + + @classmethod + def one_hot_encode_confidence(cls, series: pandas.Series, all_labels: typing.Sequence) -> pandas.DataFrame: + """ + Returns one-hot-encoded dataframe where the columns are the labels of the confidence column, + which is provided as a series of tuples, where each tuple contains confidence for all labels + of a given sample, ordered in order specified by ``labels``. + + Returned dataframe has instead of 0 or 1, a confidence value itself. 
+ """ + + encoded = series.apply(pandas.Series) + encoded.columns = all_labels + + return encoded + + +class _AllAsMultiLabelBase(Metric): + def __init__(self, all_labels: AllLabels = None) -> None: + self.all_labels = all_labels + + def encode_targets(self, truth: Truth, predictions: Predictions) -> typing.Sequence[typing.Tuple[pandas.DataFrame, pandas.DataFrame, typing.Sequence]]: + truth_vectorized = self.vectorize_columns(truth) + predictions_vectorized = self.vectorize_columns(predictions) + + predictions_vectorized = self.align(truth_vectorized, predictions_vectorized) + + truth_targets = self.get_target_columns(truth_vectorized) + predictions_targets = self.get_target_columns(predictions_vectorized) + + if len(truth_targets.columns) != len(predictions_targets.columns): + raise exceptions.InvalidArgumentValueError(f"The number of target columns in truth ({len(truth_targets.columns)}) and predictions ({len(predictions_targets.columns)}) do not match.") + + truth_targets_columns_set = set(truth_targets.columns) + + # This holds from checks in "align". + assert truth_targets_columns_set == set(predictions_targets.columns), (truth_targets.columns, predictions_targets.columns) + + result = [] + for column in truth_targets.columns: + # We know that column names are unique because we check in "align". + truth_target = truth_targets[column] + predictions_target = predictions_targets[column] + + truth_target_values_set = set(itertools.chain.from_iterable(truth_target)) + predictions_target_values_set = set(itertools.chain.from_iterable(predictions_target)) + + # If all labels were provided. + if self.all_labels is not None and column in self.all_labels: + all_labels_set = set(self.all_labels[column]) + + extra_truth_target_values_set = truth_target_values_set - all_labels_set + if extra_truth_target_values_set: + raise exceptions.InvalidArgumentValueError(f"Truth contains extra labels: {sorted(extra_truth_target_values_set)}") + + extra_predictions_target_values_set = predictions_target_values_set - all_labels_set + if extra_predictions_target_values_set: + raise exceptions.InvalidArgumentValueError(f"Predictions contain extra labels: {sorted(extra_predictions_target_values_set)}") + + # Otherwise we infer all labels from available data. + else: + all_labels_set = truth_target_values_set | predictions_target_values_set + + all_labels = sorted(all_labels_set) + + truth_target_encoded = self.one_hot_encode_target(truth_target, all_labels) + predictions_target_encoded = self.one_hot_encode_target(predictions_target, all_labels) + + result.append((truth_target_encoded, predictions_target_encoded, all_labels)) + + return result + + def score(self, truth: Truth, predictions: Predictions) -> float: + # We encode all as multi-label. 
+ encoded_targets = self.encode_targets(truth, predictions) + + if not encoded_targets: + raise exceptions.InvalidArgumentValueError("No target column.") + + scores = [] + for truth_target_encoded, predictions_target_encoded, labels in encoded_targets: + scores.append(self.score_one(truth_target_encoded, predictions_target_encoded, labels)) + + return float(numpy.mean(scores)) + + @abc.abstractmethod + def score_one(self, truth_target_encoded: pandas.DataFrame, predictions_target_encoded: pandas.DataFrame, all_labels: typing.Sequence) -> float: + raise NotImplementedError + + +class _MultiTaskBase(Metric): + def score(self, truth: Truth, predictions: Predictions) -> float: + predictions = self.align(truth, predictions) + + truth_targets = self.get_target_columns(truth) + predictions_targets = self.get_target_columns(predictions) + + if len(truth_targets.columns) != len(predictions_targets.columns): + raise exceptions.InvalidArgumentValueError(f"The number of target columns in truth ({len(truth_targets.columns)}) and predictions ({len(predictions_targets.columns)}) do not match.") + + if not len(truth_targets.columns): + raise exceptions.InvalidArgumentValueError("No target column.") + + # This holds from checks in "align". + assert set(truth_targets.columns) == set(predictions_targets.columns), (truth_targets.columns, predictions_targets.columns) + + scores = [] + for column in truth_targets.columns: + # We know that column names are unique because we check in "align". + truth_target = truth_targets[column] + predictions_target = predictions_targets[column] + + scores.append(self.score_one(truth_target, predictions_target)) + + return float(numpy.mean(scores)) + + @abc.abstractmethod + def score_one(self, truth_target: pandas.Series, predictions_target: pandas.Series) -> float: + raise NotImplementedError + + +class AccuracyMetric(_AllAsMultiLabelBase): + """ + Supports binary, multi-class, multi-label, and multi-task predictions. + """ + + def score_one(self, truth_target_encoded: pandas.DataFrame, predictions_target_encoded: pandas.DataFrame, all_labels: typing.Sequence) -> float: + return metrics.accuracy_score(truth_target_encoded, predictions_target_encoded) + + +class PrecisionMetric(_MultiTaskBase): + """ + Supports binary and multi-task predictions. + """ + + def __init__(self, pos_label: str) -> None: + self.pos_label = pos_label + + def score_one(self, truth_target: pandas.Series, predictions_target: pandas.Series) -> float: + # We do not have to pass labels because we are using binary average. + return metrics.precision_score(truth_target, predictions_target, pos_label=self.pos_label, average='binary') + + +class RecallMetric(_MultiTaskBase): + """ + Supports binary and multi-task predictions. + """ + + def __init__(self, pos_label: str) -> None: + self.pos_label = pos_label + + def score_one(self, truth_target: pandas.Series, predictions_target: pandas.Series) -> float: + # We do not have to pass labels because we are using binary average. + return metrics.recall_score(truth_target, predictions_target, pos_label=self.pos_label, average='binary') + + +class F1Metric(_MultiTaskBase): + """ + Supports binary and multi-task predictions. + """ + + def __init__(self, pos_label: str) -> None: + self.pos_label = pos_label + + def score_one(self, truth_target: pandas.Series, predictions_target: pandas.Series) -> float: + # We do not have to pass labels because we are using binary average. 
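+        # With average='binary', scikit-learn reports the score only for the class
+        # given by "pos_label", so no explicit label list is needed here.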
+ return metrics.f1_score(truth_target, predictions_target, pos_label=self.pos_label, average='binary') + + +class F1MicroMetric(_AllAsMultiLabelBase): + """ + Supports multi-class, multi-label, and multi-task predictions. + """ + + def score_one(self, truth_target_encoded: pandas.DataFrame, predictions_target_encoded: pandas.DataFrame, all_labels: typing.Sequence) -> float: + # We use multi-label F1 score to compute for multi-class target as well. + # We want to use all labels, so we do not pass labels on. + return metrics.f1_score(truth_target_encoded, predictions_target_encoded, average='micro') + + +class F1MacroMetric(_AllAsMultiLabelBase): + """ + Supports multi-class, multi-label, and multi-task predictions. + """ + + def score_one(self, truth_target_encoded: pandas.DataFrame, predictions_target_encoded: pandas.DataFrame, all_labels: typing.Sequence) -> float: + # We use multi-label F1 score to compute for multi-class target as well. + # We want to use all labels, so we do not pass labels on. + return metrics.f1_score(truth_target_encoded, predictions_target_encoded, average='macro') + + +class MeanSquareErrorMetric(Metric): + """ + Supports univariate and multivariate. + """ + + def score(self, truth: Truth, predictions: Predictions) -> float: + predictions = self.align(truth, predictions) + + truth_targets = self.get_target_columns(truth) + predictions_targets = self.get_target_columns(predictions) + + return metrics.mean_squared_error(truth_targets, predictions_targets, multioutput='uniform_average') + + +class RootMeanSquareErrorMetric(Metric): + """ + Supports univariate and multivariate. + """ + + def score(self, truth: Truth, predictions: Predictions) -> float: + predictions = self.align(truth, predictions) + + truth_targets = self.get_target_columns(truth) + predictions_targets = self.get_target_columns(predictions) + + mean_squared_error = metrics.mean_squared_error(truth_targets, predictions_targets, multioutput='raw_values') + + return float(numpy.mean(numpy.sqrt(mean_squared_error))) + + +class MeanAbsoluteErrorMetric(Metric): + """ + Supports univariate and multivariate. + """ + + def score(self, truth: Truth, predictions: Predictions) -> float: + predictions = self.align(truth, predictions) + + truth_targets = self.get_target_columns(truth) + predictions_targets = self.get_target_columns(predictions) + + return metrics.mean_absolute_error(truth_targets, predictions_targets, multioutput='uniform_average') + + +class RSquaredMetric(Metric): + """ + Supports univariate and multivariate. 
+ """ + + def score(self, truth: Truth, predictions: Predictions) -> float: + predictions = self.align(truth, predictions) + + truth_targets = self.get_target_columns(truth) + predictions_targets = self.get_target_columns(predictions) + + return metrics.r2_score(truth_targets, predictions_targets, multioutput='uniform_average') + + +class NormalizeMutualInformationMetric(Metric): + def score(self, truth: Truth, predictions: Predictions) -> float: + predictions = self.align(truth, predictions) + + truth_targets = self.get_target_columns(truth) + predictions_targets = self.get_target_columns(predictions) + + if len(truth_targets.columns) != len(predictions_targets.columns): + raise exceptions.InvalidArgumentValueError(f"The number of target columns in truth ({len(truth_targets.columns)}) and predictions ({len(predictions_targets.columns)}) do not match.") + + if len(truth_targets.columns) != 1: + raise exceptions.InvalidArgumentValueError("Only one target column is supported.") + + return metrics.normalized_mutual_info_score(truth_targets.iloc[:, 0].ravel(), predictions_targets.iloc[:, 0].ravel(), average_method='geometric') + + +class JaccardSimilarityScoreMetric(_MultiTaskBase): + """ + Supports binary and multi-task predictions. + """ + + def __init__(self, pos_label: str) -> None: + self.pos_label = pos_label + + def score_one(self, truth_target: pandas.Series, predictions_target: pandas.Series) -> float: + # We do not have to pass labels because we are using binary average. + return metrics.jaccard_score(truth_target, predictions_target, pos_label=self.pos_label, average='binary') + + +class PrecisionAtTopKMetric(Metric): + def __init__(self, k: int) -> None: + self.k = k + + def score(self, truth: Truth, predictions: Predictions) -> float: + predictions = self.align(truth, predictions) + + truth_targets = self.get_target_columns(truth) + predictions_targets = self.get_target_columns(predictions) + + if len(truth_targets.columns) != len(predictions_targets.columns): + raise exceptions.InvalidArgumentValueError(f"The number of target columns in truth ({len(truth_targets.columns)}) and predictions ({len(predictions_targets.columns)}) do not match.") + + if len(truth_targets.columns) != 1: + raise exceptions.InvalidArgumentValueError("Only one target column is supported.") + + truth_targets = truth_targets.values.ravel().astype(int) + predictions_targets = predictions_targets.values.ravel().astype(int) + + truth_targets = numpy.argsort(truth_targets)[::-1] + predictions_targets = numpy.argsort(predictions_targets)[::-1] + + truth_targets = truth_targets[0:self.k] + predictions_targets = predictions_targets[0:self.k] + + return numpy.float(len(numpy.intersect1d(truth_targets, predictions_targets))) / self.k + + +class ObjectDetectionAveragePrecisionMetric(Metric): + def _convert_bounding_polygon_to_box_coords(self, bounding_polygon: typing.List) -> typing.List: + # box_coords = [x_min, y_min, x_max, y_max] + if len(bounding_polygon) != 8: + raise exceptions.NotSupportedError("Polygon must contain eight vertices for this metric.") + + if bounding_polygon[0] != bounding_polygon[2] or bounding_polygon[4] != bounding_polygon[6]: + raise exceptions.NotSupportedError("X coordinates in bounding box do not match.") + + if bounding_polygon[1] != bounding_polygon[7] or bounding_polygon[3] != bounding_polygon[5]: + raise exceptions.NotSupportedError("Y coordinates in bounding box do not match.") + + box_coords = [bounding_polygon[0], bounding_polygon[1], + bounding_polygon[4], bounding_polygon[5]] + 
return box_coords + + def _group_gt_boxes_by_image_name(self, gt_boxes: typing.List) -> typing.Dict: + gt_dict: typing.Dict = {} + + for box in gt_boxes: + image_name = box[0] + bounding_polygon = box[1:] + bbox = self._convert_bounding_polygon_to_box_coords(bounding_polygon) + + if image_name not in gt_dict.keys(): + gt_dict[image_name] = [] + + gt_dict[image_name].append({'bbox': bbox}) + + return gt_dict + + def _voc_ap(self, rec: numpy.ndarray, prec: numpy.ndarray) -> float: + # First append sentinel values at the end. + mrec = numpy.concatenate(([0.], rec, [1.])) + mpre = numpy.concatenate(([0.], prec, [0.])) + + # Compute the precision envelope. + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = numpy.maximum(mpre[i - 1], mpre[i]) + + # To calculate area under PR curve, look for points + # where X axis (recall) changes value. + i = numpy.where(mrec[1:] != mrec[:-1])[0] + + # And sum (\Delta recall) * prec. + ap = numpy.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + + return float(ap) + + def _object_detection_average_precision(self, y_true: typing.List, y_pred: typing.List) -> float: + """ + This function takes a list of ground truth bounding polygons (rectangles in this case) + and a list of detected bounding polygons (also rectangles) for a given class and + computes the average precision of the detections with respect to the ground truth polygons. + Parameters: + ----------- + y_true: list + List of ground truth polygons. Each polygon is represented as a list of + vertices, starting in the upper-left corner going counter-clockwise. + Since in this case, the polygons are rectangles, they will have the + following format: + [image_name, x_min, y_min, x_min, y_max, x_max, y_max, x_max, y_min]. + y_pred: list + List of bounding box polygons with their corresponding confidence scores. Each + polygon is represented as a list of vertices, starting in the upper-left corner + going counter-clockwise. Since in this case, the polygons are rectangles, they + will have the following format: + [image_name, x_min, y_min, x_min, y_max, x_max, y_max, x_max, y_min, confidence_score]. + Returns: + -------- + ap: float + Average precision between detected polygons (rectangles) and the ground truth polylgons (rectangles). + (it is also the area under the precision-recall curve). 
+ Example 1: + >> predictions_list_1 = [['img_00001.png', 110, 110, 110, 210, 210, 210, 210, 110, 0.6], + ['img_00002.png', 5, 10, 5, 20, 20, 20, 20, 10, 0.9], + ['img_00002.png', 120, 130, 120, 200, 200, 200, 200, 130, 0.6]] + >> ground_truth_list_1 = [['img_00001.png', 100, 100, 100, 200, 200, 200, 200, 100], + ['img_00002.png', 10, 10, 10, 20, 20, 20, 20, 10], + ['img_00002.png', 70, 80, 70, 150, 140, 150, 140, 80]] + >> ap_1 = object_detection_average_precision(ground_truth_list_1, predictions_list_1) + >> print(ap_1) + 0.667 + Example 2: + >> predictions_list_2 = [['img_00285.png', 330, 463, 330, 505, 387, 505, 387, 463, 0.0739], + ['img_00285.png', 420, 433, 420, 498, 451, 498, 451, 433, 0.0910], + ['img_00285.png', 328, 465, 328, 540, 403, 540, 403, 465, 0.1008], + ['img_00285.png', 480, 477, 480, 522, 508, 522, 508, 477, 0.1012], + ['img_00285.png', 357, 460, 357, 537, 417, 537, 417, 460, 0.1058], + ['img_00285.png', 356, 456, 356, 521, 391, 521, 391, 456, 0.0843], + ['img_00225.png', 345, 460, 345, 547, 415, 547, 415, 460, 0.0539], + ['img_00225.png', 381, 362, 381, 513, 455, 513, 455, 362, 0.0542], + ['img_00225.png', 382, 366, 382, 422, 416, 422, 416, 366, 0.0559], + ['img_00225.png', 730, 463, 730, 583, 763, 583, 763, 463, 0.0588]] + >> ground_truth_list_2 = [['img_00285.png', 480, 457, 480, 529, 515, 529, 515, 457], + ['img_00285.png', 480, 457, 480, 529, 515, 529, 515, 457], + ['img_00225.png', 522, 540, 522, 660, 576, 660, 576, 540], + ['img_00225.png', 739, 460, 739, 545, 768, 545, 768, 460]] + >> ap_2 = object_detection_average_precision(ground_truth_list_2, predictions_list_2) + >> print(ap_2) + 0.125 + Example 3: + >> predictions_list_3 = [['img_00001.png', 110, 110, 110, 210, 210, 210, 210, 110, 0.6], + ['img_00002.png', 120, 130, 120, 200, 200, 200, 200, 130, 0.6], + ['img_00002.png', 5, 8, 5, 16, 15, 16, 15, 8, 0.9], + ['img_00002.png', 11, 12, 11, 18, 21, 18, 21, 12, 0.9]] + >> ground_truth_list_3 = [['img_00001.png', 100, 100, 100, 200, 200, 200, 200, 100], + ['img_00002.png', 10, 10, 10, 20, 20, 20, 20, 10], + ['img_00002.png', 70, 80, 70, 150, 140, 150, 140, 80]] + >> ap_3 = object_detection_average_precision(ground_truth_list_3, predictions_list_3) + >> print(ap_3) + 0.444 + Example 4: + (Same as example 3 except the last two box predictions in img_00002.png are switched) + >> predictions_list_4 = [['img_00001.png', 110, 110, 110, 210, 210, 210, 210, 110, 0.6], + ['img_00002.png', 120, 130, 120, 200, 200, 200, 200, 130, 0.6], + ['img_00002.png', 11, 12, 11, 18, 21, 18, 21, 12, 0.9], + ['img_00002.png', 5, 8, 5, 16, 15, 16, 15, 8, 0.9]] + >> ground_truth_list_4 = [['img_00001.png', 100, 100, 100, 200, 200, 200, 200, 100], + ['img_00002.png', 10, 10, 10, 20, 20, 20, 20, 10], + ['img_00002.png', 70, 80, 70, 150, 140, 150, 140, 80]] + >> ap_4 = object_detection_average_precision(ground_truth_list_4, predictions_list_4) + >> print(ap_4) + 0.444 + """ + + ovthresh = 0.5 + + # y_true = typing.cast(Truth, unvectorize(y_true)) + # y_pred = typing.cast(Predictions, unvectorize(y_pred)) + + # Load ground truth. + gt_dict = self._group_gt_boxes_by_image_name(y_true) + + # Extract gt objects for this class. + recs = {} + npos = 0 + + imagenames = sorted(gt_dict.keys()) + for imagename in imagenames: + Rlist = [obj for obj in gt_dict[imagename]] + bbox = numpy.array([x['bbox'] for x in Rlist]) + det = [False] * len(Rlist) + npos = npos + len(Rlist) + recs[imagename] = {'bbox': bbox, 'det': det} + + # Load detections. 
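+        # Each prediction row is [image_name, eight polygon coordinates, confidence];
+        # polygons are converted to [x_min, y_min, x_max, y_max] boxes below.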
+ det_length = len(y_pred[0]) + + # Check that all boxes are the same size. + for det in y_pred: + assert len(det) == det_length, 'Not all boxes have the same dimensions.' + + image_ids = [x[0] for x in y_pred] + BP = numpy.array([[float(z) for z in x[1:-1]] for x in y_pred]) + BB = numpy.array([self._convert_bounding_polygon_to_box_coords(x) for x in BP]) + + confidence = numpy.array([float(x[-1]) for x in y_pred]) + boxes_w_confidences_list = numpy.hstack((BB, -1 * confidence[:, None])) + boxes_w_confidences = numpy.empty( + (boxes_w_confidences_list.shape[0],), + dtype=[ + ('x_min', float), ('y_min', float), + ('x_max', float), ('y_max', float), + ('confidence', float), + ], + ) + boxes_w_confidences[:] = [tuple(i) for i in boxes_w_confidences_list] + + # Sort by confidence. + sorted_ind = numpy.argsort( + boxes_w_confidences, kind='mergesort', + order=('confidence', 'x_min', 'y_min', 'x_max', 'y_max')) + BB = BB[sorted_ind, :] + image_ids = [image_ids[x] for x in sorted_ind] + + # Go down y_pred and mark TPs and FPs. + nd = len(image_ids) + tp = numpy.zeros(nd) + fp = numpy.zeros(nd) + for d in range(nd): + R = recs[image_ids[d]] + bb = BB[d, :].astype(float) + ovmax = -numpy.inf + BBGT = R['bbox'].astype(float) + + if BBGT.size > 0: + # Compute overlaps. + # Intersection. + ixmin = numpy.maximum(BBGT[:, 0], bb[0]) + iymin = numpy.maximum(BBGT[:, 1], bb[1]) + ixmax = numpy.minimum(BBGT[:, 2], bb[2]) + iymax = numpy.minimum(BBGT[:, 3], bb[3]) + iw = numpy.maximum(ixmax - ixmin + 1., 0.) + ih = numpy.maximum(iymax - iymin + 1., 0.) + inters = iw * ih + + # Union. + uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + + (BBGT[:, 2] - BBGT[:, 0] + 1.) * + (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) + + overlaps = inters / uni + ovmax = numpy.max(overlaps) + jmax = numpy.argmax(overlaps) + + if ovmax > ovthresh: + if not R['det'][jmax]: + tp[d] = 1. + R['det'][jmax] = 1 + else: + fp[d] = 1. + else: + fp[d] = 1. + + # Compute precision recall. + fp = numpy.cumsum(fp) + tp = numpy.cumsum(tp) + rec = tp / float(npos) + # Avoid divide by zero in case the first detection matches a difficult ground truth. 
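+        # "rec" above and "prec" below are cumulative recall/precision arrays over
+        # detections sorted by confidence; "_voc_ap" integrates the resulting PR curve.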
+ prec = tp / numpy.maximum(tp + fp, numpy.finfo(numpy.float64).eps) + ap = self._voc_ap(rec, prec) + + return ap + + def score(self, truth: Truth, predictions: Predictions) -> float: + predictions = self.align(truth, predictions) + + truth_index = self.get_index_column(truth) + truth_targets = self.get_target_columns(truth) + + if len(truth_targets.columns) != 1: + raise NotImplementedError("Support for multiple target columns is not yet implemented.") + + truth_list = [] + for i, (index, target) in enumerate(pandas.concat([truth_index, truth_targets], axis=1).itertuples(index=False, name=None)): + truth_list.append([index] + [float(v) for v in target.split(',')]) + + predictions_index = self.get_index_column(predictions) + predictions_targets = self.get_target_columns(predictions) + predictions_confidence = self.get_confidence_column(predictions) + + if len(predictions_targets.columns) != 1: + raise NotImplementedError("Support for multiple target columns is not yet implemented.") + + predictions_list = [] + for i, (index, target, confidence) in enumerate(pandas.concat([predictions_index, predictions_targets, predictions_confidence], axis=1).itertuples(index=False, name=None)): + predictions_list.append([index] + [float(v) for v in target.split(',')] + [float(confidence)]) + + return self._object_detection_average_precision(truth_list, predictions_list) + + +class HammingLossMetric(_AllAsMultiLabelBase): + """ + Hamming loss gives the percentage of wrong labels to the total number of labels. + Lower the hamming loss, better is the performance of the method used. + + Supports multi-label and multi-task predictions. + """ + + def score_one(self, truth_target_encoded: pandas.DataFrame, predictions_target_encoded: pandas.DataFrame, all_labels: typing.Sequence) -> float: + # We do not have to pass labels because they are not needed and passing them is deprecated. + return metrics.hamming_loss(truth_target_encoded, predictions_target_encoded) + + +class _RocAucBase(Metric): + def __init__(self, all_labels: AllLabels = None) -> None: + self.all_labels = all_labels + + def encode_confidence(self, truth: Truth, predictions: Predictions) -> typing.Tuple[pandas.DataFrame, pandas.DataFrame]: + truth_vectorized = self.vectorize_columns(truth) + predictions_vectorized = self.vectorize_columns(predictions) + + predictions_vectorized = self.align(truth_vectorized, predictions_vectorized) + + truth_targets = self.get_target_columns(truth_vectorized) + predictions_targets = self.get_target_columns(predictions_vectorized) + predictions_confidence = self.get_confidence_column(predictions_vectorized).iloc[:, 0] + + if len(truth_targets.columns) != 1: + raise exceptions.InvalidArgumentValueError(f"Invalid number of target columns in truth: {len(truth_targets.columns)}") + if len(predictions_targets.columns) != 1: + raise exceptions.InvalidArgumentValueError(f"Invalid number of target columns in predictions: {len(predictions_targets.columns)}") + + truth_targets_columns_set = set(truth_targets.columns) + + # This holds from checks in "align". + assert truth_targets_columns_set == set(predictions_targets.columns), (truth_targets.columns, predictions_targets.columns) + + target_column_name = truth_targets.columns[0] + truth_target = truth_targets.iloc[:, 0] + predictions_target = predictions_targets.iloc[:, 0] + + truth_target_values_set = set(itertools.chain.from_iterable(truth_target)) + predictions_target_values_set = set(itertools.chain.from_iterable(predictions_target)) + + # If all labels were provided. 
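+        # ("all_labels" maps a target column name to its full label set, e.g. the hypothetical
+        # {'species': ('setosa', 'versicolor', 'virginica')}. When present for this column we only
+        # validate that truth and predictions use labels from that set; otherwise the label set is
+        # inferred from the data in the "else" branch below.)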
+ if self.all_labels is not None and target_column_name in self.all_labels: + all_labels_set = set(self.all_labels[target_column_name]) + + extra_truth_target_values_set = truth_target_values_set - all_labels_set + if extra_truth_target_values_set: + raise exceptions.InvalidArgumentValueError(f"Truth contains extra labels: {sorted(extra_truth_target_values_set)}") + + extra_predictions_target_values_set = predictions_target_values_set - all_labels_set + if extra_predictions_target_values_set: + raise exceptions.InvalidArgumentValueError(f"Predictions contain extra labels: {sorted(extra_predictions_target_values_set)}") + + # Otherwise we infer labels from available data. + else: + all_labels_set = truth_target_values_set | predictions_target_values_set + + all_labels = sorted(all_labels_set) + + truth_target_encoded = self.one_hot_encode_target(truth_target, all_labels) + + for i, prediction_targets in enumerate(predictions_target): + prediction_targets_set = set(prediction_targets) + prediction_targets_list = list(prediction_targets) + confidences = predictions_confidence[i] + + if len(prediction_targets_set) != len(prediction_targets_list): + raise exceptions.InvalidArgumentValueError( + f"Duplicate target values ({prediction_targets_list}) for sample '{predictions.loc[i, INDEX_COLUMN]}'." + ) + if len(prediction_targets) != len(confidences): + raise exceptions.InvalidArgumentValueError( + f"The number of target values ({len(prediction_targets)}) does not match the number of confidence values ({len(confidences)}) for sample '{predictions.loc[i, INDEX_COLUMN]}'." + ) + + assert not (prediction_targets_set - all_labels_set), (prediction_targets_set, all_labels_set) + + # We have to order confidences to match labels order. + # If any label is missing in confidences, we add it with confidence 0. + if all_labels != prediction_targets_list: + confidences_map = {label: confidence for label, confidence in zip(prediction_targets, confidences)} + predictions_confidence[i] = tuple(confidences_map.get(label, 0.0) for label in all_labels) + + # Check that all confidences can be converted to float and that they sum to 1. + sum_confidences = sum(float(confidence) for confidence in predictions_confidence[i]) + if not numpy.isclose(sum_confidences, 1.0): + raise exceptions.InvalidArgumentValueError( + f"Confidences do not sum to 1.0 for sample '{predictions.loc[i, INDEX_COLUMN]}', but {sum_confidences}." + ) + + predictions_confidence_encoded = self.one_hot_encode_confidence(predictions_confidence, all_labels) + + return truth_target_encoded, predictions_confidence_encoded + + +class RocAucMetric(_RocAucBase): + """ + Supports binary predictions. + """ + + def score(self, truth: Truth, predictions: Predictions) -> float: + truth_target_encoded, predictions_confidence_encoded = self.encode_confidence(truth, predictions) + + # We use multi-label ROC AUC to compute for binary target as well. + scores = metrics.roc_auc_score(truth_target_encoded, predictions_confidence_encoded, average=None) + + if len(scores) != 2: + raise exceptions.InvalidArgumentValueError("Predictions are not binary.") + + assert numpy.isclose(scores[0], scores[1]), scores + + return scores[0] + + +class RocAucMicroMetric(_RocAucBase): + """ + Supports multi-class and multi-label predictions. 
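+
+    Micro-averaging pools every (sample, label) pair into a single binary decision before computing
+    ROC AUC, so frequent labels influence the score more than under the per-label averaging used by
+    ``RocAucMacroMetric`` below.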
+ """ + + def score(self, truth: Truth, predictions: Predictions) -> float: + truth_target_encoded, predictions_confidence_encoded = self.encode_confidence(truth, predictions) + + # We use multi-label ROC AUC to compute for multi-class target as well. + return metrics.roc_auc_score(truth_target_encoded, predictions_confidence_encoded, average='micro') + + +class RocAucMacroMetric(_RocAucBase): + """ + Supports multi-class and multi-label predictions. + """ + + def score(self, truth: Truth, predictions: Predictions) -> float: + truth_target_encoded, predictions_confidence_encoded = self.encode_confidence(truth, predictions) + + # We use multi-label ROC AUC to compute for multi-class target as well. + return metrics.roc_auc_score(truth_target_encoded, predictions_confidence_encoded, average='macro') + + +class _RankMetricBase(Metric): + MAX_RANK = 500 + + @classmethod + def get_merged_truth_predictions(cls, truth: Truth, predictions: Predictions) -> pandas.DataFrame: + predictions = cls.align(truth, predictions) + + truth_index = cls.get_index_column(truth) + truth_targets = cls.get_target_columns(truth) + + if len(truth_targets.columns) != 1: + raise exceptions.InvalidArgumentValueError("Only one target column is supported.") + + truth = pandas.concat([truth_index, truth_targets], axis=1) + + predictions_index = cls.get_index_column(predictions) + predictions_targets = cls.get_target_columns(predictions) + predictions_rank = cls.get_rank_column(predictions) + + if len(predictions_targets.columns) != 1: + raise exceptions.InvalidArgumentValueError("Only one target column is supported.") + + predictions = pandas.concat([predictions_index, predictions_targets, predictions_rank], axis=1) + + merged_truth_predictions = pandas.merge(truth, predictions, how='inner', on=truth.columns.values.tolist()) + + # edge-case: none of the true tuples appear in the predictions. + if merged_truth_predictions.empty: + return merged_truth_predictions + + # edge-case: some of the tuples does not appear in the predictions. In this case we give missing true tuples a MAX_RANK of 500. + if merged_truth_predictions.shape[0] != truth.shape[0]: + outer_merged_truth_predictions = pandas.merge(truth, predictions, how='outer', on=truth.columns.values.tolist()) + non_represented = outer_merged_truth_predictions[outer_merged_truth_predictions[RANK_COLUMN].isnull()] + non_represented = non_represented.fillna(cls.MAX_RANK) + merged_truth_predictions = pandas.concat([merged_truth_predictions, non_represented], axis=0) + + return merged_truth_predictions + + +class MeanReciprocalRankMetric(_RankMetricBase): + """ + This computes the mean of the reciprocal of elements of a vector of rankings. This metric is used for linkPrediction problems. + Consider the example: + learningData: + d3mIndex subject object relationship (target) + 0 James John father + 1 John Patricia sister + 2 Robert Thomas brother + ... + ... 
+ + truth: + d3mIndex relationship + 0 father + 1 sister + 2 brother + + predictions: + d3mIndex relationships rank + 0 brother 1 + 0 cousin 2 + 0 mother 3 + 0 father 4 * + 0 grandfather 5 + 1 sister 1 * + 1 mother 2 + 1 aunt 3 + 2 father 1 + 2 brother 2 * + 2 sister 3 + 2 grandfather 4 + 2 aunt 5 + + Note that ranks (of truth relationships in the predictions) = [4,1,2] + MRR = np.sum(1/ranks)/len(ranks) + MRR = 0.58333 + """ + + def score(self, truth: Truth, predictions: Predictions) -> float: + merged_truth_predictions = self.get_merged_truth_predictions(truth, predictions) + + # edge-case: none of the true tuples appear in the predictions. This should return a score of 0.0. + if merged_truth_predictions.empty: + return 0.0 + + ranks = merged_truth_predictions[RANK_COLUMN].astype(float) + return numpy.sum(1 / ranks) / len(ranks) + + +class HitsAtKMetric(_RankMetricBase): + """ + The computes how many elements of a vector of ranks make it to the top 'k' positions. + Consider the example: + learningData: + d3mIndex subject object relationship (target) + 0 James John father + 1 John Patricia sister + 2 Robert Thomas brother + ... + ... + + truth: + d3mIndex relationship + 0 father + 1 sister + 2 brother + + predictions: + d3mIndex relationships rank + 0 brother 1 + 0 cousin 2 + 0 mother 3 + 0 father 4 * + 0 grandfather 5 + 1 sister 1 * + 1 mother 2 + 1 aunt 3 + 2 father 1 + 2 brother 2 * + 2 sister 3 + 2 grandfather 4 + 2 aunt 5 + + Note that ranks (of truth relationships in the predictions) = [4,1,2] + Hits@3 = 2/3 = 0.666666 + Hits@1 = 1/3 = 0.3333333 + Hits@5 = 3/3 = 1.0 + """ + + def __init__(self, k: int) -> None: + self.k = k + + def score(self, truth: Truth, predictions: Predictions) -> float: + merged_truth_predictions = self.get_merged_truth_predictions(truth, predictions) + + # edge-case: none of the true tuples appear in the predictions. This should return a score of 0.0. 
+ if merged_truth_predictions.empty: + return 0.0 + + ranks = merged_truth_predictions[RANK_COLUMN].astype(float) + return numpy.sum(ranks <= self.k) / len(ranks) + + +class_map: typing.Dict[problem.PerformanceMetricBase, Metric] = { + problem.PerformanceMetric.ACCURACY: AccuracyMetric, + problem.PerformanceMetric.PRECISION: PrecisionMetric, + problem.PerformanceMetric.RECALL: RecallMetric, + problem.PerformanceMetric.F1: F1Metric, + problem.PerformanceMetric.F1_MICRO: F1MicroMetric, + problem.PerformanceMetric.F1_MACRO: F1MacroMetric, + problem.PerformanceMetric.MEAN_SQUARED_ERROR: MeanSquareErrorMetric, + problem.PerformanceMetric.ROOT_MEAN_SQUARED_ERROR: RootMeanSquareErrorMetric, + problem.PerformanceMetric.MEAN_ABSOLUTE_ERROR: MeanAbsoluteErrorMetric, + problem.PerformanceMetric.R_SQUARED: RSquaredMetric, + problem.PerformanceMetric.NORMALIZED_MUTUAL_INFORMATION: NormalizeMutualInformationMetric, + problem.PerformanceMetric.JACCARD_SIMILARITY_SCORE: JaccardSimilarityScoreMetric, + problem.PerformanceMetric.PRECISION_AT_TOP_K: PrecisionAtTopKMetric, + problem.PerformanceMetric.OBJECT_DETECTION_AVERAGE_PRECISION: ObjectDetectionAveragePrecisionMetric, + problem.PerformanceMetric.HAMMING_LOSS: HammingLossMetric, + problem.PerformanceMetric.ROC_AUC: RocAucMetric, + problem.PerformanceMetric.ROC_AUC_MICRO: RocAucMicroMetric, + problem.PerformanceMetric.ROC_AUC_MACRO: RocAucMacroMetric, + problem.PerformanceMetric.MEAN_RECIPROCAL_RANK: MeanReciprocalRankMetric, + problem.PerformanceMetric.HITS_AT_K: HitsAtKMetric, +} diff --git a/d3m/d3m/namespace.py b/d3m/d3m/namespace.py new file mode 100644 index 0000000..05770eb --- /dev/null +++ b/d3m/d3m/namespace.py @@ -0,0 +1,195 @@ +import importlib.abc +import importlib.machinery +import logging +import pkg_resources +import sys +import types +import typing + +__all__ = ('setup',) + +logger = logging.getLogger(__name__) + +# For which entry points we already warned that they are ignored? +_ignored_entry_points: typing.Set[str] = set() + + +def entry_points() -> typing.Iterator[pkg_resources.EntryPoint]: + """ + Makes sure that if two entry points are conflicting (one has a path + pointing to a primitive, and another is a path pointing to a module containing + other modules or primitives), the latter entry point is returned + while the former is ignored (and warned about). This makes loading primitives + deterministic. + + We iterate every time over entry points because maybe entry points have changed. + """ + + modules = set(tuple(entry_point.name.split('.')[:-1]) for entry_point in pkg_resources.iter_entry_points('d3m.primitives')) + + for entry_point in pkg_resources.iter_entry_points('d3m.primitives'): + primitive_path = tuple(entry_point.name.split('.')) + + # "primitive_path" starts with a module path the last segment is a class name. If it exists + # as a whole among what is seen as modules for all primitives, we have a conflict. + if primitive_path in modules: + if entry_point.name not in _ignored_entry_points: + _ignored_entry_points.add(entry_point.name) + logger.warning("An entry point for a primitive is conflicting with another entry point which has it as a module: %(entry_point_name)s", {'entry_point_name': entry_point.name}) + else: + yield entry_point + + +class ModuleType(types.ModuleType): + """ + A module which loads primitives on demand under ``d3m.primitives`` namespace. 
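+
+    For example, a package registering the (hypothetical) entry point
+    ``classification.random_forest.SKlearn`` makes its primitive importable as
+    ``d3m.primitives.classification.random_forest.SKlearn``, with the class loaded lazily on first
+    attribute access rather than when ``d3m.primitives`` is imported.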
+ """ + + def __dir__(self) -> typing.Sequence[str]: + """ + Adds to listed attributes of a module all primitive classes known from + entry points to be available under this module. + + They are not necessary loadable (trying to access them tries to load a primitive which + might fail) and it is not yet necessary that they are really pointing to primitive classes, + because this method does not try to load them yet to determine any of that. + + Already loaded primitives and imported submodules are provided by parent implementation + of "__dir__" already because they are real attributes of this module. + + We add only classes. Submodules are added as real attributes once they are + explicitly imported. This mimics how things work for regular modules in Python. + """ + + entries = set(super().__dir__()) + + current_module = self.__name__.split('.') + + for entry_point in entry_points(): + entry_point_name = ['d3m', 'primitives'] + entry_point.name.split('.') + + # We assume the last segment is a class name, so we remove it. + entry_point_module = entry_point_name[:-1] + + # If an entry point points to a class directly under this module, we add that class' name. + if current_module == entry_point_module: + # The last segment is a class name. + entries.add(entry_point_name[-1]) + + return list(entries) + + def __getattr__(self, item: str) -> typing.Any: + """ + This method is called when there is no real attribute with name "item" already + present in this module object (so not an existing method, an already loaded primitive, + or already imported submodule). + + If it looks like "name" is pointing to a primitive, we load the primitive here and add + it to the module object as a real attribute by calling "register_primitive". + + If it does not look like a primitive, we raise an exception and Python importing logic + tries to import the module instead. + """ + + # Importing here to prevent import cycle. + from d3m import index + + item_path = self.__name__.split('.') + [item] + + for entry_point in entry_points(): + entry_point_name = ['d3m', 'primitives'] + entry_point.name.split('.') + + # We assume for the last segment to be a class, so the full path has to match + # for path to look like it is pointing to a primitive's class. + if item_path == entry_point_name: + primitive = None + try: + logger.debug("Loading entry point '%(entry_point_name)s'.", {'entry_point_name': entry_point.name}) + entry_point.require() + primitive = entry_point.resolve() # type: ignore + except pkg_resources.ResolutionError as error: + logger.warning("While loading primitive '%(entry_point_name)s', an error has been detected: %(error)s", {'entry_point_name': entry_point.name, 'error': error}) + logger.warning("Attempting to load primitive '%(entry_point_name)s' without checking requirements.", {'entry_point_name': entry_point.name}) + + # There was an error, so we try again without checking requirements. + if primitive is None: + primitive = entry_point.resolve() # type: ignore + + try: + # We set the sentinel so that when during registration attribute with name "name" + # is accessed this method is not called again (because a real attribute already + # exists) but the sentinel is returned. + setattr(self, item, index._SENTINEL) + index.register_primitive('.'.join(entry_point_name), primitive) + except Exception: + if getattr(self, item) is index._SENTINEL: + delattr(self, item) + raise + + # Calling "register_primitive" should set a real attribute on this module object. 
+ assert getattr(self, item) is primitive + + return primitive + + raise AttributeError('module \'{name}\' has no attribute \'{item}\''.format(name=self.__name__, item=item)) + + +class Loader(importlib.abc.Loader): + """ + A loader which returns modules of our subclass. + """ + + def create_module(self, spec: importlib.machinery.ModuleSpec) -> types.ModuleType: + return ModuleType(spec.name, ModuleType.__doc__) + + def exec_module(self, module: types.ModuleType) -> None: + pass + + +class MetaPathFinder(importlib.abc.MetaPathFinder): + """ + A finder for ``d3m.primitives`` namespace which uses our loader for entries in entry points. + """ + + def find_spec(self, fullname, path, target=None): # type: ignore + if not fullname.startswith('d3m.primitives'): + return None + + if fullname == 'd3m.primitives': + return importlib.machinery.ModuleSpec(fullname, Loader(), is_package=True) + + name = fullname.split('.') + + for entry_point in entry_points(): + entry_point_name = ['d3m', 'primitives'] + entry_point.name.split('.') + + # We assume the last segment is a class name, so we remove it. + entry_point_module = entry_point_name[:-1] + + # There is at least one entry point having this name as its module, + # so we return a module. + if len(entry_point_module) >= len(name) and entry_point_module[0:len(name)] == name: + return importlib.machinery.ModuleSpec(fullname, Loader(), is_package=True) + + return None + + +def setup() -> None: + """ + Expose all primitives under the same ``d3m.primitives`` namespace. + + This is achieved using Python entry points. Python packages containing primitives + can register them and expose them under the common namespace by adding an entry + like the following to package's ``setup.py``:: + + entry_points = { + 'd3m.primitives': [ + 'primitive_namespace.PrimitiveName = my_package.my_module:PrimitiveClassName', + ], + }, + + The example above would expose the ``my_package.my_module.PrimitiveClassName`` primitive under + ``d3m.primitives.primitive_namespace.PrimitiveName``. + """ + + sys.meta_path.append(MetaPathFinder()) diff --git a/d3m/d3m/primitive_interfaces/__init__.py b/d3m/d3m/primitive_interfaces/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/d3m/d3m/primitive_interfaces/base.py b/d3m/d3m/primitive_interfaces/base.py new file mode 100644 index 0000000..4af1cb6 --- /dev/null +++ b/d3m/d3m/primitive_interfaces/base.py @@ -0,0 +1,1293 @@ +import abc +import inspect +import logging +import time +import typing + +from d3m import exceptions, types, utils +from d3m.metadata import base as metadata_base, hyperparams, params, problem + +__all__ = ( + 'Inputs', 'Outputs', 'Params', 'Hyperparams', 'CallResult', 'MultiCallResult', 'DockerContainer', + 'PrimitiveBase', 'ContinueFitMixin', 'SamplingCompositionalityMixin', + 'ProbabilisticCompositionalityMixin', 'Gradients', + 'GradientCompositionalityMixin', 'LossFunctionMixin', + 'NeuralNetworkModuleMixin', 'NeuralNetworkObjectMixin', + 'singleton', 'inputs_across_samples', +) + + +Inputs = typing.TypeVar('Inputs', bound=typing.Union[types.Container]) # type: ignore +Outputs = typing.TypeVar('Outputs', bound=typing.Union[types.Container]) # type: ignore +# This type parameter is optional and can be set to None. +# See "TransformerPrimitiveBase" for an example. 
+Params = typing.TypeVar('Params', bound=params.Params) +Hyperparams = typing.TypeVar('Hyperparams', bound=hyperparams.Hyperparams) +Module = typing.TypeVar('Module') + +T = typing.TypeVar('T') + +# All base classes (primitive interfaces) should have docstrings starting with this language. +# This allows us to validate that primitives have changed their descriptions/docstrings to something different. +DEFAULT_DESCRIPTION = "A base class for primitives" + + +class CallResult(typing.Generic[T]): + """ + Some methods return additional metadata about the method call itself + (which is different to metadata about the value returned, which is stored + in ``metadata`` attribute of the value itself). + + For ``produce`` method call, ``has_finished`` is ``True`` if the last call + to ``produce`` has produced the final outputs and a call with more time or + more iterations cannot get different outputs. + + For ``fit`` method call, ``has_finished`` is ``True`` if a primitive has been + fully fitted on current training data and further calls to ``fit`` are + unnecessary and will not change anything. ``False`` means that more iterations + can be done (but it does not necessary mean that more iterations are beneficial). + + If a primitive has iterations internally, then ``iterations_done`` contains + how many of those iterations have been made during the last call. If primitive + does not support them, ``iterations_done`` is ``None``. + + Those methods should return value wrapped into this class. + + Parameters + ---------- + value: + The value itself of the method call. + has_finished: + Set to ``True`` if it is not reasonable to call the method again anymore. + iterations_done: + How many iterations have been done during a method call, if any. + """ + + def __init__(self, value: T, has_finished: bool = True, iterations_done: int = None) -> None: + self.value = value + self.has_finished = has_finished + self.iterations_done = iterations_done + + +class MultiCallResult: + """ + Similar to `CallResult`, but used by ``multi_produce``. + + It has no precise typing information because type would have to be a dependent type + which is not (yet) supported in standard Python typing. Type would depend on + ``produce_methods`` argument and output types of corresponding produce methods. + + Parameters + ---------- + values: + A dict of values mapping between produce method names and their value outputs. + has_finished: + Set to ``True`` if it is not reasonable to call the method again anymore. + iterations_done: + How many iterations have been done during a method call, if any. + """ + + def __init__(self, values: typing.Dict, has_finished: bool = True, iterations_done: int = None) -> None: + self.values = values + self.has_finished = has_finished + self.iterations_done = iterations_done + + +class PrimitiveBaseMeta(utils.GenericMetaclass): + """ + A metaclass which provides the primitive instance to metadata so that primitive + metadata can be automatically generated. + """ + + def __new__(mcls, class_name, bases, namespace, **kwargs): # type: ignore + cls = super().__new__(mcls, class_name, bases, namespace, **kwargs) + + if inspect.isabstract(cls): + return cls + + if not isinstance(cls.metadata, metadata_base.PrimitiveMetadata): + raise TypeError("'metadata' attribute is not an instance of PrimitiveMetadata.") + + # We are creating a class-level logger so that it can be used both from class and instance methods. + # "python_path" is a required metadata value, but we leave metadata validation to later. 
+ python_path = cls.metadata.query().get('python_path', None) + if python_path is not None: + cls.logger = logging.getLogger(python_path) + + cls.metadata.contribute_to_class(cls) + + return cls + + def __repr__(cls) -> str: + if getattr(cls, 'metadata', None) is not None: + return cls.metadata.query().get('python_path', super().__repr__()) + else: + return super().__repr__() + + +class DockerContainer(typing.NamedTuple): + """ + A tuple suitable to describe connection information necessary to connect + to exposed ports of a running Docker container. + + Attributes + ---------- + address: + An address at which the Docker container is available. + ports: + Mapping between image's exposed ports and real ports. E.g., + ``{'80/tcp': 80}``. + """ + + address: str + ports: typing.Dict[str, int] + + +class PrimitiveBase(typing.Generic[Inputs, Outputs, Params, Hyperparams], metaclass=PrimitiveBaseMeta): + """ + A base class for primitives. + + Class is parameterized using four type variables, ``Inputs``, ``Outputs``, ``Params``, + and ``Hyperparams``. + + ``Params`` has to be a subclass of `d3m.metadata.params.Params` and should define + all fields and their types for parameters which the primitive is fitting. + + ``Hyperparams`` has to be a subclass of a `d3m.metadata.hyperparams.Hyperparams`. + Hyper-parameters are those primitive's parameters which primitive is not fitting and + generally do not change during a life-time of a primitive. + + ``Params`` and ``Hyperparams`` have to be picklable and copyable. See `pickle`, + `copy`, and `copyreg` Python modules for more information. + + In this context we use term method arguments to mean both formal parameters and + actual parameters of a method. We do this to not confuse method parameters with + primitive parameters (``Params``). + + All arguments to all methods are keyword-only. No ``*args`` or ``**kwargs`` should + ever be used in any method. + + Standardized interface use few public attributes and no other public attributes are + allowed to assure future compatibility. For your attributes use the convention that + private symbols should start with ``_``. + + Primitives can have methods which are not part of standardized interface classes: + + * Additional "produce" methods which are prefixed with ``produce_`` and have + the same semantics as ``produce`` but potentially return different output + container types instead of ``Outputs`` (in such primitive ``Outputs`` is seen as + primary output type, but the primitive also has secondary output types). + They should return ``CallResult`` and have ``timeout`` and ``iterations`` arguments. + * Private methods prefixed with ``_``. + + No other public additional methods are allowed. If this represents a problem for you, + open an issue. (The rationale is that for other methods an automatic system will not + understand the semantics of the method.) + + Method arguments which start with ``_`` are seen as private and can be used for arguments + useful for debugging and testing, but they should not be used by (or even known to) a + caller during normal execution. Such arguments have to be optional (have a default value) + so that the method can be called without the knowledge of the argument. + + All arguments to all methods and all hyper-parameters together are seen as arguments to + the primitive as a whole. They are identified by their names. This means that any argument + name must have the same type and semantics across all methods, effectively be the same argument. 
+    If a method argument matches a hyper-parameter in name, it has to match it in type and semantics
+    as well. Such a method argument overrides a hyper-parameter for a method call. All this is necessary
+    so that callers have an easier time determining what values to pass to arguments and so that it is
+    easier to describe which values are inputs to a primitive as a whole (the set of all
+    arguments).
+
+    To recap, subclasses can extend arguments of standard methods with explicit typed keyword
+    arguments used for the method call, or define new "produce" methods with arbitrary explicit
+    typed keyword arguments. There are multiple kinds of such arguments allowed:
+
+    * An (additional) input argument of any container type and not necessarily of ``Inputs``
+      (in such a primitive ``Inputs`` is seen as the primary input type, but the primitive also has
+      secondary input types).
+    * An argument which overrides a hyper-parameter for the duration of the call.
+      It should match a hyper-parameter in name and type. It should be a required argument
+      (no default value) which the caller has to supply (either with the default value of the
+      hyper-parameter, or with the same hyper-parameter value as was passed to the constructor,
+      or with some other value). This is meant just for fine control by a caller during fitting
+      or producing, e.g., for a threshold or learning rate, and is not reasonable for most
+      hyper-parameters.
+    * An (additional) value argument which is one of standard data types, but not a container type.
+      In this case a caller will try to satisfy the input by creating part of a pipeline which
+      ends with a primitive with a singleton produce method, extracting the singleton value, and
+      passing it without a container. This kind of argument is **discouraged** and should probably
+      be a hyper-parameter instead (because it is unclear how a caller can determine which value
+      is reasonable to pass in an automatic way), but it is defined for completeness and
+      so that existing pipelines can be described more easily.
+    * A private argument prefixed with ``_`` which is used for debugging and testing.
+      It should not be used by (or even known to) a caller during normal execution.
+      Such an argument has to be optional (have a default value) so that the method can be called
+      without knowledge of the argument.
+
+    Each primitive's class automatically gets an instance of Python's logging logger stored
+    into its ``logger`` class attribute. The instance is made under the name of the primitive's
+    ``python_path`` metadata value. Primitives can use this logger to log information at
+    various levels (debug, warning, error) and even associate extra data with a log record
+    using the ``extra`` argument to the logger calls.
+
+    Subclasses of this class allow functional compositionality.
+
+    Attributes
+    ----------
+    metadata:
+        Primitive's metadata. Available as a class attribute.
+    logger:
+        Primitive's logger. Available as a class attribute.
+    hyperparams:
+        Hyperparams passed to the constructor.
+    random_seed:
+        Random seed passed to the constructor.
+    docker_containers:
+        A dict mapping Docker image keys from primitive's metadata to (named) tuples containing
+        the container's address under which the container is accessible by the primitive, and a
+        dict mapping exposed ports to ports on that address.
+    volumes:
+        A dict mapping volume keys from primitive's metadata to file and directory paths
+        where downloaded and extracted files are available to the primitive.
+ temporary_directory: + An absolute path to a temporary directory a primitive can use to store any files + for the duration of the current pipeline run phase. Directory is automatically + cleaned up after the current pipeline run phase finishes. + """ + + # Primitive's metadata (annotation) should be put on "metadata' attribute to provide + # all fields (which cannot be determined automatically) inside the code. In this way metadata + # is close to the code and it is easier for consumers to make sure metadata they are using + # is really matching the code they are using. PrimitiveMetadata class will automatically + # extract additional metadata and update itself with metadata about code and other things + # it can extract automatically. + metadata: typing.ClassVar[metadata_base.PrimitiveMetadata] = None + + # This gets automatically set to primitive's logger in metaclass. + logger: typing.ClassVar[logging.Logger] = None + + hyperparams: Hyperparams + random_seed: int + docker_containers: typing.Dict[str, DockerContainer] + volumes: typing.Dict[str, str] + temporary_directory: str + + def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0, + docker_containers: typing.Dict[str, DockerContainer] = None, + volumes: typing.Dict[str, str] = None, + temporary_directory: str = None) -> None: + """ + All primitives should accept all their hyper-parameters in a constructor as one value, + an instance of type ``Hyperparams``. + + Provided random seed should control all randomness used by this primitive. + Primitive should behave exactly the same for the same random seed across multiple + invocations. You can call `numpy.random.RandomState(random_seed)` to obtain an + instance of a random generator using provided seed. If your primitive does not + use randomness, consider not exposing this argument in your primitive's constructor + to signal that. + + Primitives can be wrappers around or use one or more Docker images which they can + specify as part of ``installation`` field in their metadata. Each Docker image listed + there has a ``key`` field identifying that image. When primitive is created, + ``docker_containers`` contains a mapping between those keys and connection information + which primitive can use to connect to a running Docker container for a particular Docker + image and its exposed ports. Docker containers might be long running and shared between + multiple instances of a primitive. If your primitive does not use Docker images, + consider not exposing this argument in your primitive's constructor. + + **Note**: Support for primitives using Docker containers has been put on hold. + Currently it is not expected that any runtime running primitives will run + Docker containers for a primitive. + + Primitives can also use additional static files which can be added as a dependency + to ``installation`` metadata. When done so, given volumes are provided to the + primitive through ``volumes`` argument to the primitive's constructor as a + dict mapping volume keys to file and directory paths where downloaded and + extracted files are available to the primitive. All provided files and directories + are read-only. If your primitive does not use static files, consider not exposing + this argument in your primitive's constructor. + + Primitives can also use the provided temporary directory to store any files for + the duration of the current pipeline run phase. Directory is automatically + cleaned up after the current pipeline run phase finishes. 
Do not store in this + directory any primitive's state you would like to preserve between "fit" and + "produce" phases of pipeline execution. Use ``Params`` for that. The main intent + of this temporary directory is to store files referenced by any ``Dataset`` object + your primitive might create and followup primitives in the pipeline should have + access to. When storing files into this directory consider using capabilities + of Python's `tempfile` module to generate filenames which will not conflict with + any other files stored there. Use provided temporary directory as ``dir`` argument + to set it as base directory to generate additional temporary files and directories + as needed. If your primitive does not use temporary directory, consider not exposing + this argument in your primitive's constructor. + + No other arguments to the constructor are allowed (except for private arguments) + because we want instances of primitives to be created without a need for any other + prior computation. + + Module in which a primitive is defined should be kept lightweight and on import not do + any (pre)computation, data loading, or resource allocation/reservation. Any loading + and resource allocation/reservation should be done in the constructor. Any (pre)computation + should be done lazily when needed once requested through other methods and not in the constructor. + """ + + self.hyperparams = hyperparams + self.random_seed = random_seed + if docker_containers is None: + self.docker_containers: typing.Dict[str, DockerContainer] = {} + else: + self.docker_containers = docker_containers + if volumes is None: + self.volumes: typing.Dict[str, str] = {} + else: + self.volumes = volumes + self.temporary_directory = temporary_directory + + @abc.abstractmethod + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: + """ + Produce primitive's best choice of the output for each of the inputs. + + The output value should be wrapped inside ``CallResult`` object before returning. + + In many cases producing an output is a quick operation in comparison with ``fit``, but not + all cases are like that. For example, a primitive can start a potentially long optimization + process to compute outputs. ``timeout`` and ``iterations`` can serve as a way for a caller + to guide the length of this process. + + Ideally, a primitive should adapt its call to try to produce the best outputs possible + inside the time allocated. If this is not possible and the primitive reaches the timeout + before producing outputs, it should raise a ``TimeoutError`` exception to signal that the + call was unsuccessful in the given time. The state of the primitive after the exception + should be as the method call has never happened and primitive should continue to operate + normally. The purpose of ``timeout`` is to give opportunity to a primitive to cleanly + manage its state instead of interrupting execution from outside. Maintaining stable internal + state should have precedence over respecting the ``timeout`` (caller can terminate the + misbehaving primitive from outside anyway). If a longer ``timeout`` would produce + different outputs, then ``CallResult``'s ``has_finished`` should be set to ``False``. + + Some primitives have internal iterations (for example, optimization iterations). + For those, caller can provide how many of primitive's internal iterations + should a primitive do before returning outputs. Primitives should make iterations as + small as reasonable. 
If ``iterations`` is ``None``, then there is no limit on + how many iterations the primitive should do and primitive should choose the best amount + of iterations on its own (potentially controlled through hyper-parameters). + If ``iterations`` is a number, a primitive has to do those number of iterations, + if possible. ``timeout`` should still be respected and potentially less iterations + can be done because of that. Primitives with internal iterations should make + ``CallResult`` contain correct values. + + For primitives which do not have internal iterations, any value of ``iterations`` + means that they should run fully, respecting only ``timeout``. + + If primitive should have been fitted before calling this method, but it has not been, + primitive should raise a ``PrimitiveNotFittedError`` exception. + + Parameters + ---------- + inputs: + The inputs of shape [num_inputs, ...]. + timeout: + A maximum time this primitive should take to produce outputs during this method call, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + ------- + The outputs of shape [num_inputs, ...] wrapped inside ``CallResult``. + """ + + def multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, timeout: float = None, iterations: int = None) -> MultiCallResult: + """ + A method calling multiple produce methods at once. + + When a primitive has multiple produce methods it is common that they might compute the + same internal results for same inputs but return different representations of those results. + If caller is interested in multiple of those representations, calling multiple produce + methods might lead to recomputing same internal results multiple times. To address this, + this method allows primitive author to implement an optimized version which computes + internal results only once for multiple calls of produce methods, but return those different + representations. + + If any additional method arguments are added to primitive's produce method(s), they have + to be added to this method as well. This method should accept an union of all arguments + accepted by primitive's produce method(s) and then use them accordingly when computing + results. + + The default implementation of this method just calls all produce methods listed in + ``produce_methods`` in order and is potentially inefficient. + + If primitive should have been fitted before calling this method, but it has not been, + primitive should raise a ``PrimitiveNotFittedError`` exception. + + Parameters + ---------- + produce_methods: + A list of names of produce methods to call. + inputs: + The inputs given to all produce methods. + timeout: + A maximum time this primitive should take to produce outputs for all produce methods + listed in ``produce_methods`` argument, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + ------- + A dict of values for each produce method wrapped inside ``MultiCallResult``. + """ + + return self._multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs) + + def _multi_produce(self, *, produce_methods: typing.Sequence[str], timeout: float = None, iterations: int = None, **kwargs: typing.Dict[str, typing.Any]) -> MultiCallResult: + """ + We do not want a public API to use ``kwargs``, but such implementation allows easier subclassing and reuse + of a default implementation. Do not call directly. 
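+
+        As a sketch of the expected flow: the public ``multi_produce(produce_methods=['produce'],
+        inputs=inputs)`` forwards here, each listed method is called with only the keyword arguments
+        its metadata declares, and ``timeout`` is reduced by the time already spent before the next
+        method is called.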
+ """ + + results = [] + for method_name in produce_methods: + if method_name != 'produce' and not method_name.startswith('produce_'): + raise exceptions.InvalidArgumentValueError("Invalid produce method name '{method_name}'.".format(method_name=method_name)) + + if not hasattr(self, method_name): + raise exceptions.InvalidArgumentValueError("Unknown produce method name '{method_name}'.".format(method_name=method_name)) + + try: + expected_arguments = set(self.metadata.query()['primitive_code'].get('instance_methods', {})[method_name]['arguments']) + except KeyError as error: + raise exceptions.InvalidArgumentValueError("Unknown produce method name '{method_name}'.".format(method_name=method_name)) from error + + arguments = {name: value for name, value in kwargs.items() if name in expected_arguments} + + start = time.perf_counter() + results.append(getattr(self, method_name)(timeout=timeout, iterations=iterations, **arguments)) + delta = time.perf_counter() - start + + # Decrease the amount of time available to other calls. This delegates responsibility + # of raising a "TimeoutError" exception to produce methods themselves. It also assumes + # that if one passes a negative timeout value to a produce method, it raises a + # "TimeoutError" exception correctly. + if timeout is not None: + timeout -= delta + + if not isinstance(results[-1], CallResult): + raise exceptions.InvalidReturnTypeError("Primitive's produce method '{method_name}' has not returned a CallResult.".format( + method_name=method_name, + )) + + # We return the maximum number of iterations done by any produce method we called. + iterations_done = None + for result in results: + if result.iterations_done is not None: + if iterations_done is None: + iterations_done = result.iterations_done + else: + iterations_done = max(iterations_done, result.iterations_done) + + return MultiCallResult( + values={name: result.value for name, result in zip(produce_methods, results)}, + has_finished=all(result.has_finished for result in results), + iterations_done=iterations_done, + ) + + def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, outputs: Outputs, timeout: float = None, iterations: int = None) -> MultiCallResult: + """ + A method calling ``fit`` and after that multiple produce methods at once. + + This method allows primitive author to implement an optimized version of both fitting + and producing a primitive on same data. + + If any additional method arguments are added to primitive's ``set_training_data`` method + or produce method(s), or removed from them, they have to be added to or removed from this + method as well. This method should accept an union of all arguments accepted by primitive's + ``set_training_data`` method and produce method(s) and then use them accordingly when + computing results. + + The default implementation of this method just calls first ``set_training_data`` method, + ``fit`` method, and all produce methods listed in ``produce_methods`` in order and is + potentially inefficient. + + Parameters + ---------- + produce_methods: + A list of names of produce methods to call. + inputs: + The inputs given to ``set_training_data`` and all produce methods. + outputs: + The outputs given to ``set_training_data``. + timeout: + A maximum time this primitive should take to both fit the primitive and produce outputs + for all produce methods listed in ``produce_methods`` argument, in seconds. 
+ iterations: + How many of internal iterations should the primitive do for both fitting and producing + outputs of all produce methods. + + Returns + ------- + A dict of values for each produce method wrapped inside ``MultiCallResult``. + """ + + return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs, outputs=outputs) + + def _fit_multi_produce(self, *, produce_methods: typing.Sequence[str], timeout: float = None, iterations: int = None, **kwargs: typing.Dict[str, typing.Any]) -> MultiCallResult: + """ + We do not want a public API to use ``kwargs``, but such implementation allows easier subclassing and reuse + of a default implementation. Do not call directly. + """ + + try: + expected_arguments = set(self.metadata.query()['primitive_code'].get('instance_methods', {})['set_training_data']['arguments']) + except KeyError as error: + raise exceptions.InvalidArgumentValueError("Unknown produce method name '{method_name}'.".format(method_name='set_training_data')) from error + + arguments = {name: value for name, value in kwargs.items() if name in expected_arguments} + + start = time.perf_counter() + self.set_training_data(**arguments) # type: ignore + delta = time.perf_counter() - start + + # Decrease the amount of time available to other calls. This delegates responsibility + # of raising a "TimeoutError" exception to fit and produce methods themselves. + # It also assumes that if one passes a negative timeout value to a fit or a produce + # method, it raises a "TimeoutError" exception correctly. + if timeout is not None: + timeout -= delta + + start = time.perf_counter() + fit_result = self.fit(timeout=timeout, iterations=iterations) + delta = time.perf_counter() - start + + if timeout is not None: + timeout -= delta + + if not isinstance(fit_result, CallResult): + raise exceptions.InvalidReturnTypeError("Primitive's fit method has not returned a CallResult.") + + produce_results = self._multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, **kwargs) + + results: typing.List[typing.Union[CallResult, MultiCallResult]] = [fit_result, produce_results] + + # We return the maximum number of iterations done by a fit method or any produce method we called. + iterations_done = None + for result in results: + if result.iterations_done is not None: + if iterations_done is None: + iterations_done = result.iterations_done + else: + iterations_done = max(iterations_done, result.iterations_done) + + return MultiCallResult( + # We return values just from produce methods. + values=produce_results.values, + has_finished=all(result.has_finished for result in results), + iterations_done=iterations_done, + ) + + @abc.abstractmethod + def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: + """ + Sets current training data of this primitive. + + This marks training data as changed even if new training data is the same as + previous training data. + + Standard sublasses in this package do not adhere to the Liskov substitution principle when + inheriting this method because they do not necessary accept all arguments found in the base + class. This means that one has to inspect which arguments are accepted at runtime, or in + other words, one has to inspect which exactly subclass a primitive implements, if + you are accepting a wider range of primitives. This relaxation is allowed only for + standard subclasses found in this package. 
Primitives themselves should not break + the Liskov substitution principle but should inherit from a suitable base class. + + Parameters + ---------- + inputs: + The inputs. + outputs: + The outputs. + """ + + @abc.abstractmethod + def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: + """ + Fits primitive using inputs and outputs (if any) using currently set training data. + + The returned value should be a ``CallResult`` object with ``value`` set to ``None``. + + If ``fit`` has already been called in the past on different training data, + this method fits it **again from scratch** using currently set training data. + + On the other hand, caller can call ``fit`` multiple times on the same training data + to continue fitting. + + If ``fit`` fully fits using provided training data, there is no point in making further + calls to this method with same training data, and in fact further calls can be noops, + or a primitive can decide to fully refit from scratch. + + In the case fitting can continue with same training data (even if it is maybe not reasonable, + because the internal metric primitive is using looks like fitting will be degrading), if ``fit`` + is called again (without setting training data), the primitive has to continue fitting. + + Caller can provide ``timeout`` information to guide the length of the fitting process. + Ideally, a primitive should adapt its fitting process to try to do the best fitting possible + inside the time allocated. If this is not possible and the primitive reaches the timeout + before fitting, it should raise a ``TimeoutError`` exception to signal that fitting was + unsuccessful in the given time. The state of the primitive after the exception should be + as the method call has never happened and primitive should continue to operate normally. + The purpose of ``timeout`` is to give opportunity to a primitive to cleanly manage + its state instead of interrupting execution from outside. Maintaining stable internal state + should have precedence over respecting the ``timeout`` (caller can terminate the misbehaving + primitive from outside anyway). If a longer ``timeout`` would produce different fitting, + then ``CallResult``'s ``has_finished`` should be set to ``False``. + + Some primitives have internal fitting iterations (for example, epochs). For those, caller + can provide how many of primitive's internal iterations should a primitive do before returning. + Primitives should make iterations as small as reasonable. If ``iterations`` is ``None``, + then there is no limit on how many iterations the primitive should do and primitive should + choose the best amount of iterations on its own (potentially controlled through + hyper-parameters). If ``iterations`` is a number, a primitive has to do those number of + iterations (even if not reasonable), if possible. ``timeout`` should still be respected + and potentially less iterations can be done because of that. Primitives with internal + iterations should make ``CallResult`` contain correct values. + + For primitives which do not have internal iterations, any value of ``iterations`` + means that they should fit fully, respecting only ``timeout``. + + Parameters + ---------- + timeout: + A maximum time this primitive should be fitting during this method call, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + ------- + A ``CallResult`` with ``None`` value. 
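+
+        For example, a caller may repeatedly call ``fit(timeout=10, iterations=5)`` and stop once
+        the returned ``CallResult`` has ``has_finished`` set to ``True``.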
+ """ + + @abc.abstractmethod + def get_params(self) -> Params: + """ + Returns parameters of this primitive. + + Parameters are all parameters of the primitive which can potentially change during a life-time of + a primitive. Parameters which cannot are passed through constructor. + + Parameters should include all data which is necessary to create a new instance of this primitive + behaving exactly the same as this instance, when the new instance is created by passing the same + parameters to the class constructor and calling ``set_params``. + + No other arguments to the method are allowed (except for private arguments). + + Returns + ------- + An instance of parameters. + """ + + @abc.abstractmethod + def set_params(self, *, params: Params) -> None: + """ + Sets parameters of this primitive. + + Parameters are all parameters of the primitive which can potentially change during a life-time of + a primitive. Parameters which cannot are passed through constructor. + + No other arguments to the method are allowed (except for private arguments). + + Parameters + ---------- + params: + An instance of parameters. + """ + + def __getstate__(self) -> dict: + """ + Returns state which is used to pickle an instance of a primitive. + + By default it returns standard constructor arguments and value + returned from ``get_params`` method. + + Consider extending default implementation if your primitive accepts + additional constructor arguments you would like to preserve when pickling. + + Note that unpickled primitive instances can generally continue to work only + inside the same environment they were pickled in because they continue to use + same ``docker_containers``, ``volumes``, and ``temporary_directory`` values + passed initially to primitive's constructor. Those generally do not work in + another environment where those resources might be available differently. + Consider constructing primitive instance directly providing updated constructor + arguments and then using ``get_params``/``set_params`` to restore primitive's + state. + + Returns + ------- + State to pickle. + """ + + standard_arguments = { + 'hyperparams': self.hyperparams, + 'random_seed': self.random_seed, + 'docker_containers': self.docker_containers, + 'volumes': self.volumes, + 'temporary_directory': self.temporary_directory, + } + expected_constructor_arguments = self.metadata.query()['primitive_code'].get('instance_methods', {})['__init__']['arguments'] + + return { + 'constructor': {name: value for name, value in standard_arguments.items() if name in expected_constructor_arguments}, + 'params': self.get_params(), + } + + def __setstate__(self, state: dict) -> None: + """ + Uses ``state`` to restore the state of a primitive when unpickling. + + By default it passes constructor arguments to the constructor and + calls ``get_params``. + + Parameters + ---------- + state: + Unpickled state. 
+ """ + + self.__init__(**state['constructor']) # type: ignore + self.set_params(params=state['params']) + + def __repr__(self) -> str: + if 'random_seed' in self.metadata.query().get('primitive_code', {}).get('instance_methods', {}).get('__init__', {}).get('arguments', []): + return '{class_name}(hyperparams={hyperparams}, random_seed={random_seed})'.format( + class_name=self.metadata.query()['python_path'], + hyperparams=self.hyperparams, + random_seed=self.random_seed, + ) + else: + return '{class_name}(hyperparams={hyperparams})'.format( + class_name=self.metadata.query()['python_path'], + hyperparams=self.hyperparams, + ) + + +class ContinueFitMixin(typing.Generic[Inputs, Outputs, Params, Hyperparams], metaclass=utils.GenericMetaclass): + @abc.abstractmethod + def continue_fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: + """ + Similar to base ``fit``, this method fits the primitive using inputs and outputs (if any) + using currently set training data. + + The difference is what happens when currently set training data is different from + what the primitive might have already been fitted on. ``fit`` resets parameters and + refits the primitive (restarts fitting), while ``continue_fit`` fits the primitive + further on new training data. ``fit`` does **not** have to be called before ``continue_fit``, + calling ``continue_fit`` first starts fitting as well. + + Caller can still call ``continue_fit`` multiple times on the same training data as well, + in which case primitive should try to improve the fit in the same way as with ``fit``. + + From the perspective of a caller of all other methods, the training data in effect + is still just currently set training data. If a caller wants to call ``gradient_output`` + on all data on which the primitive has been fitted through multiple calls of ``continue_fit`` + on different training data, the caller should pass all this data themselves through + another call to ``set_training_data``, do not call ``fit`` or ``continue_fit`` again, + and use ``gradient_output`` method. In this way primitives which truly support + continuation of fitting and need only the latest data to do another fitting, do not + have to keep all past training data around themselves. + + If a primitive supports this mixin, then both ``fit`` and ``continue_fit`` can be + called. ``continue_fit`` always continues fitting, if it was started through ``fit`` + or ``continue_fit`` and fitting has not already finished. Calling ``fit`` always restarts + fitting after ``continue_fit`` has been called, even if training data has not changed. + + Primitives supporting this mixin and which operate on categorical target columns should + use ``all_distinct_values`` metadata to obtain which all values (labels) can be in + a target column, even if currently set training data does not contain all those values. + + Parameters + ---------- + timeout: + A maximum time this primitive should be fitting during this method call, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + ------- + A ``CallResult`` with ``None`` value. + """ + + +class SamplingCompositionalityMixin(typing.Generic[Inputs, Outputs, Params, Hyperparams], metaclass=utils.GenericMetaclass): + """ + This mixin signals to a caller that the primitive is probabilistic but + may be likelihood free. 
+ """ + + @abc.abstractmethod + def sample(self, *, inputs: Inputs, num_samples: int = 1, timeout: float = None, iterations: int = None) -> CallResult[typing.Sequence[Outputs]]: + """ + Sample output for each input from ``inputs`` ``num_samples`` times. + + Semantics of ``timeout`` and ``iterations`` is the same as in ``produce``. + + Parameters + ---------- + inputs: + The inputs of shape [num_inputs, ...]. + num_samples: + The number of samples to return in a set of samples. + timeout: + A maximum time this primitive should take to sample outputs during this method call, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + ------- + The multiple sets of samples of shape [num_samples, num_inputs, ...] wrapped inside + ``CallResult``. While the output value type is specified as ``Sequence[Outputs]``, the + output value can be in fact any container type with dimensions/shape equal to combined + ``Sequence[Outputs]`` dimensions/shape. Subclasses should specify which exactly type + the output is. + """ + + +class ProbabilisticCompositionalityMixin(typing.Generic[Inputs, Outputs, Params, Hyperparams], metaclass=utils.GenericMetaclass): + """ + This mixin provides additional abstract methods which primitives should implement to + help callers with doing various end-to-end refinements using probabilistic + compositionality. + + This mixin adds methods to support at least: + + * Metropolis-Hastings + + Mixin should be used together with ``SamplingCompositionalityMixin`` mixin. + """ + + @abc.abstractmethod + def log_likelihoods(self, *, outputs: Outputs, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: + """ + Returns log probability of outputs given inputs and params under this primitive: + + log(p(output_i | input_i, params)) + + Parameters + ---------- + outputs: + The outputs. The number of samples should match ``inputs``. + inputs: + The inputs. The number of samples should match ``outputs``. + timeout: + A maximum time this primitive should take to produce outputs during this method call, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + ------- + log(p(output_i | input_i, params))) wrapped inside ``CallResult``. + The number of columns should match the number of target columns in ``outputs``. + """ + + def log_likelihood(self, *, outputs: Outputs, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: + """ + Returns log probability of outputs given inputs and params under this primitive: + + sum_i(log(p(output_i | input_i, params))) + + By default it calls ``log_likelihoods`` and tries to automatically compute a sum, but subclasses can + implement a more efficient or even correct version. + + Parameters + ---------- + outputs: + The outputs. The number of samples should match ``inputs``. + inputs: + The inputs. The number of samples should match ``outputs``. + timeout: + A maximum time this primitive should take to produce outputs during this method call, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + ------- + sum_i(log(p(output_i | input_i, params))) wrapped inside ``CallResult``. + The number of returned samples is always 1. + The number of columns should match the number of target columns in ``outputs``. 
+ """ + + result = self.log_likelihoods(outputs=outputs, inputs=inputs, timeout=timeout, iterations=iterations) + + return CallResult(utils.columns_sum(result.value), result.has_finished, result.iterations_done) + + +Container = typing.TypeVar('Container', bound=typing.Union[types.Container]) # type: ignore + + +# TODO: This is not yet a properly defined type which would really be recognized similar to Container. +# You should specify a proper type in your subclass. Type checking might complain that your +# type does not match the parent type, but ignore it (add "type: ignore" comment to that line). +# This type will be fixed in the future. +class Gradients(typing.Generic[Container]): + """ + A type representing a structure similar to ``Container``, but the values are of type ``Optional[float]``. + Value is ``None`` if gradient for that part of the structure is not possible. + """ + + +class GradientCompositionalityMixin(typing.Generic[Inputs, Outputs, Params, Hyperparams], metaclass=utils.GenericMetaclass): + """ + This mixin provides additional abstract methods which primitives should implement to + help callers with doing various end-to-end refinements using gradient-based + compositionality. + + This mixin adds methods to support at least: + + * gradient-based, compositional end-to-end training + * regularized pre-training + * multi-task adaptation + * black box variational inference + * Hamiltonian Monte Carlo + """ + + @abc.abstractmethod + def gradient_output(self, *, outputs: Outputs, inputs: Inputs) -> Gradients[Outputs]: + """ + Returns the gradient of loss sum_i(L(output_i, produce_one(input_i))) with respect to outputs. + + When fit term temperature is set to non-zero, it should return the gradient with respect to outputs of: + + sum_i(L(output_i, produce_one(input_i))) + temperature * sum_i(L(training_output_i, produce_one(training_input_i))) + + When used in combination with the ``ProbabilisticCompositionalityMixin``, it returns gradient + of sum_i(log(p(output_i | input_i, params))) with respect to outputs. + + When fit term temperature is set to non-zero, it should return the gradient with respect to outputs of: + + sum_i(log(p(output_i | input_i, params))) + temperature * sum_i(log(p(training_output_i | training_input_i, params))) + + Parameters + ---------- + outputs: + The outputs. + inputs: + The inputs. + + Returns + ------- + A structure similar to ``Container`` but the values are of type ``Optional[float]``. + """ + + @abc.abstractmethod + def gradient_params(self, *, outputs: Outputs, inputs: Inputs) -> Gradients[Params]: + """ + Returns the gradient of loss sum_i(L(output_i, produce_one(input_i))) with respect to params. + + When fit term temperature is set to non-zero, it should return the gradient with respect to params of: + + sum_i(L(output_i, produce_one(input_i))) + temperature * sum_i(L(training_output_i, produce_one(training_input_i))) + + When used in combination with the ``ProbabilisticCompositionalityMixin``, it returns gradient + of sum_i(log(p(output_i | input_i, params))) with respect to params. + + When fit term temperature is set to non-zero, it should return the gradient with respect to params of: + + sum_i(log(p(output_i | input_i, params))) + temperature * sum_i(log(p(training_output_i | training_input_i, params))) + + Parameters + ---------- + outputs: + The outputs. + inputs: + The inputs. + + Returns + ------- + A version of ``Params`` with all differentiable fields from ``Params`` and values set to gradient for each parameter. 
+ """ + + def forward(self, *, inputs: Inputs) -> Outputs: + """ + Similar to ``produce`` method but it is meant to be used for a forward pass during + backpropagation-based end-to-end training. Primitive can implement it differently + than ``produce``, e.g., forward pass during training can enable dropout layers, or + ``produce`` might not compute gradients while ``forward`` does. + + By default it calls ``produce`` for one iteration. + + Parameters + ---------- + inputs: + The inputs of shape [num_inputs, ...]. + + Returns + ------- + The outputs of shape [num_inputs, ...]. + """ + + return self.produce(inputs=inputs, timeout=None, iterations=1).value # type: ignore + + @abc.abstractmethod + def backward(self, *, gradient_outputs: Gradients[Outputs], fine_tune: bool = False, fine_tune_learning_rate: float = 0.00001, + fine_tune_weight_decay: float = 0.00001) -> typing.Tuple[Gradients[Inputs], Gradients[Params]]: + """ + Returns the gradient with respect to inputs and with respect to params of a loss + that is being backpropagated end-to-end in a pipeline. + + This is the standard backpropagation algorithm: backpropagation needs to be preceded by a + forward propagation (``forward`` method call). + + Parameters + ---------- + gradient_outputs: + The gradient of the loss with respect to this primitive's output. During backpropagation, + this comes from the next primitive in the pipeline, i.e., the primitive whose input + is the output of this primitive during the forward execution with ``forward`` (and ``produce``). + fine_tune: + If ``True``, executes a fine-tuning gradient descent step as a part of this call. + This provides the most straightforward way of end-to-end training/fine-tuning. + fine_tune_learning_rate: + Learning rate for end-to-end training/fine-tuning gradient descent steps. + fine_tune_weight_decay: + L2 regularization (weight decay) coefficient for end-to-end training/fine-tuning gradient + descent steps. + + Returns + ------- + A tuple of the gradient with respect to inputs and with respect to params. + """ + + @abc.abstractmethod + def set_fit_term_temperature(self, *, temperature: float = 0) -> None: + """ + Sets the temperature used in ``gradient_output`` and ``gradient_params``. + + Parameters + ---------- + temperature: + The temperature to use, [0, inf), typically, [0, 1]. + """ + + +class LossFunctionMixin(typing.Generic[Inputs, Outputs, Params, Hyperparams], metaclass=utils.GenericMetaclass): + """ + Mixin which provides abstract methods for a caller to call to inspect which + loss function or functions a primitive is using internally, and to compute + loss on given inputs and outputs. + """ + + @abc.abstractmethod + def get_loss_functions(self) -> typing.Sequence[typing.Tuple[problem.PerformanceMetric, PrimitiveBase, None]]: # type: ignore + """ + Returns a list of loss functions used by the primitive. Each element of the list can be: + + * A D3M metric value of the loss function used by the primitive during the last fitting. + * Primitives can be passed to other primitives as arguments. As such, some primitives + can accept another primitive as a loss function to use, or use it internally. A primitive + can expose this loss primitive to others, providing directly an instance of the primitive + being used during the last fitting. + * ``None`` if using a non-standard loss function. Used so that the loss function can still + be exposed through ``loss`` and ``losses`` methods. + + It should return an empty list if the primitive does not use loss functions at all. 
+ + The order in the list matters because the loss function index is used for ``loss`` and ``losses`` methods. + + Returns + ------- + A list of: a D3M standard metric value of the loss function used, + or a D3M primitive used to compute loss, or ``None``. + """ + + @abc.abstractmethod + def losses(self, *, loss_function: int, inputs: Inputs, outputs: Outputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: + """ + Returns the loss L(output_i, produce_one(input_i)) for each (input_i, output_i) pair + using a loss function used by the primitive during the last fitting, identified by the + ``loss_function`` index in the list of loss functions as returned by the ``get_loss_functions``. + + Parameters + ---------- + loss_function: + An index of the loss function to use. + inputs: + The inputs. + outputs: + The outputs. + timeout: + A maximum time this primitive should take to produce outputs during this method call, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + ------- + L(output_i, produce_one(input_i)) for each (input_i, output_i) pair + wrapped inside ``CallResult``. + The number of columns should match the number of target columns in ``outputs``. + """ + + def loss(self, *, loss_function: int, inputs: Inputs, outputs: Outputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: + """ + Returns the loss sum_i(L(output_i, produce_one(input_i))) for all (input_i, output_i) pairs + using a loss function used by the primitive during the last fitting, identified by the + ``loss_function`` index in the list of loss functions as returned by the ``get_loss_functions``. + + By default it calls ``losses`` and tries to automatically compute a sum, but subclasses can + implement a more efficient or even correct version. + + Parameters + ---------- + loss_function: + An index of the loss function to use. + inputs: + The inputs. + outputs: + The outputs. + timeout: + A maximum time this primitive should take to produce outputs during this method call, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + ------- + sum_i(L(output_i, produce_one(input_i))) for all (input_i, output_i) pairs + wrapped inside ``CallResult``. + The number of returned samples is always 1. + The number of columns should match the number of target columns in ``outputs``. + """ + + result = self.losses(loss_function=loss_function, inputs=inputs, outputs=outputs, timeout=timeout, iterations=iterations) + + return CallResult(utils.columns_sum(result.value), result.has_finished, result.iterations_done) + + +class NeuralNetworkModuleMixin(typing.Generic[Inputs, Outputs, Params, Hyperparams, Module], metaclass=utils.GenericMetaclass): + """ + Mixin which provides an abstract method for connecting neural network + modules together. Mixin is parameterized with type variable ``Module``. + These modules can be either single layers, or they can be blocks of layers. + The construction of these modules is done by mapping the neural network + to the pipeline structure, where primitives (exposing modules through this + abstract method) are passed to followup layers through hyper-parameters. + The whole such structure is then passed for the final time as a hyper-parameter + to a training primitive which then builds the internal representation of the neural + network and trains it. 
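+
+    For illustration only (the ``previous_layer`` hyper-parameter and ``_make_module``
+    helper are hypothetical), a layer primitive might implement the abstract method
+    below as::
+
+        def get_neural_network_module(self, *, input_module: Module) -> Module:
+            previous_layer = self.hyperparams['previous_layer']
+            if previous_layer is None:
+                # This is the initial layer, connect it directly to the pipeline input.
+                return self._make_module(input_module)
+            # Otherwise first build the chain of modules of preceding layers, recursively.
+            return self._make_module(previous_layer.get_neural_network_module(input_module=input_module))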
+ """ + + @abc.abstractmethod + def get_neural_network_module(self, *, input_module: Module) -> Module: + """ + Returns a neural network module corresponding to this primitive. That module + might be already connected to other modules, which can be done by + primitive calling this method recursively on other primitives. If this + is initial layer of the neural network, it input is provided through + ``input_module`` argument. + + Parameters + ---------- + input_module: + The input module to the initial layer of the neural network. + + Returns + ------- + The ``Module`` instance corresponding to this primitive. + """ + + +class NeuralNetworkObjectMixin(typing.Generic[Inputs, Outputs, Params, Hyperparams, Module], metaclass=utils.GenericMetaclass): + """ + Mixin which provides an abstract method which returns auxiliary objects for use + in representing neural networks as pipelines: loss functions, optimizers, etc. + + One should consider the use of other primitive metadata (primitive family, algorithm + types) to describe the primitive implementing this mixin and limit primitives + in hyper-parameters. + """ + + @abc.abstractmethod + def get_neural_network_object(self, module: Module) -> typing.Any: + """ + Returns a neural network object. The object is opaque from the perspective + of the pipeline. The caller is responsible to assure that the returned + object is of correct type and interface and that it is passed on to + a correct consumer understanding the object. + + Parameters + ---------- + module: + The module representing the neural network for which the object is requested. + It should be always provided even if particular implementation does not use it. + + Returns + ------- + An opaque object. + """ + + +def singleton(f: typing.Callable) -> typing.Callable: + """ + If a produce method is using this decorator, it is signaling that all outputs from the produce method are + sequences of length 1. This is useful because a caller can then directly extract this element. + + Example of such produce methods are produce methods of primitives which compute loss, which are returning + one number for multiple inputs. With this decorator they can return a sequence with this one number, but + caller which cares about the loss can extract it out. At the same time, other callers which operate + only on sequences can continue to operate normally. + + We can see other produce methods as mapping produce methods, and produce methods with this decorator as + reducing produce methods. + """ + + # Mark a produce method as a singleton. This is our custom flag. + f.__singleton__ = True # type: ignore + + return f + + +def inputs_across_samples(func: typing.Callable = None, inputs: typing.Sequence[str] = None, *args: str) -> typing.Callable: + """ + A produce method can use this decorator to signal which of the inputs (arguments) is using across + all samples and not sample by sample. + + For many produce methods it does not matter if it is called 100x on 1 sample or 1x on 100 samples, + but not all produce methods are like that and some produce results based on which all inputs were + given to them. If just a subset of inputs is given, results are different. An example of this is + ``produce_distance_matrix`` method which returns a NxN matrix where N is number of samples, computing + a distance from each sample to each other sample. + + When inputs have a primary key without uniqueness constraint, then "sample" for the purpose of + this decorator means all samples with the same primary key value. 
+ + Decorator accepts a list of inputs which are used across all samples. By default, `inputs` + argument name is used. + """ + + if callable(func): + if inputs is None: + inputs = ('inputs',) + + # Make sure values are unique and sorted. + inputs = tuple(sorted(set(inputs))) + + # List inputs which a produce method computes across samples. This is our custom flag. + # That listed names are really argument names is checked during metadata generation. + func.__inputs_across_samples__ = inputs # type: ignore + + return func + + else: + def decorator(f): + # We do not have to call "functool.update_wrapper" or something similar + # because we are in fact returning the same function "f", just with + # set "__inputs_across_samples__" attribute + return inputs_across_samples(f, [s for s in [func, inputs] + list(args) if isinstance(s, str)]) + + return decorator + + +# We register additional immutable types. We are doing it this way to overcome issues with import cycles. +# This is a tricky one. Primitive instances are generally mutable, they can change state when they are used. +# But as part of hyper-parameters, they can be used as instances and are seen as immutable because the idea +# is that TA2 will make a copy of the primitive before passing it in as a hyper-parameter, leaving initial +# instance intact. +if PrimitiveBase not in utils.additional_immutable_types: + utils.additional_immutable_types += (PrimitiveBase,) diff --git a/d3m/d3m/primitive_interfaces/clustering.py b/d3m/d3m/primitive_interfaces/clustering.py new file mode 100644 index 0000000..f3caa19 --- /dev/null +++ b/d3m/d3m/primitive_interfaces/clustering.py @@ -0,0 +1,103 @@ +import abc +import typing + +from d3m import types, utils +from d3m.primitive_interfaces.base import * +from d3m.primitive_interfaces.transformer import TransformerPrimitiveBase +from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase + +__all__ = ('ClusteringLearnerPrimitiveBase', 'ClusteringTransformerPrimitiveBase', 'DistanceMatrixOutput', 'ClusteringDistanceMatrixMixin') + +DistanceMatrixOutput = typing.TypeVar('DistanceMatrixOutput', bound=typing.Union[types.Container]) # type: ignore + + +class ClusteringLearnerPrimitiveBase(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): + """ + A base class for primitives implementing a clustering algorithm which learns clusters. + """ + + @abc.abstractmethod + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: + """ + ``produce`` method should return a membership map. + + A data structure that for each input sample tells to which cluster that sample was assigned to. So ``Outputs`` + should have the same number of samples than ``Inputs``, and the value at each output sample should represent + a cluster. Consider representing it with just a simple numeric identifier. + + Parameters + ---------- + inputs: + The inputs of shape [num_inputs, ...]. + timeout: + A maximum time this primitive should take to produce outputs during this method call, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + ------- + The outputs of shape [num_inputs, 1] wrapped inside ``CallResult`` for a simple numeric + cluster identifier. + """ + + +class ClusteringTransformerPrimitiveBase(TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A base class for primitives implementing a clustering algorithm without learning any sort of model. 
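+
+    For illustration only (the column name and container type are arbitrary), with four
+    input samples ``produce`` might return a membership map such as::
+
+        container.DataFrame({'cluster': [0, 0, 1, 2]})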
+ """ + + @abc.abstractmethod + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: + """ + ``produce`` method should return a membership map. + + A data structure that for each input sample tells to which cluster that sample was assigned to. So ``Outputs`` + should have the same number of samples than ``Inputs``, and the value at each output sample should represent + a cluster. Consider representing it with just a simple numeric identifier. + + If an implementation of this method computes clusters based on the whole set of input samples, + use ``inputs_across_samples`` decorator to mark ``inputs`` as being computed across samples. + + Parameters + ---------- + inputs: + The inputs of shape [num_inputs, ...]. + timeout: + A maximum time this primitive should take to produce outputs during this method call, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + ------- + The outputs of shape [num_inputs, 1] wrapped inside ``CallResult`` for a simple numeric + cluster identifier. + """ + + +class ClusteringDistanceMatrixMixin(typing.Generic[Inputs, Outputs, Params, Hyperparams, DistanceMatrixOutput], metaclass=utils.GenericMetaclass): + @abc.abstractmethod + def produce_distance_matrix(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[DistanceMatrixOutput]: + """ + Semantics of this call are the same as the call to a regular ``produce`` method, just + that the output is a distance matrix instead of a membership map. + + Implementations of this method should use ``inputs_across_samples`` decorator to mark ``inputs`` + as being computed across samples. + + When this mixin is used with `ClusteringTransformerPrimitiveBase`, ``Params`` type variable should + be set to ``None``. + + Parameters + ---------- + inputs: + The inputs of shape [num_inputs, ...]. + timeout: + A maximum time this primitive should take to produce outputs during this method call, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + ------- + The distance matrix of shape [num_inputs, num_inputs, ...] wrapped inside ``CallResult``, where (i, j) element + of the matrix represent a distance between i-th and j-th sample in the inputs. + """ diff --git a/d3m/d3m/primitive_interfaces/distance.py b/d3m/d3m/primitive_interfaces/distance.py new file mode 100644 index 0000000..fd2e399 --- /dev/null +++ b/d3m/d3m/primitive_interfaces/distance.py @@ -0,0 +1,197 @@ +import abc +import typing + +from d3m import types +from d3m.primitive_interfaces.base import * +from d3m.primitive_interfaces.transformer import TransformerPrimitiveBase + +__all__ = ('PairwiseDistanceLearnerPrimitiveBase', 'PairwiseDistanceTransformerPrimitiveBase', 'InputLabels') + +InputLabels = typing.TypeVar('InputLabels', bound=typing.Union[types.Container]) # type: ignore + + +# Defining Generic with all type variables allows us to specify the order and an additional type variable. +class PairwiseDistanceLearnerPrimitiveBase(PrimitiveBase[Inputs, Outputs, Params, Hyperparams], typing.Generic[Inputs, InputLabels, Outputs, Params, Hyperparams]): + """ + A base class for primitives which learn distances (however defined) between two + different sets of instances. + + Class is parameterized using five type variables, ``Inputs``, ``InputLabels``, ``Outputs``, ``Params``, and ``Hyperparams``. 
+ """ + + @abc.abstractmethod + def produce(self, *, inputs: Inputs, second_inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: # type: ignore + """ + Computes distance matrix between two sets of data. + + Implementations of this method should use ``inputs_across_samples`` decorator to mark ``inputs`` + and ``second_inputs`` as being computed across samples. + + Parameters + ---------- + inputs: + The first set of collections of instances. + second_inputs: + The second set of collections of instances. + timeout: + A maximum time this primitive should take to produce outputs during this method call, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + --------- + A n by m distance matrix describing the relationship between each instance in inputs[0] and each instance + in inputs[1] (n and m are the number of instances in inputs[0] and inputs[1], respectively), + wrapped inside ``CallResult``. + """ + + @abc.abstractmethod + def set_training_data(self, *, inputs: Inputs, input_labels: InputLabels) -> None: # type: ignore + """ + Sets training data of this primitive. + + Parameters + ---------- + inputs: + The inputs. + input_labels: + A set of class labels for the inputs. + """ + + def multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, second_inputs: Inputs, timeout: float = None, iterations: int = None) -> MultiCallResult: # type: ignore + """ + A method calling multiple produce methods at once. + + Parameters + ---------- + produce_methods: + A list of names of produce methods to call. + inputs: + The first set of collections of instances. + second_inputs: + The second set of collections of instances. + timeout: + A maximum time this primitive should take to produce outputs for all produce methods + listed in ``produce_methods`` argument, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + ------- + A dict of values for each produce method wrapped inside ``MultiCallResult``. + """ + + return self._multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs, second_inputs=second_inputs) + + def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, input_labels: InputLabels, + second_inputs: Inputs, timeout: float = None, iterations: int = None) -> MultiCallResult: # type: ignore + """ + A method calling ``fit`` and after that multiple produce methods at once. + + Parameters + ---------- + produce_methods: + A list of names of produce methods to call. + inputs: + The first set of collections of instances. + input_labels: + A set of class labels for the inputs. + second_inputs: + The second set of collections of instances. + timeout: + A maximum time this primitive should take to both fit the primitive and produce outputs + for all produce methods listed in ``produce_methods`` argument, in seconds. + iterations: + How many of internal iterations should the primitive do for both fitting and producing + outputs of all produce methods. + + Returns + ------- + A dict of values for each produce method wrapped inside ``MultiCallResult``. 
+ """ + + return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs, input_labels=input_labels, second_inputs=second_inputs) + + +class PairwiseDistanceTransformerPrimitiveBase(TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A base class for primitives which compute distances (however defined) between two + different sets of instances without learning any sort of model. + """ + + @abc.abstractmethod + def produce(self, *, inputs: Inputs, second_inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: # type: ignore + """ + Computes distance matrix between two sets of data. + + Implementations of this method should use ``inputs_across_samples`` decorator to mark ``inputs`` + and ``second_inputs`` as being computed across samples. + + Parameters + ---------- + inputs: + The first set of collections of instances. + second_inputs: + The second set of collections of instances. + timeout: + A maximum time this primitive should take to produce outputs during this method call, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + --------- + A n by m distance matrix describing the relationship between each instance in inputs[0] and each instance + in inputs[1] (n and m are the number of instances in inputs[0] and inputs[1], respectively), + wrapped inside ``CallResult``. + """ + + def multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, second_inputs: Inputs, timeout: float = None, iterations: int = None) -> MultiCallResult: # type: ignore + """ + A method calling multiple produce methods at once. + + Parameters + ---------- + produce_methods: + A list of names of produce methods to call. + inputs: + The first set of collections of instances. + second_inputs: + The second set of collections of instances. + timeout: + A maximum time this primitive should take to produce outputs for all produce methods + listed in ``produce_methods`` argument, in seconds. + iterations: + How many of internal iterations should the primitive do. + + Returns + ------- + A dict of values for each produce method wrapped inside ``MultiCallResult``. + """ + + return self._multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs, second_inputs=second_inputs) + + def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, second_inputs: Inputs, timeout: float = None, iterations: int = None) -> MultiCallResult: # type: ignore + """ + A method calling ``fit`` and after that multiple produce methods at once. + + Parameters + ---------- + produce_methods: + A list of names of produce methods to call. + inputs: + The first set of collections of instances. + second_inputs: + The second set of collections of instances. + timeout: + A maximum time this primitive should take to both fit the primitive and produce outputs + for all produce methods listed in ``produce_methods`` argument, in seconds. + iterations: + How many of internal iterations should the primitive do for both fitting and producing + outputs of all produce methods. + + Returns + ------- + A dict of values for each produce method wrapped inside ``MultiCallResult``. 
+ """ + + return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs, second_inputs=second_inputs) diff --git a/d3m/d3m/primitive_interfaces/featurization.py b/d3m/d3m/primitive_interfaces/featurization.py new file mode 100644 index 0000000..4765e23 --- /dev/null +++ b/d3m/d3m/primitive_interfaces/featurization.py @@ -0,0 +1,22 @@ +from d3m.primitive_interfaces.base import * +from d3m.primitive_interfaces.transformer import TransformerPrimitiveBase + +__all__ = ('FeaturizationLearnerPrimitiveBase', 'FeaturizationTransformerPrimitiveBase') + + +class FeaturizationLearnerPrimitiveBase(PrimitiveBase[Inputs, Outputs, Params, Hyperparams]): + """ + A base class for primitives which transform raw data into a more usable form. + + Use this version for featurizers that allow for fitting (for domain-adaptation, data-specific deep + learning, etc.). Otherwise use `FeaturizationTransformerPrimitiveBase`. + """ + + +class FeaturizationTransformerPrimitiveBase(TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A base class for primitives which transform raw data into a more usable form. + + Use this version for featurizers that do not require or allow any fitting, and simply + transform data on demand. Otherwise use `FeaturizationLearnerPrimitiveBase`. + """ diff --git a/d3m/d3m/primitive_interfaces/generator.py b/d3m/d3m/primitive_interfaces/generator.py new file mode 100644 index 0000000..a44c383 --- /dev/null +++ b/d3m/d3m/primitive_interfaces/generator.py @@ -0,0 +1,62 @@ +import abc +import typing + +from d3m import container +from d3m.primitive_interfaces.base import * + +__all__ = ('GeneratorPrimitiveBase',) + + +class GeneratorPrimitiveBase(PrimitiveBase[container.List, Outputs, Params, Hyperparams]): + """ + A base class for primitives which have to be fitted before they can start + producing (useful) outputs, but they are fitted only on output data. + Moreover, they do not accept any inputs to generate outputs, + which is represented as a sequence (list) of non-negative integer values + to ``produce`` method, only to signal how many outputs are requested, and + which one from the potential set of outputs. + + The list of integer values to ``produce`` method provides support for batching. + A caller does not have to rely on the order in which the primitive is called + but can specify the index of the requested output. + + This class is parameterized using only by three type variables, + ``Outputs``, ``Params``, and ``Hyperparams``. + """ + + @abc.abstractmethod + def set_training_data(self, *, outputs: Outputs) -> None: # type: ignore + """ + Sets training data of this primitive. + + Parameters + ---------- + outputs: + The outputs. + """ + + def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: container.List, outputs: Outputs, timeout: float = None, iterations: int = None) -> MultiCallResult: + """ + A method calling ``fit`` and after that multiple produce methods at once. + + Parameters + ---------- + produce_methods: + A list of names of produce methods to call. + inputs: + The inputs given to all produce methods. + outputs: + The outputs given to ``set_training_data``. + timeout: + A maximum time this primitive should take to both fit the primitive and produce outputs + for all produce methods listed in ``produce_methods`` argument, in seconds. + iterations: + How many of internal iterations should the primitive do for both fitting and producing + outputs of all produce methods. 
+ + Returns + ------- + A dict of values for each produce method wrapped inside ``MultiCallResult``. + """ + + return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs, outputs=outputs) # type: ignore diff --git a/d3m/d3m/primitive_interfaces/supervised_learning.py b/d3m/d3m/primitive_interfaces/supervised_learning.py new file mode 100644 index 0000000..74efec4 --- /dev/null +++ b/d3m/d3m/primitive_interfaces/supervised_learning.py @@ -0,0 +1,10 @@ +from d3m.primitive_interfaces.base import * + +__all__ = ('SupervisedLearnerPrimitiveBase',) + + +class SupervisedLearnerPrimitiveBase(PrimitiveBase[Inputs, Outputs, Params, Hyperparams]): + """ + A base class for primitives which have to be fitted on both input and output data + before they can start producing (useful) outputs from inputs. + """ diff --git a/d3m/d3m/primitive_interfaces/transformer.py b/d3m/d3m/primitive_interfaces/transformer.py new file mode 100644 index 0000000..efed13e --- /dev/null +++ b/d3m/d3m/primitive_interfaces/transformer.py @@ -0,0 +1,71 @@ +import typing + +from d3m.primitive_interfaces.base import * + +__all__ = ('TransformerPrimitiveBase',) + + +class TransformerPrimitiveBase(PrimitiveBase[Inputs, Outputs, None, Hyperparams]): + """ + A base class for primitives which are not fitted at all and can + simply produce (useful) outputs from inputs directly. As such they + also do not have any state (params). + + This class is parameterized using only three type variables, ``Inputs``, + ``Outputs``, and ``Hyperparams``. + """ + + def set_training_data(self) -> None: # type: ignore + """ + A noop. + + Parameters + ---------- + """ + + return + + def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: + """ + A noop. + """ + + return CallResult(None) + + def get_params(self) -> None: + """ + A noop. + """ + + return None + + def set_params(self, *, params: None) -> None: + """ + A noop. + """ + + return + + def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, timeout: float = None, iterations: int = None) -> MultiCallResult: # type: ignore + """ + A method calling ``fit`` and after that multiple produce methods at once. + + Parameters + ---------- + produce_methods: + A list of names of produce methods to call. + inputs: + The inputs given to all produce methods. + timeout: + A maximum time this primitive should take to both fit the primitive and produce outputs + for all produce methods listed in ``produce_methods`` argument, in seconds. + iterations: + How many of internal iterations should the primitive do for both fitting and producing + outputs of all produce methods. + + Returns + ------- + A dict of values for each produce method wrapped inside ``MultiCallResult``. 
+ """ + + return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs) diff --git a/d3m/d3m/primitive_interfaces/unsupervised_learning.py b/d3m/d3m/primitive_interfaces/unsupervised_learning.py new file mode 100644 index 0000000..796215d --- /dev/null +++ b/d3m/d3m/primitive_interfaces/unsupervised_learning.py @@ -0,0 +1,48 @@ +import abc +import typing + +from d3m.primitive_interfaces.base import * + +__all__ = ('UnsupervisedLearnerPrimitiveBase',) + + +class UnsupervisedLearnerPrimitiveBase(PrimitiveBase[Inputs, Outputs, Params, Hyperparams]): + """ + A base class for primitives which have to be fitted before they can start + producing (useful) outputs from inputs, but they are fitted only on input data. + """ + + @abc.abstractmethod + def set_training_data(self, *, inputs: Inputs) -> None: # type: ignore + """ + Sets training data of this primitive. + + Parameters + ---------- + inputs: + The inputs. + """ + + def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, timeout: float = None, iterations: int = None) -> MultiCallResult: # type: ignore + """ + A method calling ``fit`` and after that multiple produce methods at once. + + Parameters + ---------- + produce_methods: + A list of names of produce methods to call. + inputs: + The inputs given to ``set_training_data`` and all produce methods. + timeout: + A maximum time this primitive should take to both fit the primitive and produce outputs + for all produce methods listed in ``produce_methods`` argument, in seconds. + iterations: + How many of internal iterations should the primitive do for both fitting and producing + outputs of all produce methods. + + Returns + ------- + A dict of values for each produce method wrapped inside ``MultiCallResult``. + """ + + return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs) diff --git a/d3m/d3m/runtime.py b/d3m/d3m/runtime.py new file mode 100644 index 0000000..4c98310 --- /dev/null +++ b/d3m/d3m/runtime.py @@ -0,0 +1,2911 @@ +import argparse +import inspect +import json +import logging +import os +import os.path +import pickle +import re +import sys +import tempfile +import traceback +import typing +import uuid + +import jsonschema # type: ignore +import frozendict # type: ignore +import pandas # type: ignore + +from d3m import container, deprecate, exceptions, types, utils +from d3m.container import dataset as dataset_module +from d3m.container import utils as container_utils +from d3m.metadata import base as metadata_base, hyperparams as hyperparams_module, pipeline as pipeline_module, pipeline_run as pipeline_run_module, problem +from d3m.primitive_interfaces import base + +logger = logging.getLogger(__name__) + +DEFAULT_SCORING_PIPELINE_ID = 'f596cd77-25f8-4d4c-a350-bb30ab1e58f6' +DEFAULT_SCORING_PIPELINE_PATH = os.path.join( + os.path.dirname(__file__), 'contrib', 'pipelines', DEFAULT_SCORING_PIPELINE_ID + '.yml', +) + +DATASET_ID_REGEX = re.compile('(_TRAIN|_TEST|_SCORE)$') + + +class Result: + """ + Results from running a pipeline. + + Parameters + ---------- + pipeline_run: + A pipeline run description. + values: + A map between data references and their values computed during pipeline run. + error: + If during a run an exception occurred, then it is available here. 
+ """ + + def __init__(self, pipeline_run: pipeline_run_module.PipelineRun, values: typing.Dict[str, typing.Any], error: Exception = None) -> None: + self.pipeline_run = pipeline_run + self.values = values + self.error = error + + def has_error(self) -> bool: + """ + Returns ``True`` if pipeline has not successfully finished. + """ + + return self.error is not None + + def check_success(self) -> None: + """ + Throws an exception if pipeline has not successfully finished. + """ + + if self.has_error(): + raise self.error + + +class MultiResult(typing.List[Result]): + """ + Results of running a pipeline multiple times. + """ + + @property + def pipeline_runs(self) -> typing.Sequence[pipeline_run_module.PipelineRun]: + return [result.pipeline_run for result in self] + + def has_error(self) -> bool: + """ + Returns ``True`` if any of pipelines has not successfully finished. + """ + + return any(result.has_error() for result in self) + + def check_success(self) -> None: + """ + Throws an exception if pipeline has not successfully finished in any of the runs. + """ + + for result in self: + result.check_success() + + +def get_singleton_value(value: typing.Any) -> typing.Any: + """ + A helper to extract a value from a singleton value (extracting a sole element of a + container of length 1). + """ + + if isinstance(value, pandas.DataFrame): + # Fetch the row as a list. This assures different columns can be of a different type. + singleton_value = container.List([value.iloc[0, k] for k in range(len(value.columns))]) + else: + singleton_value = value[0] + + if isinstance(singleton_value, types.Container): + singleton_value.metadata = metadata_base.DataMetadata() + singleton_value.metadata = value.metadata.copy_to( + singleton_value.metadata, + (0,), + ) + # TODO: We should also remove table metadata which might not hold true anymore. + # If original value was tabular, we now copied also metadata about tabular column dimension, + # but that is not true anymore for this singleton value, it is not tabular anymore. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/336 + singleton_value.metadata = singleton_value.metadata.generate(singleton_value) + + return singleton_value + + +# TODO: Add debug logging to the runtime. +class Runtime: + """ + Reference runtime to fit and produce a pipeline. + + Parameters + ---------- + pipeline: + A pipeline to run. + hyperparams: + Values for free hyper-parameters of the pipeline. It should be a list, where each element corresponds + to free hyper-parameters of the corresponding pipeline step. Not all free hyper-parameters have to be + specified. Default values are used for those which are not. Optional. + problem_description: + A parsed problem description in standard problem description schema. + context: + In which context to run pipelines, default is ``TESTING``. + random_seed: + A random seed to use for every run. This control all randomness during the run. + volumes_dir: + Path to a directory with static files required by primitives. + In the standard directory structure (as obtained running ``python3 -m d3m index download``). + scratch_dir: + Path to a directory to store any temporary files needed during execution. + is_standard_pipeline: + Is the pipeline a standard pipeline? + environment: + A description of the runtime environment, including engine versions, + Docker images, compute resources, and benchmarks. If not provided, + an attempt is made to determine it automatically. + users: + Users associated with running the pipeline. 
+ + Attributes + ---------- + pipeline: + A pipeline to run. + hyperparams: + Values for free hyper-parameters of the pipeline. It should be a list, where each element corresponds + to free hyper-parameters of the corresponding pipeline step. Not all free hyper-parameters have to be + specified. Default values are used for those which are not. Optional. + problem_description: + A parsed problem description in standard problem description schema. + context: + In which context to run pipelines, default is ``TESTING``. + random_seed: + A random seed to use for every run. This control all randomness during the run. + volumes_dir: + Path to a directory with static files required by primitives. + In the standard directory structure (as obtained running ``python3 -m d3m index download``). + scratch_dir: + Path to a directory to store any temporary files needed during execution. + is_standard_pipeline: + Is the pipeline a standard pipeline? + environment: + A description of the runtime environment, including engine versions, + Docker images, compute resources, and benchmarks. If not provided, + an attempt is made to determine it automatically. + users: + Users associated with running the pipeline. + current_step: + Which step is currently being ran. + phase: + Which phase are we currently running. + pipeline_run: + A current instance of pipeline run. + return_values: + Which values should the runtime keep during a pipeline run, even after they are necessary. + data_values: + Map between available data references and their values during the run. + steps_state: + Fitted state for each step of the pipeline. + """ + + pipeline: pipeline_module.Pipeline + hyperparams: typing.Sequence + problem_description: problem.Problem + context: metadata_base.Context + random_seed: int + volumes_dir: str + scratch_dir: str + is_standard_pipeline: bool + environment: pipeline_run_module.RuntimeEnvironment + users: typing.Sequence[pipeline_run_module.User] + current_step: int + phase: metadata_base.PipelineRunPhase + pipeline_run: pipeline_run_module.PipelineRun + return_values: typing.Sequence[str] + data_values: typing.Dict[str, typing.Any] + steps_state: typing.List[typing.Union[typing.Any, typing.List]] + + def __init__( + self, pipeline: pipeline_module.Pipeline, hyperparams: typing.Sequence = None, *, + problem_description: problem.Problem = None, context: metadata_base.Context, + random_seed: int = 0, volumes_dir: str = None, scratch_dir: str = None, + is_standard_pipeline: bool = False, environment: pipeline_run_module.RuntimeEnvironment = None, + users: typing.Sequence[pipeline_run_module.User] = None, + ) -> None: + self.pipeline = pipeline + self.hyperparams = hyperparams + self.problem_description = problem_description + self.context = context + self.random_seed = random_seed + self.volumes_dir = volumes_dir + self.scratch_dir = scratch_dir + self.is_standard_pipeline = is_standard_pipeline + self.users = users + + if environment is None: + self.environment = pipeline_run_module.RuntimeEnvironment() + else: + self.environment = environment + + # Preliminary check. 
+ self.pipeline.check(allow_placeholders=False, standard_pipeline=self.is_standard_pipeline) + + if self.hyperparams is not None: + self._check_hyperparams(self.pipeline, self.hyperparams) + + self.steps_state: typing.List[typing.Union[typing.Any, typing.List, None]] = [None for step in self.pipeline.steps] + + self._previous_pipeline_run: pipeline_run_module.PipelineRun = None + + self._initialize_run_state([], None, None) + + def _initialize_data_values(self, inputs: typing.Sequence[typing.Any]) -> None: + # TODO: Remove values from the "data_values" once they are not needed anymore to optimize memory use. + self.data_values: typing.Dict[str, typing.Any] = {} + + if self.phase is None: + return + + marked_problem_inputs: typing.Set[int] = set() + if self.problem_description is None: + problem_inputs: typing.List[typing.Dict] = [] + else: + problem_inputs = self.problem_description.get('inputs', []) + + for i, input_value in enumerate(inputs): + if isinstance(input_value, container.Dataset): + if problem_inputs: + input_value, marked_problem_indices = self._mark_columns(problem_inputs, input_value) + marked_problem_inputs.update(marked_problem_indices) + else: + # All standard pipeline inputs should be Datasets. + assert not self.is_standard_pipeline + + self.data_values['inputs.{i}'.format(i=i)] = input_value + + if len(marked_problem_inputs) != len(problem_inputs): + unmarked_problem_inputs = sorted(set(range(len(problem_inputs))) - marked_problem_inputs) + + raise exceptions.InvalidProblemError( + "Not all problem description inputs could be applied to input datasets: {inputs}".format( + inputs=', '.join(str(problem_inputs[unmarked_problem_input]) for unmarked_problem_input in unmarked_problem_inputs), + ) + ) + + def _clear_data_values(self) -> None: + self.data_values = {} + + def _initialize_run_state( + self, inputs: typing.Sequence[typing.Any], + phase: typing.Optional[metadata_base.PipelineRunPhase], + return_values: typing.Optional[typing.Sequence[str]], + ) -> None: + self.current_step = 0 + self.phase = phase + + if return_values is None: + self.return_values = self._get_all_outputs() + else: + # We sort "return_values" to have deterministic order. + self.return_values = sorted(set(return_values)) + + self._initialize_data_values(inputs) + + self._initialize_base_temporary_directory() + + self._initialize_pipeline_run() + + def _get_all_outputs(self) -> typing.Sequence[str]: + return ['outputs.{i}'.format(i=i) for i, output_description in enumerate(self.pipeline.outputs)] + + def _clear_run_state(self) -> None: + """ + After a pipeline run, we clear state which was necessary while pipeline was running, but it is not needed anymore. + """ + + # We keep "steps_state" so that we can produce. + + self.current_step = 0 + self.phase = None + self.return_values = None + + self._clear_data_values() + self._clear_base_temporary_directory() + self._clear_pipeline_run() + + def _check_hyperparams(self, pipeline: pipeline_module.Pipeline, hyperparams: typing.Sequence) -> None: + """ + Check provided values for free hyper-parameters. + """ + + if not utils.is_sequence(hyperparams): + raise exceptions.InvalidArgumentTypeError("Hyper-parameter values for the pipeline '{pipeline_id}' is not a sequence.".format( + pipeline_id=pipeline.id, + )) + + if len(hyperparams) != len(pipeline.steps): + raise exceptions.InvalidArgumentValueError( + "Hyper-parameter values for the pipeline '{pipeline_id}' do not match the number of steps in the pipeline: {hyperparams_steps} vs. 
{pipeline_steps}".format( + pipeline_id=pipeline.id, + hyperparams_steps=len(hyperparams), + pipeline_steps=len(pipeline.steps), + ), + ) + + for step_index, (hyperparams_for_step, step) in enumerate(zip(hyperparams, pipeline.steps)): + # Placeholder step is not really allowed, but we have it here for completeness. + # Its "get_free_hyperparams" returns an empty list. + if isinstance(step, pipeline_module.PlaceholderStep): + if not utils.is_sequence(hyperparams_for_step): + raise exceptions.InvalidArgumentTypeError("Hyper-parameter values for placeholder step {step_index} of pipeline '{pipeline_id}' is not a sequence.".format( + step_index=step_index, + pipeline_id=pipeline.id, + )) + + elif isinstance(step, pipeline_module.SubpipelineStep): + self._check_hyperparams(step.pipeline, hyperparams_for_step) + + elif isinstance(step, pipeline_module.PrimitiveStep): + if not isinstance(hyperparams_for_step, (dict, frozendict.frozendict)): + raise exceptions.InvalidArgumentTypeError("Hyper-parameter values for primitive step {step_index} of pipeline '{pipeline_id}' is not a dict.".format( + step_index=step_index, + pipeline_id=pipeline.id, + )) + + hyperparams_for_step_keys = set(hyperparams_for_step.keys()) + free_hyperparams_keys = set(step.get_free_hyperparams().keys()) + all_hyperparams_keys = set(step.get_all_hyperparams().keys()) + + if hyperparams_for_step_keys - all_hyperparams_keys: + raise exceptions.InvalidArgumentValueError( + "Hyper-parameter values for primitive step {step_index} of pipeline '{pipeline_id}' contain values for non-existent hyper-parameters: {hyperparams}".format( + step_index=step_index, + pipeline_id=pipeline.id, + hyperparams=sorted(hyperparams_for_step_keys - all_hyperparams_keys), + ), + ) + elif hyperparams_for_step_keys - free_hyperparams_keys: + raise exceptions.InvalidArgumentValueError( + "Hyper-parameter values for primitive step {step_index} of pipeline '{pipeline_id}' are overriding hyper-parameters fixed in the pipeline: {hyperparams}".format( + step_index=step_index, + pipeline_id=pipeline.id, + hyperparams=sorted(hyperparams_for_step_keys - free_hyperparams_keys), + ), + ) + + def _get_pipeline_run_class(self) -> typing.Type[pipeline_run_module.PipelineRun]: + return pipeline_run_module.PipelineRun + + def _initialize_pipeline_run(self) -> None: + if self.phase is None: + self.pipeline_run = None + return + + self.pipeline_run = self._get_pipeline_run_class()( + pipeline=self.pipeline, + problem_description=self.problem_description, + phase=self.phase, + context=self.context, + previous_pipeline_run=self._previous_pipeline_run, + environment=self.environment, + random_seed=self.random_seed, + is_standard_pipeline=self.is_standard_pipeline, + users=self.users + ) + + input_values = [] + for i, input_value in sorted((int(data_reference.split('.')[1]), input_value) for data_reference, input_value in self.data_values.items() if data_reference.startswith('inputs.')): + input_values.append(input_value) + + all_input_values_datasets = all(isinstance(input_value, container.Dataset) for input_value in input_values) + assert all_input_values_datasets or not self.is_standard_pipeline + + # Even if the pipeline is not a standard pipeline, we still record Dataset inputs (if all are Dataset inputs) + # into pipeline run to allow generation of pipeline runs for a subset of non-standard pipelines, especially + # those computing metafeatures. 
Because having inputs recorded is required for a pipeline run, any other + # (for other types of inputs) pipeline run is not a valid stand-alone pipeline run and you get an error if + # you want to serialize it to JSON. This is on purpose. (We could have a better error message though.) + # You can still build a pipeline run object for non-standard pipelines. This is being used for data + # preparation or scoring pipelines. + # See: https://gitlab.com/datadrivendiscovery/metalearning/issues/64 + if all_input_values_datasets: + for input_value in input_values: + self.pipeline_run.add_input_dataset(input_value) + + def _clear_pipeline_run(self) -> None: + self.pipeline_run = None + + def _initialize_base_temporary_directory(self) -> None: + if self.phase is None: + self._base_temporary_directory = None + self._base_temporary_directory_path = None + return + + self._base_temporary_directory = tempfile.TemporaryDirectory(dir=self.scratch_dir) + self._base_temporary_directory_path = os.path.abspath(self._base_temporary_directory.name) + + def _clear_base_temporary_directory(self) -> None: + if self._base_temporary_directory is not None: + self._base_temporary_directory.cleanup() + self._base_temporary_directory = None + self._base_temporary_directory_path = None + + def _check_pipeline(self, inputs: typing.Sequence[typing.Any]) -> None: + """ + Check with known inputs. + """ + + input_types = {} + for i, input_value in enumerate(inputs): + input_types['inputs.{i}'.format(i=i)] = type(input_value) + + self.pipeline.check(allow_placeholders=False, standard_pipeline=self.is_standard_pipeline, input_types=input_types) + + def _run_placeholder(self, step: pipeline_module.PlaceholderStep) -> None: + raise exceptions.InvalidPipelineError("Step {step_index} of pipeline '{pipeline_id}' is a placeholder but there should be no placeholders.".format( + step_index=self.current_step, + pipeline_id=self.pipeline.id, + )) + + # TODO: Make return type be equal to the current's class type, so that it adapts if this class is subclassed. + def _create_subpipeline(self, pipeline: pipeline_module.Pipeline, hyperparams: typing.Optional[typing.Sequence]) -> 'Runtime': + """ + Creates an instance of the subpipeline's runtime. + """ + + # We change the random seed in a deterministic way so that it does not matter in which order we run steps. + # Subpipelines are generally not a standard pipeline. + return type(self)( + pipeline, + hyperparams, + # TODO: Should we pass "problem_description" as well, but make it so that it does not try to mark columns again? + problem_description=None, + context=self.context, + random_seed=self.random_seed + self.current_step, + volumes_dir=self.volumes_dir, + scratch_dir=self.scratch_dir, + is_standard_pipeline=False, + environment=self.environment, + users=self.users, + ) + + def _run_subpipeline(self, step: pipeline_module.SubpipelineStep) -> None: + if step.pipeline is None: + raise exceptions.InvalidPipelineError("Pipeline has not been resolved.") + + subpipeline_inputs: typing.List[typing.Any] = [] + for i, data_reference in enumerate(step.inputs): + subpipeline_inputs.append(self.data_values[data_reference]) + + if self.hyperparams is not None: + hyperparams = self.hyperparams[self.current_step] + + # We checked this already in "_check_hyperparams". 
+ assert utils.is_sequence(hyperparams), hyperparams + else: + hyperparams = None + + subpipeline = self._create_subpipeline(step.pipeline, hyperparams) + + if self.phase == metadata_base.PipelineRunPhase.FIT: + assert self.steps_state[self.current_step] is None + else: + subpipeline.set_params(typing.cast(typing.List, self.steps_state[self.current_step])) + + return_values_map = {} + return_values = set() + for i, output_id in enumerate(step.outputs): + # "output_id" can be "None" if this output is not used and should be skipped. + if output_id is not None: + data_reference = 'outputs.{i}'.format(i=i) + return_values.add(data_reference) + return_values_map['steps.{i}.{output_id}'.format(i=step.index, output_id=output_id)] = data_reference + + step_reference_prefix = 'steps.{i}.'.format(i=step.index) + for return_value in self.return_values: + # We process recursive data references for this subpipeline. + # We check that "return_value" is not in "return_values_map" because data + # references of the format "steps.{i}.{output_id}" have "step_reference_prefix" + # as a prefix but are not really a recursive data reference. + # But all references of that format are already in "return_values_map". + if return_value.startswith(step_reference_prefix) and return_value not in return_values_map: + data_reference = return_value[len(step_reference_prefix):] + # Data reference at this point should contain at least one dot, because all with the prefix + # which do not contain a dot we filtered out by checking them against "return_values_map". + assert '.' in data_reference, data_reference + return_values.add(data_reference) + return_values_map[return_value] = data_reference + + # We sort "return_values" to have deterministic order. + result = subpipeline._run(subpipeline_inputs, self.phase, return_values=sorted(return_values)) + self.pipeline_run.add_subpipeline_step(result.pipeline_run) + result.check_success() + + if self.phase == metadata_base.PipelineRunPhase.FIT: + assert self.steps_state[self.current_step] is None + self.steps_state[self.current_step] = subpipeline.get_params() + + for step_data_reference, subpipeline_data_reference in return_values_map.items(): + self.data_values[step_data_reference] = result.values[subpipeline_data_reference] + + def _get_singleton_value(self, value: typing.Any, is_argument: bool, name: str) -> typing.Any: + """ + A helper to extract a value from a singleton value (extracting a sole element of a + container of length 1). + """ + + if len(value) != 1: + if is_argument: + raise exceptions.InvalidPipelineError( + "Argument '{argument_name}' of step {step_index} of pipeline '{pipeline_id}' is singleton data, but available data is not.".format( + argument_name=name, + step_index=self.current_step, + pipeline_id=self.pipeline.id, + ), + ) + else: + raise exceptions.InvalidPipelineError( + "Hyper-parameter '{hyperparameter_name}' of step {step_index} of pipeline '{pipeline_id}' is singleton data, but available data is not.".format( + hyperparameter_name=name, + step_index=self.current_step, + pipeline_id=self.pipeline.id, + ), + ) + + return get_singleton_value(value) + + def _prepare_primitive_arguments(self, step: pipeline_module.PrimitiveStep) -> typing.Dict[str, typing.Any]: + arguments = {} + for argument_name, argument_description in step.arguments.items(): + + if argument_description['type'] == metadata_base.ArgumentType.DATA: + argument_value = self.data_values[argument_description['data']] + # We have to extract a singleton value out. 
+ argument_value = self._get_singleton_value(argument_value, True, argument_name) + + elif argument_description['type'] == metadata_base.ArgumentType.CONTAINER: + if utils.is_sequence(argument_description['data']): + values = [self.data_values[data_reference] for data_reference in argument_description['data']] + # We have to create a container List. + argument_value = self._get_list_value(values) + else: + argument_value = self.data_values[argument_description['data']] + + else: + raise exceptions.UnexpectedValueError("Unknown argument type: {argument_type}".format(argument_type=argument_description['type'])) + + arguments[argument_name] = argument_value + + return arguments + + def _get_list_value(self, values: typing.Sequence) -> container.List: + """ + Creates a container List from ``values``. It reuses existing metadata in ``values`` + to create metadata of the container List. + """ + + container_list = container.List(values, { + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': container.List, + 'dimension': { + 'length': len(values), + }, + }) + + for value_index, value in enumerate(values): + container_list.metadata = value.metadata.copy_to(container_list.metadata, (), (value_index,)) + + return container_list + + def _get_default_hyperparams(self, step: pipeline_module.PrimitiveStep) -> hyperparams_module.Hyperparams: + return step.get_primitive_hyperparams().defaults() + + def _get_runtime_hyperparams(self, step: pipeline_module.PrimitiveStep) -> typing.Dict: + if self.hyperparams is not None: + runtime_hyperparams = self.hyperparams[self.current_step] + + # We checked this already in "_check_hyperparams". + assert isinstance(runtime_hyperparams, (dict, frozendict.frozendict)), runtime_hyperparams + else: + runtime_hyperparams = {} + + return runtime_hyperparams + + def _get_pipeline_hyperparams(self, step: pipeline_module.PrimitiveStep) -> typing.Dict: + pipeline_hyperparams = {} + for hyperparameter_name, hyperparameter_description in step.hyperparams.items(): + if hyperparameter_description['type'] == metadata_base.ArgumentType.DATA: + if utils.is_sequence(hyperparameter_description['data']): + pipeline_hyperparams[hyperparameter_name] = [ + self._get_singleton_value(self.data_values[data_reference], False, hyperparameter_name) + for data_reference in hyperparameter_description['data'] + ] + else: + pipeline_hyperparams[hyperparameter_name] = self._get_singleton_value(self.data_values[hyperparameter_description['data']], False, hyperparameter_name) + + elif hyperparameter_description['type'] == metadata_base.ArgumentType.PRIMITIVE: + if utils.is_sequence(hyperparameter_description['data']): + primitive_references = hyperparameter_description['data'] + else: + primitive_references = typing.cast(typing.Sequence, [hyperparameter_description['data']]) + + primitives = [] + for primitive_reference in primitive_references: + # We make an instance of a primitive which is almost the same as the pipeline primitive + # (see "_create_pipeline_primitive"), but with a different random seed because of a different + # "current_step". Then we clone it (using "_clone_primitive") in "_handle_primitive_hyperparams" + # which uses the final random seed. This way we are handling all primitives in hyper-parameters + # the same no matter the source (it could be somebody somehow passes a primitive instance through + # produce method's output or something). + # TODO: See if an optimization (no additional clone) here is needed and how hard is to implement it. 
+ # TODO: Try to re-use existing primitive instances.
+ # We currently do not store primitive instances of prior steps, but we could store those we know we
+ # will need in later steps and then just use them here, instead of creating them from scratch.
+ primitive = self._create_primitive_reference_primitive(primitive_reference, hyperparameter_name)
+ primitives.append(primitive)
+
+ if utils.is_sequence(hyperparameter_description['data']):
+ pipeline_hyperparams[hyperparameter_name] = primitives
+ else:
+ assert len(primitives) == 1
+
+ pipeline_hyperparams[hyperparameter_name] = primitives[0] # type: ignore
+
+ elif hyperparameter_description['type'] == metadata_base.ArgumentType.CONTAINER:
+ pipeline_hyperparams[hyperparameter_name] = self.data_values[hyperparameter_description['data']]
+
+ elif hyperparameter_description['type'] == metadata_base.ArgumentType.VALUE:
+ pipeline_hyperparams[hyperparameter_name] = hyperparameter_description['data']
+
+ else:
+ raise exceptions.UnexpectedValueError("Unknown hyper-parameter type: {hyperparameter_type}".format(hyperparameter_type=hyperparameter_description['type']))
+
+ return pipeline_hyperparams
+
+ def _prepare_primitive_hyperparams(self, step: pipeline_module.PrimitiveStep) -> typing.Tuple[hyperparams_module.Hyperparams, typing.Dict]:
+ default_hyperparams = self._get_default_hyperparams(step)
+ pipeline_hyperparams = self._get_pipeline_hyperparams(step)
+ runtime_hyperparams = self._get_runtime_hyperparams(step)
+
+ # Pipeline hyper-parameters should be disjoint from runtime hyper-parameters.
+ # We check this in "_check_hyperparams" call from the constructor.
+ assert set(pipeline_hyperparams.keys()).isdisjoint(set(runtime_hyperparams.keys())), (pipeline_hyperparams, runtime_hyperparams)
+
+ hyperparams = default_hyperparams.replace(pipeline_hyperparams).replace(runtime_hyperparams)
+
+ # We have to handle all primitive values present in hyper-parameters.
+ return self._handle_primitive_hyperparams(hyperparams, 0), pipeline_hyperparams
+
+ def _filter_arguments(self, primitive_class: typing.Type[base.PrimitiveBase], method_name: str, arguments: typing.Dict[str, typing.Any]) -> typing.Dict[str, typing.Any]:
+ """
+ Primitive as a whole gets arguments for all its methods, so here we filter them down to
+ only those arguments expected by a given method.
+ """ + + method_arguments = primitive_class.metadata.query()['primitive_code'].get('instance_methods', {}).get(method_name, {}).get('arguments', []) + + filtered_arguments = {} + for argument_name in method_arguments: + if argument_name in arguments: + filtered_arguments[argument_name] = arguments[argument_name] + + return filtered_arguments + + def _get_primitive_volumes(self, primitive_class: typing.Type[base.PrimitiveBase]) -> typing.Dict: + volumes = {} + for entry in primitive_class.metadata.get_volumes(): + if self.volumes_dir is None: + raise exceptions.InvalidArgumentValueError( + "Primitive '{primitive_id}' of step {step_index} of pipeline '{pipeline_id}' requires static files (volumes) but volumes are not available.".format( + primitive_id=primitive_class.metadata.query()['id'], + step_index=self.current_step, + pipeline_id=self.pipeline.id, + ), + ) + + volume_path = os.path.join(self.volumes_dir, entry['file_digest']) + if not os.path.exists(volume_path): + raise exceptions.InvalidArgumentValueError( + "Primitive '{primitive_id}' of step {step_index} of pipeline '{pipeline_id}' requires static files (volume) but volume for key '{key}' is not available.".format( + primitive_id=primitive_class.metadata.query()['id'], + step_index=self.current_step, + pipeline_id=self.pipeline.id, + key=entry['key'], + ), + ) + + volumes[entry['key']] = volume_path + + return volumes + + def _get_primitive_temporary_directory(self, primitive_class: typing.Type[base.PrimitiveBase]) -> str: + return tempfile.mkdtemp(dir=self._base_temporary_directory_path) + + def _create_primitive_arguments(self, primitive_class: typing.Type[base.PrimitiveBase], hyperparams: hyperparams_module.Hyperparams, random_seed_offset: int) -> typing.Dict: + constructor_arguments = { + 'hyperparams': hyperparams, + # We change the random seed in a deterministic way so that it does not matter in which order we run steps. + 'random_seed': self.random_seed + self.current_step + random_seed_offset, + 'volumes': self._get_primitive_volumes(primitive_class), + 'temporary_directory': self._get_primitive_temporary_directory(primitive_class), + } + + filtered_arguments = self._filter_arguments(primitive_class, '__init__', constructor_arguments) + + return filtered_arguments + + def _create_primitive(self, primitive_class: typing.Type[base.PrimitiveBase], hyperparams: hyperparams_module.Hyperparams, random_seed_offset: int) -> base.PrimitiveBase: + """ + Creates an instance of a non-pipeline primitive. + + Constructor call is not recorded in pipeline run. + """ + + arguments = self._create_primitive_arguments(primitive_class, hyperparams, random_seed_offset) + + return primitive_class(**arguments) + + def _clone_primitive(self, primitive: base.PrimitiveBase, random_seed_offset: int) -> base.PrimitiveBase: + """ + Clone a primitive. It reuses hyper-parameters and params, but provides a + potentially different random seed and other constructor arguments. + + We are creating a new instance and not a deep copy because primitive instance might have + been created outside of the runtime and might not have valid constructor argument values. + """ + + # We have to handle all primitive values present in hyper-parameters. + # They are all already an instance, but we have to make their copies. 
+ hyperparams = self._handle_primitive_hyperparams(primitive.hyperparams, random_seed_offset + 1) + + primitive_clone = self._create_primitive(type(primitive), hyperparams, random_seed_offset) + + primitive_clone.set_params(params=primitive.get_params()) + + return primitive_clone + + def _create_pipeline_primitive(self, primitive_class: typing.Type[base.PrimitiveBase], hyperparams: hyperparams_module.Hyperparams) -> base.PrimitiveBase: + """ + Creates an instance of a pipeline primitive. + + Constructor call is recorded in pipeline run. + """ + + arguments = self._create_primitive_arguments(primitive_class, hyperparams, 0) + + if 'random_seed' in arguments: + self.pipeline_run.set_primitive_step_random_seed(self.current_step, arguments['random_seed']) + + return self._call_primitive_method(primitive_class, arguments) + + def _create_hyperparameter_primitive(self, primitive_class: typing.Type[base.PrimitiveBase], random_seed_offset: int) -> base.PrimitiveBase: + """ + Creates an instance of the non-pipeline primitive with default hyper-parameters. + """ + + hyperparams_class = primitive_class.metadata.get_hyperparams() + + return self._create_primitive(primitive_class, hyperparams_class.defaults(), random_seed_offset) + + def _create_primitive_reference_primitive(self, primitive_reference: int, hyperparameter_name: str) -> base.PrimitiveBase: + """ + Creates an instance of a primitive based on its primitive reference (step index), meaning the instance + of a primitive is almost the same as the pipeline primitive (see "_create_pipeline_primitive") at that + step index, but with a different random seed because of a probably different "current_step". + + Constructor call is not recorded in pipeline run. + """ + + # It could point to a sub-pipeline and not primitive. + if not isinstance(self.pipeline.steps[primitive_reference], pipeline_module.PrimitiveStep): + raise exceptions.InvalidPipelineError( + "Hyper-parameter '{hyperparameter_name}' of step {step_index} of pipeline '{pipeline_id}' does not point to a primitive step (step {primitive_reference}).".format( # noqa + hyperparameter_name=hyperparameter_name, + step_index=self.current_step, + pipeline_id=self.pipeline.id, + primitive_reference=primitive_reference, + ), + ) + + step = typing.cast(pipeline_module.PrimitiveStep, self.pipeline.steps[primitive_reference]) + hyperparams, pipeline_hyperparams = self._prepare_primitive_hyperparams(step) + # We use 0 for "random_seed_offset" because we are creating a primitive instance + # which should be the same as the pipeline primitive (see "_create_pipeline_primitive"). + primitive = self._create_primitive(step.primitive, hyperparams, 0) + primitive.set_params(params=self.steps_state[primitive_reference]) + return primitive + + def _transform_primitive_hyperparameter(self, hyperparameter: hyperparams_module.Hyperparameter, value: typing.Any, index: int) -> typing.Any: + value_is_type = utils.is_type(value) + if value_is_type and issubclass(value, base.PrimitiveBase): + return self._create_hyperparameter_primitive(value, index) + elif not value_is_type and isinstance(value, base.PrimitiveBase): + return self._clone_primitive(value, index) + else: + # Not a primitive instance or a primitive class, do not do anything. + return value + + def _handle_primitive_hyperparams(self, hyperparams: base.Hyperparams, random_seed_offset: int) -> base.Hyperparams: + """ + Handles a special case when the value is a primitive instance or a primitive class. 
+ In this case we have to make sure we create a new instance reusing its hyper-parameters,
+ or create an instance from the class using default hyper-parameters.
+ """
+
+ return hyperparams.transform_value(hyperparams, self._transform_primitive_hyperparameter, random_seed_offset)
+
+ def _run_primitive(self, step: pipeline_module.PrimitiveStep) -> None:
+ if step.primitive is None:
+ raise exceptions.InvalidPipelineError("Primitive has not been resolved.")
+
+ self.pipeline_run.add_primitive_step(step)
+ arguments = self._prepare_primitive_arguments(step)
+
+ hyperparams, pipeline_hyperparams = self._prepare_primitive_hyperparams(step)
+
+ if self.phase == metadata_base.PipelineRunPhase.FIT:
+ self.pipeline_run.set_primitive_step_hyperparams(self.current_step, hyperparams, pipeline_hyperparams)
+
+ # We create a primitive just before it is being run. This assures that any primitives it depends on through its
+ # hyper-parameters have already been run (because they are in prior steps). Similarly, any pipeline-based value
+ # being passed to a hyper-parameter has already been computed.
+ primitive = self._create_pipeline_primitive(step.primitive, hyperparams)
+
+ # If primitive step has no arguments we do not fit or produce it. It is meant to be used as
+ # unfitted primitive for another primitive's hyper-parameter.
+ if not arguments:
+ return
+
+ if self.phase == metadata_base.PipelineRunPhase.FIT:
+ assert self.steps_state[self.current_step] is None
+ else:
+ primitive.set_params(params=self.steps_state[self.current_step])
+
+ if self.phase == metadata_base.PipelineRunPhase.FIT:
+ fit_multi_produce_arguments = self._filter_arguments(step.primitive, 'fit_multi_produce', dict(arguments, produce_methods=step.outputs))
+
+ # We fit and produce once, without any limits on iterations/time.
+ multi_call_result = self._call_primitive_method(primitive.fit_multi_produce, fit_multi_produce_arguments)
+ if not multi_call_result.has_finished:
+ # Because we have not set any limits on iterations/time, the primitive should finish and not stop early.
+ # One should be able to control the primitive's stopping criteria through a hyper-parameter or hyper-parameters.
+ raise exceptions.InvalidReturnValueError(
+ "\"fit_multi_produce\" call result should have \"has_finished\" set to true because no iterations/time limits were set and the primitive should finish and not stop early.",
+ )
+ outputs = multi_call_result.values
+
+ elif self.phase == metadata_base.PipelineRunPhase.PRODUCE:
+ multi_produce_arguments = self._filter_arguments(step.primitive, 'multi_produce', dict(arguments, produce_methods=step.outputs))
+
+ # We produce once, without any limits on iterations/time.
+ multi_call_result = self._call_primitive_method(primitive.multi_produce, multi_produce_arguments)
+ if not multi_call_result.has_finished:
+ # Because we have not set any limits on iterations/time, the primitive should finish and not stop early.
+ # One should be able to control the primitive's stopping criteria through a hyper-parameter or hyper-parameters.
+ raise exceptions.InvalidReturnValueError(
+ "\"multi_produce\" call result should have \"has_finished\" set to true because no iterations/time limits were set and the primitive should finish and not stop early.",
+ )
+ outputs = multi_call_result.values
+
+ else:
+ # TODO: Allow dispatch to a general method so that subclasses of this class can handle them if necessary.
+ raise exceptions.UnexpectedValueError("Unknown phase: {phase}".format(phase=self.phase)) + + if self.phase == metadata_base.PipelineRunPhase.FIT: + assert self.steps_state[self.current_step] is None + self.steps_state[self.current_step] = primitive.get_params() + + for output_id in step.outputs: + output_data_reference = 'steps.{i}.{output_id}'.format(i=step.index, output_id=output_id) + + if output_id in outputs: + self.data_values[output_data_reference] = outputs[output_id] + else: + raise exceptions.InvalidReturnValueError("Missing declared output '{output_id}' in computed primitive's outputs.".format(output_id=output_id)) + + def _call_primitive_method(self, method: typing.Callable, arguments: typing.Dict) -> typing.Any: + """ + Calls a primitive method (or constructor). Records relevant information in pipeline run. + + Parameters + ---------- + method: + Primitive's method or constructor to call. + arguments: + Arguments to pass to the method. + + Returns + ------- + The result of calling the method. It method is a constructor, + returns an instance. + """ + + # A special case for the constructor. + if inspect.isclass(method): + method_name = '__init__' + else: + method_name = method.__name__ + + pipeline_run_method_call_id = self.pipeline_run.add_method_call_to_primitive_step(self.current_step, method_name) + + callback = self.pipeline_run.get_method_call_logging_callback(pipeline_run_method_call_id) + logging_handler = utils.CallbackHandler(callback) + + root = logging.getLogger() + redirect_logger = logging.getLogger('redirect') + + old_level = root.level + old_handler_levels = [handler.level for handler in root.handlers] + old_propagate = redirect_logger.propagate + try: + # We are just about to modify the root logger level, so we change levels + # of all existing handlers to retain same configuration. + for handler in root.handlers: + # If existing handler has level already set to something more restrictive than what the + # root logger has, we do not change that. Otherwise, we set it to the root logger's level. + if handler.level < old_level: + handler.setLevel(old_level) + # Record all logging which happens during the call. + root.setLevel(logging.DEBUG) + root.addHandler(logging_handler) + # We do not want to print logging from "redirect_logger" because pass-through is enabled, so we + # disable propagation from it to the root logger (by default there is a stream handler on the root + # logger which prints all logging) and install our handler directly on the redirect logger. + redirect_logger.propagate = False + redirect_logger.addHandler(logging_handler) + + # TODO: All this redirection works in a single thread, what about multi-threaded or async? + # Reference engine is single threaded, but maybe a subclass would not be? + # We redirect all stdout/stderr to logging, but pass it through to stdout/stderr as well. + with utils.redirect_to_logging(logger=redirect_logger, pass_through=True): + with utils.global_randomness_warning(): + self.pipeline_run.method_call_started(pipeline_run_method_call_id) + + try: + result = method(**arguments) + except Exception as error: + self.pipeline_run.method_call_failed(pipeline_run_method_call_id, traceback.format_exc()) + + raise error + + self.pipeline_run.method_call_successful(pipeline_run_method_call_id) + + finally: + # Restore original logging configuration. 
+ root.removeHandler(logging_handler) + root.setLevel(old_level) + for i, level in enumerate(old_handler_levels): + root.handlers[i].setLevel(level) + # Just to be consistent, if somebody is doing something with the same logger. + redirect_logger.propagate = old_propagate + redirect_logger.removeHandler(logging_handler) + + self.pipeline_run.set_method_call_result_metadata(pipeline_run_method_call_id, result) + + return result + + def _run_step(self, step: pipeline_module.StepBase) -> None: + if isinstance(step, pipeline_module.PlaceholderStep): + self._run_placeholder(step) + elif isinstance(step, pipeline_module.SubpipelineStep): + self._run_subpipeline(step) + elif isinstance(step, pipeline_module.PrimitiveStep): + self._run_primitive(step) + else: + # TODO: Allow dispatch to a general method so that subclasses of this class can handle them if necessary. + raise exceptions.UnexpectedValueError("Unknown step type: {step_type}".format(step_type=type(step))) + + def _do_run_step(self, step: pipeline_module.StepBase) -> None: + self.pipeline_run.step_started(self.current_step) + + try: + self._before_step_run() + self._run_step(step) + self._after_step_run() + except Exception as error: + self.pipeline_run.step_failed(self.current_step, traceback.format_exc()) + + raise exceptions.StepFailedError( + "Step {step_index} for pipeline {pipeline_id} failed.".format( + step_index=self.current_step, pipeline_id=self.pipeline.id, + ), + ) from error + + self.pipeline_run.step_successful(self.current_step) + + def _do_run(self) -> None: + for step_index, step in enumerate(self.pipeline.steps): + self.current_step = step_index + + self._do_run_step(step) + + def _run( + self, inputs: typing.Sequence[typing.Any], phase: metadata_base.PipelineRunPhase, + return_values: typing.Optional[typing.Sequence[str]] + ) -> Result: + self._check_pipeline(inputs) + + self._initialize_run_state(inputs, phase, return_values) + + self.pipeline_run.run_started() + + error: Exception = None + try: + self._do_run() + except Exception as run_error: + self.pipeline_run.run_failed(traceback.format_exc()) + + error = run_error + + if error is None: + self.pipeline_run.run_successful() + + self._populate_output_values() + + if self.is_standard_pipeline: + self.pipeline_run.set_predictions(self.data_values['outputs.0']) + + values = self._get_return_values(error) + + pipeline_run = self.pipeline_run + + self._clear_run_state() + + # TODO: What if some internal exception happens before we set this which leaves runtime in a changed state. + # This means that state has changed, but we have not set previous pipeline run. + # So if another phase is called, it might even by accident succeed, but have invalid + # previous pipeline run set which does not explain the state of the runtime. + # Maybe we should make sure we always set this ID, even when not returning a pipeline + # run so that it can be at least visible that some pipeline run is missing in the sequence. + self._previous_pipeline_run = pipeline_run + + return Result(pipeline_run, values, error) + + def _get_return_values(self, error: typing.Optional[Exception]) -> typing.Dict: + values = {} + for name in self.return_values: + try: + values[name] = self.data_values[name] + except KeyError as value_error: + # We try to return whichever values we can, even in the case of an error. 
+ if error is None: + raise value_error + + return values + + def _before_step_run(self) -> None: + pass + + def _after_step_run(self) -> None: + self._delete_unnecessary_values() + + def _delete_unnecessary_values(self) -> None: + values_needed = set() + + # Which values are explicitly required to be kept until the end? + for value in self.return_values: + values_needed.add(value) + + # Outputs need values from steps. + for i, output_description in enumerate(self.pipeline.outputs): + if 'outputs.{i}'.format(i=i) in self.return_values: + values_needed.add(output_description['data']) + + # Future steps also need values. + for step in self.pipeline.steps[self.current_step + 1:]: + values_needed.update(step.get_input_data_references()) + + # Pipeline run for a standard pipeline needs predictions. + if self.is_standard_pipeline: + values_needed.add(self.pipeline.outputs[0]['data']) + + # Delete any value which is not needed anymore. + # We iterate over a list so that we can change dict while iterating. + for data_reference in list(self.data_values.keys()): + if data_reference not in values_needed: + del self.data_values[data_reference] + + def fit( + self, inputs: typing.Sequence[typing.Any], *, return_values: typing.Sequence[str] = None, + ) -> Result: + """ + Does a "fit" phase of the pipeline. + + Parameters + ---------- + inputs: + A list of inputs to the pipeline. + return_values: + A list of data references of all output values of all steps to return. + If ``None``, the output values of the whole pipeline are returned. + + Returns + ------- + A result object with kept values, pipeline run description, and any exception. + """ + + return self._run(inputs, metadata_base.PipelineRunPhase.FIT, return_values) + + def produce( + self, inputs: typing.Sequence[typing.Any], *, return_values: typing.Sequence[str] = None, + ) -> Result: + """ + Does a "produce" phase of the pipeline and returns outputs. + + Parameters + ---------- + inputs: + A list of inputs to the pipeline. + return_values: + A list of data references of all output values of all steps to return. + If ``None``, the output values of the whole pipeline are returned. + + Returns + ------- + A result object with kept values, pipeline run description, and any exception. + """ + + return self._run(inputs, metadata_base.PipelineRunPhase.PRODUCE, return_values) + + def get_params(self) -> typing.List[typing.Union[typing.Any, typing.List]]: + return self.steps_state + + def set_params(self, params: typing.List[typing.Union[typing.Any, typing.List]]) -> None: + if not isinstance(params, typing.List): + raise exceptions.InvalidArgumentValueError("Parameters not a list.") + + self._clear_run_state() + self.steps_state = params + + def _populate_output_values(self) -> None: + for i, output_description in enumerate(self.pipeline.outputs): + # Outputs might not be available because they were not requested to be returned from the run. 
+ if output_description['data'] in self.data_values: + self.data_values['outputs.{i}'.format(i=i)] = self.data_values[output_description['data']] + + @classmethod + def _normalize_dataset_id(cls, dataset_id: str) -> str: + return DATASET_ID_REGEX.sub('', dataset_id) + + @classmethod + def _dataset_ids_match(cls, first_dataset_id: str, second_dataset_id: str) -> bool: + if first_dataset_id == second_dataset_id: + return True + + if cls._normalize_dataset_id(first_dataset_id) == cls._normalize_dataset_id(second_dataset_id): + return True + + return False + + @classmethod + def _mark_columns(cls, problem_inputs: typing.Sequence[typing.Dict], dataset: container.Dataset) -> typing.Tuple[container.Dataset, typing.Sequence[int]]: + dataset = dataset.copy() + dataset_id = dataset.metadata.query(())['id'] + + marked_problem_indices = [] + for problem_index, problem_input in enumerate(problem_inputs): + if not cls._dataset_ids_match(problem_input['dataset_id'], dataset_id): + continue + + marked_problem_indices.append(problem_index) + + for target in problem_input.get('targets', []): + if target['resource_id'] not in dataset: + raise exceptions.NotFoundError( + "Error marking target column: dataset does not contain resource with resource ID '{resource_id}'.".format( + resource_id=target['resource_id'], + ), + ) + if not isinstance(dataset[target['resource_id']], container.DataFrame): + raise TypeError( + "Error marking target column: resource '{resource_id}' is not a DataFrame.".format( + resource_id=target['resource_id'], + ), + ) + if not 0 <= target['column_index'] < dataset[target['resource_id']].shape[1]: + raise ValueError( + "Error marking target column: resource '{resource_id}' does not have a column with index '{column_index}'.".format( + resource_id=target['resource_id'], + column_index=target['column_index'], + ), + ) + + dataset.metadata = dataset.metadata.add_semantic_type( + (target['resource_id'], metadata_base.ALL_ELEMENTS, target['column_index']), + 'https://metadata.datadrivendiscovery.org/types/Target', + ) + dataset.metadata = dataset.metadata.add_semantic_type( + (target['resource_id'], metadata_base.ALL_ELEMENTS, target['column_index']), + 'https://metadata.datadrivendiscovery.org/types/TrueTarget', + ) + # If column is marked as a target, it cannot be attribute as well. + # This allows one to define in problem description otherwise attribute columns as targets. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/265 + dataset.metadata = dataset.metadata.remove_semantic_type( + (target['resource_id'], metadata_base.ALL_ELEMENTS, target['column_index']), + 'https://metadata.datadrivendiscovery.org/types/Attribute', + ) + + # TODO: Warn if privileged data columns are not set on attributes. 
+ for privileged_data in problem_input.get('privileged_data', []): + if privileged_data['resource_id'] not in dataset: + raise exceptions.NotFoundError( + "Error marking privileged data column: dataset does not contain resource with resource ID '{resource_id}'.".format( + resource_id=privileged_data['resource_id'], + ), + ) + if not isinstance(dataset[privileged_data['resource_id']], container.DataFrame): + raise TypeError( + "Error marking privileged data column: resource '{resource_id}' is not a DataFrame.".format( + resource_id=privileged_data['resource_id'], + ), + ) + if not 0 <= privileged_data['column_index'] < dataset[privileged_data['resource_id']].shape[1]: + raise ValueError( + "Error marking privileged data column: resource '{resource_id}' does not have a column with index '{column_index}'.".format( + resource_id=privileged_data['resource_id'], + column_index=privileged_data['column_index'], + ), + ) + + dataset.metadata = dataset.metadata.add_semantic_type( + (privileged_data['resource_id'], metadata_base.ALL_ELEMENTS, privileged_data['column_index']), + 'https://metadata.datadrivendiscovery.org/types/PrivilegedData', + ) + + return dataset, marked_problem_indices + + +def _prepare_data_and_scoring_hyperparams(free_hyperparams: typing.Sequence, hyperparameter_values: typing.Dict) -> typing.Tuple[typing.Sequence, typing.Set[str]]: + """ + Values in ``hyperparameter_values`` should be serialized as JSON, as obtained by JSON-serializing + the output of hyper-parameter's ``value_to_json_structure`` method call. + """ + + hyperparams: typing.List[typing.Union[typing.Dict, typing.Sequence]] = [] + + hyperparameter_values_used = set() + + for free_hyperparams_for_step in free_hyperparams: + if isinstance(free_hyperparams_for_step, (dict, frozendict.frozendict)): + values = {} + for name, hyperparameter in free_hyperparams_for_step.items(): + if name in hyperparameter_values: + values[name] = hyperparameter.value_from_json_structure(json.loads(hyperparameter_values[name])) + hyperparameter_values_used.add(name) + hyperparams.append(values) + elif utils.is_sequence(free_hyperparams_for_step): + step_hyperparams, step_hyperparameter_values_used = _prepare_data_and_scoring_hyperparams(free_hyperparams_for_step, hyperparameter_values) + hyperparams.append(step_hyperparams) + hyperparameter_values_used.update(step_hyperparameter_values_used) + else: + raise exceptions.UnexpectedValueError("Unknown hyper-parameters type: {hyperparams_type}".format(hyperparams_type=type(free_hyperparams_for_step))) + + return hyperparams, hyperparameter_values_used + + +# TODO: Add debug logging. 
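For illustration, the values passed through "hyperparameter_values" (the "data_params" and "scoring_params" arguments of the functions below) are JSON strings. A minimal sketch, assuming a splitting pipeline exposing "number_of_folds" and "shuffle" hyper-parameters (placeholder names) and the "metrics" structure which "score" below builds:

import json

# Each value is json.dumps() of the hyper-parameter's value_to_json_structure() output;
# for simple values this is just the JSON encoding of the value itself.
data_params = {
    'number_of_folds': json.dumps(3),
    'shuffle': json.dumps(True),  # placeholder hyper-parameter name
}
scoring_params = {
    'metrics': json.dumps([{'metric': 'ACCURACY', 'k': None, 'pos_label': None}]),
}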
+def fit( + pipeline: pipeline_module.Pipeline, inputs: typing.Sequence[container.Dataset], *, + problem_description: typing.Optional[problem.Problem], context: metadata_base.Context, + hyperparams: typing.Sequence = None, random_seed: int = 0, volumes_dir: str = None, scratch_dir: str = None, + runtime_environment: pipeline_run_module.RuntimeEnvironment = None, is_standard_pipeline: bool = True, + expose_produced_outputs: bool = False, +) -> typing.Tuple[typing.Optional[Runtime], typing.Optional[container.DataFrame], Result]: + for input in inputs: + if not isinstance(input, container.Dataset): + raise TypeError("A standard pipeline's input should be of a container Dataset type, not {input_type}.".format( + input_type=type(input), + )) + + if is_standard_pipeline and len(pipeline.outputs) != 1: + raise ValueError("A standard pipeline should have exactly one output, not {outputs}.".format( + outputs=len(pipeline.outputs), + )) + + runtime = Runtime( + pipeline, hyperparams, + problem_description=problem_description, context=context, + random_seed=random_seed, volumes_dir=volumes_dir, scratch_dir=scratch_dir, + is_standard_pipeline=is_standard_pipeline, environment=runtime_environment, + ) + + if expose_produced_outputs: + return_values = sorted(pipeline.get_producing_outputs()) + else: + return_values = ['outputs.0'] + + result = runtime.fit(inputs, return_values=return_values) + + if result.has_error(): + return None, None, result + + output = result.values['outputs.0'] + + if not isinstance(output, container.DataFrame): + raise TypeError("A standard pipeline's output should be of a container DataFrame type, not {output_type}.".format( + output_type=type(output), + )) + + return runtime, output, result + + +# TODO: Add debug logging. +def produce( + fitted_pipeline: Runtime, test_inputs: typing.Sequence[container.Dataset], *, + expose_produced_outputs: bool = False, +) -> typing.Tuple[typing.Optional[container.DataFrame], Result]: + for test_input in test_inputs: + if not isinstance(test_input, container.Dataset): + raise TypeError("A standard pipeline's input should be of a container Dataset type, not {input_type}.".format( + input_type=type(test_input), + )) + + # This is checked in "fit" already, but maybe somebody fitter a pipeline not through "fit". + if fitted_pipeline.is_standard_pipeline and len(fitted_pipeline.pipeline.outputs) != 1: + raise ValueError("A standard pipeline should have exactly one output, not {outputs}.".format( + outputs=len(fitted_pipeline.pipeline.outputs), + )) + + if expose_produced_outputs: + return_values = sorted(fitted_pipeline.pipeline.get_producing_outputs()) + else: + return_values = ['outputs.0'] + + result = fitted_pipeline.produce(test_inputs, return_values=return_values) + if result.has_error(): + return None, result + + output = result.values['outputs.0'] + + if not isinstance(output, container.DataFrame): + raise TypeError("A standard pipeline's output should be of a container DataFrame type, not {output_type}.".format( + output_type=type(output), + )) + + return output, result + + +# TODO: Add debug logging. 
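For illustration, a minimal end-to-end sketch of the "fit" and "produce" helpers above, assuming the helpers defined in this file are importable and that the pipeline description, problem and dataset URIs below (all placeholders) exist:

from d3m.container import dataset as dataset_module
from d3m.metadata import base as metadata_base, pipeline as pipeline_module, problem

pipeline = pipeline_module.get_pipeline('pipeline.json')  # placeholder path
problem_description = problem.get_problem('file:///data/TRAIN/problem_TRAIN/problemDoc.json')
train_dataset = dataset_module.get_dataset('file:///data/TRAIN/dataset_TRAIN/datasetDoc.json')
test_dataset = dataset_module.get_dataset('file:///data/TEST/dataset_TEST/datasetDoc.json')

# Fit the pipeline on the train dataset, then produce predictions for the test dataset.
fitted_runtime, train_predictions, fit_result = fit(
    pipeline, [train_dataset],
    problem_description=problem_description,
    context=metadata_base.Context.TESTING,
)
fit_result.check_success()

test_predictions, produce_result = produce(fitted_runtime, [test_dataset])
produce_result.check_success()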
+def score( + predictions: container.DataFrame, score_inputs: typing.Sequence[container.Dataset], *, scoring_pipeline: pipeline_module.Pipeline, + problem_description: typing.Optional[problem.Problem], metrics: typing.Sequence[typing.Dict], predictions_random_seed: int = None, + context: metadata_base.Context, scoring_params: typing.Dict[str, str] = None, random_seed: int = 0, volumes_dir: str = None, + scratch_dir: str = None, runtime_environment: pipeline_run_module.RuntimeEnvironment = None, +) -> typing.Tuple[typing.Optional[container.DataFrame], Result]: + for score_input in score_inputs: + if not isinstance(score_input, container.Dataset): + raise TypeError("A scoring pipeline's input should be of a container Dataset type, not {input_type}.".format( + input_type=type(score_input), + )) + + if len(scoring_pipeline.outputs) != 1: + raise ValueError("A scoring pipeline should have exactly one output, not {outputs}.".format( + outputs=len(scoring_pipeline.outputs), + )) + + metrics_hyperparameter = [] + for metric in metrics: + # Structure should match what "value_from_json_structure" would + # return for "ComputeScoresPrimitive" hyper-parameter. + # TODO: Once "ComputeScoresPrimitive" is moved to core package, use its default hyper-parameters here. + metric_hyperparameter = {'metric': metric['metric'].name, 'k': None, 'pos_label': None} + metric_hyperparameter.update(metric.get('params', {})) + metrics_hyperparameter.append(metric_hyperparameter) + + if scoring_params is None: + scoring_params = {} + + if metrics_hyperparameter: + # We have to JSON-serialize it because "_prepare_data_and_scoring_hyperparams" + # expects all values to be JSON-serialized. + scoring_params['metrics'] = json.dumps(metrics_hyperparameter) + + scoring_hyperparams, scoring_params_used = _prepare_data_and_scoring_hyperparams(scoring_pipeline.get_free_hyperparams(), scoring_params) + + scoring_params_keys_set = set(scoring_params.keys()) + if scoring_params_keys_set - scoring_params_used: + logger.warning("Not all provided hyper-parameters for the scoring pipeline %(pipeline_id)s were used: %(unused_params)s", { + 'pipeline_id': scoring_pipeline.id, + 'unused_params': ', '.join(sorted(scoring_params_keys_set - scoring_params_used)), + }) + + runtime = Runtime( + scoring_pipeline, scoring_hyperparams, + problem_description=problem_description, context=context, + random_seed=random_seed, volumes_dir=volumes_dir, scratch_dir=scratch_dir, + environment=runtime_environment, + ) + + inputs = [predictions] + list(score_inputs) # type: ignore + + # Fit + produce on same data. + result = runtime.fit(inputs, return_values=['outputs.0']) + if result.has_error(): + return None, result + + output = result.values['outputs.0'] + + if not isinstance(output, container.DataFrame): + raise TypeError("A scoring pipeline's output should be of a container DataFrame type, not {output_type}.".format( + output_type=type(output), + )) + + if predictions_random_seed is not None: + output = combine_random_seed(output, predictions_random_seed) + + return output, result + + +# TODO: Add debug logging. 
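Continuing the sketch above, produced predictions can then be scored with the "score" helper; "DEFAULT_SCORING_PIPELINE_PATH" is the module-level default referenced elsewhere in this file and the SCORE dataset URI is a placeholder:

scoring_pipeline = pipeline_module.get_pipeline(DEFAULT_SCORING_PIPELINE_PATH)
score_dataset = dataset_module.get_dataset('file:///data/SCORE/dataset_SCORE/datasetDoc.json')
metrics = get_metrics_from_problem_description(problem_description)

scores, score_result = score(
    test_predictions, [score_dataset],
    scoring_pipeline=scoring_pipeline,
    problem_description=problem_description,
    metrics=metrics,
    context=metadata_base.Context.TESTING,
)
score_result.check_success()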
+def prepare_data( + inputs: typing.Sequence[container.Dataset], *, data_pipeline: pipeline_module.Pipeline, problem_description: typing.Optional[problem.Problem], + data_params: typing.Dict[str, str], context: metadata_base.Context, random_seed: int = 0, volumes_dir: str = None, + scratch_dir: str = None, runtime_environment: pipeline_run_module.RuntimeEnvironment = None, +) -> typing.Tuple[typing.List, Result]: + """ + Values in ``data_params`` should be serialized as JSON, as obtained by JSON-serializing + the output of hyper-parameter's ``value_to_json_structure`` method call. + """ + + for input in inputs: + if not isinstance(input, container.Dataset): + raise TypeError("A data preparation pipeline's input should be of a container Dataset type, not {input_type}.".format( + input_type=type(input), + )) + + if len(data_pipeline.outputs) != 3: + raise ValueError("A data preparation pipeline should have exactly three outputs, not {outputs}.".format( + outputs=len(data_pipeline.outputs), + )) + + if 'number_of_folds' in data_params: + number_of_folds = int(data_params['number_of_folds']) + else: + # For now we assume other data preparation pipelines do only one fold. We should standardize + # more hyper-parameters to gather how many folds have to be made (and not really folds, but + # more how many input indices have to be passed to the pipeline). + number_of_folds = 1 + + data_hyperparams, data_params_used = _prepare_data_and_scoring_hyperparams(data_pipeline.get_free_hyperparams(), data_params) + + data_params_keys_set = set(data_params.keys()) + if data_params_keys_set - data_params_used: + logger.warning("Not all provided hyper-parameters for the data preparation pipeline {pipeline_id} were used: {unused_params}".format( + pipeline_id=data_pipeline.id, + unused_params=sorted(data_params_keys_set - data_params_used), + )) + + runtime = Runtime( + data_pipeline, data_hyperparams, + problem_description=problem_description, context=context, + random_seed=random_seed, volumes_dir=volumes_dir, + scratch_dir=scratch_dir, environment=runtime_environment, + ) + + # Fit + produce on same data. The inputs are the list of indices of folds + # to generate and a dataset to split. + result = runtime.fit([container.List(range(number_of_folds))] + list(inputs), return_values=['outputs.0', 'outputs.1', 'outputs.2']) # type: ignore + if result.has_error(): + return [], result + + outputs = [result.values['outputs.0'], result.values['outputs.1'], result.values['outputs.2']] + + for output in outputs: + if not isinstance(output, container.List): + raise TypeError("A data preparation pipeline's output should be of a container List type, not {input_type}.".format( + input_type=type(output), + )) + if len(output) != number_of_folds: + raise ValueError("A data preparation pipeline's output should contain {number_of_folds} datasets, not {length}.".format( + number_of_folds=number_of_folds, + length=len(output), + )) + + return outputs, result + + +# TODO: Add debug logging. 
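A corresponding sketch for "prepare_data", splitting the train dataset into folds with a data preparation pipeline (the splitting pipeline path is a placeholder and "data_params" uses the JSON-serialized format described above):

import json

data_pipeline = pipeline_module.get_pipeline('kfold_split_pipeline.yml')  # placeholder path
data_params = {'number_of_folds': json.dumps(3)}

outputs, prepare_result = prepare_data(
    [train_dataset],
    data_pipeline=data_pipeline,
    problem_description=problem_description,
    data_params=data_params,
    context=metadata_base.Context.TESTING,
)
prepare_result.check_success()

# "outputs" is [train_folds, test_folds, score_folds]; each is a container.List with one
# Dataset per fold, which is how "evaluate" below iterates over them.
for train_fold, test_fold, score_fold in zip(*outputs):
    pass  # e.g. fit on train_fold, produce on test_fold, score against score_fold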
+def evaluate( + pipeline: pipeline_module.Pipeline, inputs: typing.Sequence[container.Dataset], *, data_pipeline: pipeline_module.Pipeline, + scoring_pipeline: pipeline_module.Pipeline, problem_description: typing.Optional[problem.Problem], + data_params: typing.Dict[str, str], metrics: typing.Sequence[typing.Dict], context: metadata_base.Context, + scoring_params: typing.Dict[str, str] = None, hyperparams: typing.Sequence = None, random_seed: int = 0, + data_random_seed: int = 0, scoring_random_seed: int = 0, volumes_dir: str = None, + scratch_dir: str = None, runtime_environment: pipeline_run_module.RuntimeEnvironment = None, +) -> typing.Tuple[typing.List[container.DataFrame], MultiResult]: + """ + Values in ``data_params`` should be serialized as JSON, as obtained by JSON-serializing + the output of hyper-parameter's ``value_to_json_structure`` method call. + """ + + outputs, data_result = prepare_data( + inputs, data_pipeline=data_pipeline, problem_description=problem_description, data_params=data_params, + context=context, random_seed=data_random_seed, volumes_dir=volumes_dir, + scratch_dir=scratch_dir, runtime_environment=runtime_environment, + ) + if data_result.has_error(): + return [], MultiResult([data_result]) + + fold_group_uuid = uuid.uuid4() + + all_scores: typing.List[container.DataFrame] = [] + all_results = MultiResult() + for fold_index, (train_inputs, test_inputs, score_inputs) in enumerate(zip(*outputs)): + fitted_pipeline, predictions, fit_result = fit( + pipeline, [train_inputs], problem_description=problem_description, context=context, hyperparams=hyperparams, + random_seed=random_seed, volumes_dir=volumes_dir, scratch_dir=scratch_dir, + runtime_environment=runtime_environment, + ) + + # Modifies "fit_result.pipeline_run" in-place. + combine_pipeline_runs( + fit_result.pipeline_run, data_pipeline_run=data_result.pipeline_run, + fold_group_uuid=fold_group_uuid, fold_index=fold_index, + ) + + all_results.append(fit_result) + if fit_result.has_error(): + assert all_results.has_error() + return all_scores, all_results + + predictions, produce_result = produce(fitted_pipeline, [test_inputs]) + + # Modifies "produce_result.pipeline_run" in-place. + combine_pipeline_runs( + produce_result.pipeline_run, data_pipeline_run=data_result.pipeline_run, + fold_group_uuid=fold_group_uuid, fold_index=fold_index + ) + + all_results.append(produce_result) + if produce_result.has_error(): + assert all_results.has_error() + return all_scores, all_results + + scores, score_result = score( + predictions, [score_inputs], scoring_pipeline=scoring_pipeline, problem_description=problem_description, metrics=metrics, + predictions_random_seed=random_seed, scoring_params=scoring_params, context=context, random_seed=scoring_random_seed, + volumes_dir=volumes_dir, scratch_dir=scratch_dir, runtime_environment=runtime_environment, + ) + + # Modifies "produce_result.pipeline_run" in-place. + combine_pipeline_runs( + produce_result.pipeline_run, scoring_pipeline_run=score_result.pipeline_run, + ) + # Sets the error, if there are any. + produce_result.error = score_result.error + + # We modified "produce_result.pipeline_run" in-place and "produce_result" + # is already among "all_results", so we do not add it again. + if score_result.has_error(): + assert all_results.has_error() + return all_scores, all_results + + # Modifies "produce_result.pipeline_run" in-place. 
+ combine_pipeline_runs( + produce_result.pipeline_run, metrics=metrics, scores=scores, + ) + + all_scores.append(scores) + + return all_scores, all_results + + +is_uri = deprecate.function(message="use d3m.utils.is_uri instead")(utils.is_uri) + +get_dataset = deprecate.function(message="use d3m.container.dataset.get_dataset instead")(dataset_module.get_dataset) +get_problem = deprecate.function(message="use d3m.metadata.problem.get_problem instead")(problem.get_problem) +get_pipeline = deprecate.function(message="use d3m.metadata.pipeline.get_pipeline instead")(pipeline_module.get_pipeline) + + +@deprecate.function(message="use d3m.utils.get_datasets_and_problems instead") +def _get_datasets_and_problems( + datasets_dir: str, handle_score_split: bool = True, +) -> typing.Tuple[typing.Dict[str, str], typing.Dict[str, str]]: + return utils.get_datasets_and_problems(datasets_dir, handle_score_split) + + +def _resolve_pipeline_run_datasets( + pipeline_run_datasets: typing.Sequence[typing.Dict[str, str]], *, + dataset_resolver: typing.Callable, compute_digest: dataset_module.ComputeDigest, strict_digest: bool, + strict_resolving: bool, datasets_dir: typing.Optional[str], handle_score_split: bool, +) -> typing.Sequence[container.Dataset]: + resolved_datasets = [] + + for dataset_reference in pipeline_run_datasets: + resolved_dataset = dataset_resolver( + dataset_reference['id'], compute_digest=compute_digest, strict_digest=strict_digest, + datasets_dir=datasets_dir, handle_score_split=handle_score_split, + ) + + resolved_dataset_digest = resolved_dataset.metadata.query(()).get('digest', None) + + if resolved_dataset_digest != dataset_reference['digest']: + if strict_resolving: + raise exceptions.DigestMismatchError( + "Digest for dataset '{dataset_id}' does not match the one specified in the dataset reference. " + "Dataset reference digest: {dataset_digest}. Resolved dataset digest: {resolved_dataset_digest}.".format( + dataset_id=dataset_reference['id'], + dataset_digest=dataset_reference['digest'], + resolved_dataset_digest=resolved_dataset_digest, + ) + ) + else: + logger.warning( + "Digest for dataset '%(dataset_id)s' does not match the one specified in the dataset reference. " + "Dataset reference digest: %(dataset_digest)s. 
Resolved dataset digest: %(resolved_dataset_digest)s.", + { + 'dataset_id': dataset_reference['id'], + 'dataset_digest': dataset_reference['digest'], + 'resolved_dataset_digest': resolved_dataset_digest, + }, + ) + + resolved_datasets.append(resolved_dataset) + + return resolved_datasets + + +def parse_pipeline_run( + pipeline_run_file: typing.IO[typing.Any], pipeline_search_paths: typing.Sequence[str], datasets_dir: typing.Optional[str], *, + pipeline_resolver: typing.Callable = None, dataset_resolver: typing.Callable = None, + problem_resolver: typing.Callable = None, strict_resolving: bool = False, + compute_digest: dataset_module.ComputeDigest = dataset_module.ComputeDigest.ONLY_IF_MISSING, + strict_digest: bool = False, handle_score_split: bool = True, +) -> typing.Sequence[typing.Dict[str, typing.Any]]: + if pipeline_resolver is None: + pipeline_resolver = pipeline_module.get_pipeline + if dataset_resolver is None: + dataset_resolver = dataset_module.get_dataset + if problem_resolver is None: + problem_resolver = problem.get_problem + + pipeline_runs = list(utils.yaml_load_all(pipeline_run_file)) + + if not pipeline_runs: + raise exceptions.InvalidArgumentValueError("Pipeline run file must contain at least one pipeline run document.") + + for pipeline_run in pipeline_runs: + try: + pipeline_run_module.validate_pipeline_run(pipeline_run) + except jsonschema.exceptions.ValidationError as error: + raise exceptions.InvalidArgumentValueError("Provided pipeline run document is not valid.") from error + + pipeline_run['datasets'] = _resolve_pipeline_run_datasets( + pipeline_run['datasets'], dataset_resolver=dataset_resolver, + compute_digest=compute_digest, strict_digest=strict_digest, + strict_resolving=strict_resolving, datasets_dir=datasets_dir, + handle_score_split=handle_score_split, + ) + + if 'problem' in pipeline_run: + pipeline_run['problem'] = problem_resolver( + pipeline_run['problem']['id'], + strict_digest=strict_digest, + datasets_dir=datasets_dir, + handle_score_split=handle_score_split, + ) + + pipeline_run['pipeline'] = pipeline_resolver( + pipeline_run['pipeline']['id'], + strict_resolving=strict_resolving, + strict_digest=strict_digest, + pipeline_search_paths=pipeline_search_paths, + ) + + if 'data_preparation' in pipeline_run['run']: + pipeline_run['run']['data_preparation']['pipeline'] = pipeline_resolver( + pipeline_run['run']['data_preparation']['pipeline']['id'], + strict_resolving=strict_resolving, + strict_digest=strict_digest, + pipeline_search_paths=pipeline_search_paths, + ) + + if 'scoring' in pipeline_run['run']: + if 'datasets' in pipeline_run['run']['scoring']: + assert 'data_preparation' not in pipeline_run['run'] + pipeline_run['run']['scoring']['datasets'] = _resolve_pipeline_run_datasets( + pipeline_run['run']['scoring']['datasets'], dataset_resolver=dataset_resolver, + compute_digest=compute_digest, strict_digest=strict_digest, strict_resolving=strict_resolving, + datasets_dir=datasets_dir, handle_score_split=handle_score_split, + ) + + if pipeline_run['run']['scoring']['pipeline']['id'] == DEFAULT_SCORING_PIPELINE_ID: + pipeline_run['run']['scoring']['pipeline'] = pipeline_resolver( + DEFAULT_SCORING_PIPELINE_PATH, + strict_resolving=strict_resolving, + strict_digest=strict_digest, + pipeline_search_paths=pipeline_search_paths, + ) + else: + pipeline_run['run']['scoring']['pipeline'] = pipeline_resolver( + pipeline_run['run']['scoring']['pipeline']['id'], + strict_resolving=strict_resolving, + strict_digest=strict_digest, + 
pipeline_search_paths=pipeline_search_paths, + ) + + return pipeline_runs + + +def _get_runtime_hyperparams_from_pipeline_run(pipeline: pipeline_module.Pipeline, pipeline_run_steps: typing.Sequence[typing.Dict]) -> typing.Sequence[typing.Union[typing.Dict, typing.Sequence]]: + free_hyperparams = pipeline.get_free_hyperparams() + + # We want to allow missing steps for failed pipeline runs. + if len(free_hyperparams) >= len(pipeline_run_steps): + pipeline_run_steps = list(pipeline_run_steps) + for i in range(len(pipeline_run_steps), len(free_hyperparams)): + pipeline_run_steps.append({}) + else: + raise exceptions.InvalidPipelineRunError("Number of steps in the pipeline run does not match the number of steps of the pipeline.") + + hyperparams: typing.List[typing.Union[typing.Dict, typing.Sequence]] = [] + + for free_hyperparams_for_step, pipeline_run_step in zip(free_hyperparams, pipeline_run_steps): + if isinstance(free_hyperparams_for_step, (dict, frozendict.frozendict)): + values = {} + hyperparams_from_step = pipeline_run_step.get('hyperparams', {}) + for name, hyperparameter in free_hyperparams_for_step.items(): + if name in hyperparams_from_step: + if hyperparams_from_step[name]['type'] == metadata_base.ArgumentType.VALUE.name: + values[name] = hyperparameter.value_from_json_structure(hyperparams_from_step[name]['data']) + else: + raise exceptions.UnexpectedValueError("Hyper-parameter '{name}' of type '{type}' cannot be set at runtime.".format(name=name, type=hyperparams_from_step[name]['type'])) + hyperparams.append(values) + + extra_hyperparams_set = set(hyperparams_from_step.keys()) - set(free_hyperparams_for_step.keys()) + if extra_hyperparams_set: + logger.warning("Pipeline run contains values for additional hyper-parameters: %(extra_hyperparams)s", { + 'extra_hyperparams': sorted(extra_hyperparams_set), + }) + + elif utils.is_sequence(free_hyperparams_for_step): + step_hyperparams = _get_runtime_hyperparams_from_pipeline_run(free_hyperparams_for_step, pipeline_run_step.get('steps', [])) + hyperparams.append(step_hyperparams) + else: + raise exceptions.UnexpectedValueError("Unknown hyper-parameters type: {hyperparams_type}".format(hyperparams_type=type(free_hyperparams_for_step))) + + return hyperparams + + +def _get_data_and_scoring_params_from_pipeline_run(pipeline_run_steps: typing.Sequence[typing.Dict]) -> typing.Dict: + params: typing.Dict[str, typing.Any] = {} + + for pipeline_run_step in pipeline_run_steps: + if pipeline_run_step['type'] == metadata_base.PipelineStepType.PRIMITIVE.name: + new_params = {} + + for hyperparameter_name, hyperparameter in pipeline_run_step.get('hyperparams', {}).items(): + if hyperparameter['type'] == metadata_base.ArgumentType.VALUE.name: + # We are comparing JSON serializations, so we need it to be deterministic, so we sort keys. 
+ new_params[hyperparameter_name] = json.dumps(hyperparameter['data'], sort_keys=True) + else: + raise exceptions.UnexpectedValueError("Hyper-parameter '{name}' of type '{type}' cannot be set at runtime.".format(name=hyperparameter_name, type=hyperparameter['type'])) + + elif pipeline_run_step['type'] == metadata_base.PipelineStepType.SUBPIPELINE.name: + new_params = _get_data_and_scoring_params_from_pipeline_run(pipeline_run_step.get('steps', [])) + + else: + raise exceptions.UnexpectedValueError("Unknown step type: {step_type}".format(step_type=pipeline_run_step['type'])) + + for name, value in new_params.items(): + if name in params: + if params[name] != value: + raise exceptions.UnexpectedValueError( + "Hyper-parameter '{name}' does not have the same value across the whole pipeline: {value1} vs {value2}.".format( + name=name, value1=params[name], value2=value, + ), + ) + else: + params[name] = value + + return params + + +def combine_random_seed(scores: container.DataFrame, random_seed: int) -> container.DataFrame: + random_seed_column = container.DataFrame({'randomSeed': [random_seed] * scores.shape[0]}) + # We add the new column at the end so that we do not have to do complicated changes to the metadata. + output_scores = pandas.concat([scores, random_seed_column], axis=1) + # There is one more column now, so we update metadata for it. + output_scores.metadata = scores.metadata.update((metadata_base.ALL_ELEMENTS,), { + 'dimension': { + 'length': output_scores.shape[1], + }, + }) + output_scores.metadata = output_scores.metadata.update_column(output_scores.shape[1] - 1, { + 'name': 'randomSeed', + 'structural_type': int, + }) + + return output_scores + + +def combine_folds(scores_list: typing.List[container.DataFrame]) -> container.DataFrame: + # We combine multiple scores tables into one output table by adding a "fold" column. + for fold, scores in enumerate(scores_list): + fold_column = container.DataFrame({'fold': [fold] * scores.shape[0]}) + # We add the new column at the end so that we do not have to do complicated + # changes to the metadata. + scores_list[fold] = pandas.concat([scores, fold_column], axis=1) + # There is one more column now, so we update metadata for it. + scores_list[fold].metadata = scores.metadata.update((metadata_base.ALL_ELEMENTS,), { + 'dimension': { + 'length': scores_list[fold].shape[1], + }, + }) + scores_list[fold].metadata = scores_list[fold].metadata.update_column(scores_list[fold].shape[1] - 1, { + 'name': 'fold', + 'structural_type': int, + }) + + scores = pandas.concat(scores_list, axis=0).reset_index(drop=True) + # We reuse metadata from the first fold and update the number of rows which is now + # combined across all folds. 
+ scores.metadata = scores_list[0].metadata.update((), { + 'dimension': { + 'length': scores.shape[0], + }, + }) + + return scores + + +def combine_pipeline_runs( + standard_pipeline_run: pipeline_run_module.PipelineRun, *, + data_pipeline_run: pipeline_run_module.PipelineRun = None, scoring_pipeline_run: pipeline_run_module.PipelineRun = None, + score_inputs: typing.Sequence[typing.Any] = None, metrics: typing.Sequence[typing.Dict] = None, scores: container.DataFrame = None, + fold_group_uuid: uuid.UUID = None, fold_index: int = None, +) -> None: + fold_args_provided = (item is None for item in (fold_group_uuid, fold_index)) + if any(fold_args_provided) and not all(fold_args_provided): + raise exceptions.InvalidArgumentValueError("If any of 'fold_group_uuid' and 'fold_index' are provided, they must all be provided.") + + scores_args_provided = (item is None for item in (scores, metrics)) + if any(scores_args_provided) and not all(scores_args_provided): + raise exceptions.InvalidArgumentValueError("If any of 'scores' or 'metrics' is provided, they must both be provided.") + + if data_pipeline_run is not None: + standard_pipeline_run.set_data_preparation_pipeline_run(data_pipeline_run) + + if fold_group_uuid is not None: + standard_pipeline_run.set_fold_group(fold_group_uuid, fold_index) + + if scoring_pipeline_run is not None: + standard_pipeline_run.set_scoring_pipeline_run(scoring_pipeline_run, score_inputs) + + if scores is not None: + standard_pipeline_run.set_scores(scores, metrics) + + +@deprecate.function(message="use extended DataFrame.to_csv method instead") +def export_dataframe(dataframe: container.DataFrame, output_file: typing.IO[typing.Any] = None) -> typing.Optional[str]: + return dataframe.to_csv(output_file) + + +def _check_duplicate_metrics(metrics: typing.Sequence[typing.Dict]) -> None: + """ + In results from scoring we identify each score by its metric name. So to map those rows in scoring + output back to requested metrics, names must be unique. Otherwise we would not know to which + metric configuration the score belongs to. + """ + + only_metrics = [metric['metric'] for metric in metrics] + + if utils.has_duplicates(only_metrics): + raise exceptions.InvalidArgumentValueError("Same metric listed multiple times.") + + +def get_metrics_from_list(metrics: typing.Sequence[str]) -> typing.Sequence[typing.Dict]: + metric_descriptions = [{'metric': problem.PerformanceMetric[metric]} for metric in metrics] + + _check_duplicate_metrics(metric_descriptions) + + return metric_descriptions + + +def get_metrics_from_problem_description(problem_description: typing.Optional[problem.Problem]) -> typing.Sequence[typing.Dict]: + if problem_description is None: + return [] + + metric_descriptions = problem_description['problem'].get('performance_metrics', []) + + _check_duplicate_metrics(metric_descriptions) + + return metric_descriptions + + +def _output_pipeline_runs(arguments: argparse.Namespace, pipeline_runs: typing.Sequence[pipeline_run_module.PipelineRun]) -> None: + if not getattr(arguments, 'output_run', None): + return + + first = True + for pipeline_run in pipeline_runs: + pipeline_run.to_yaml(arguments.output_run, appending=not first) + first = False + + # Make sure the handle is flushed so that no data is lost. CLI file handles are generally + # used outside of a context manager which would otherwise handle that. 
+ # See: https://gitlab.com/datadrivendiscovery/d3m/issues/436 + arguments.output_run.flush() + + +def fit_handler( + arguments: argparse.Namespace, *, pipeline_resolver: typing.Callable = None, + pipeline_run_parser: typing.Callable = None, dataset_resolver: typing.Callable = None, + problem_resolver: typing.Callable = None, +) -> None: + if pipeline_resolver is None: + pipeline_resolver = pipeline_module.get_pipeline + if pipeline_run_parser is None: + pipeline_run_parser = parse_pipeline_run + if dataset_resolver is None: + dataset_resolver = dataset_module.get_dataset + if problem_resolver is None: + problem_resolver = problem.get_problem + + context = metadata_base.Context[arguments.context] + compute_digest = dataset_module.ComputeDigest[getattr(arguments, 'compute_digest', dataset_module.ComputeDigest.ONLY_IF_MISSING.name)] + runtime_environment = pipeline_run_module.RuntimeEnvironment( + worker_id=getattr(arguments, 'worker_id', None), + ) + + if getattr(arguments, 'input_run', None) is not None: + parsed_pipeline_runs = pipeline_run_parser( + arguments.input_run, getattr(arguments, 'pipeline_search_paths', []), getattr(arguments, 'datasets_dir', None), + pipeline_resolver=pipeline_resolver, dataset_resolver=dataset_resolver, problem_resolver=problem_resolver, + strict_resolving=getattr(arguments, 'strict_resolving', False), + compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + + if len(parsed_pipeline_runs) != 1: + raise exceptions.InvalidArgumentValueError( + "Fit requires exactly one pipeline run. {pipeline_runs} provided.".format(pipeline_runs=len(parsed_pipeline_runs)) + ) + if parsed_pipeline_runs[0]['run']['phase'] != metadata_base.PipelineRunPhase.FIT.name: + raise exceptions.InvalidArgumentValueError( + "Fit requires a FIT phase pipeline run. {phase} phase provided.".format(phase=parsed_pipeline_runs[0]['run']['phase']) + ) + fit_pipeline_run = parsed_pipeline_runs[0] + + pipeline = fit_pipeline_run['pipeline'] + problem_description = fit_pipeline_run.get('problem', None) + inputs = fit_pipeline_run['datasets'] + # Currently, "random_seed" is not yet required. + random_seed = fit_pipeline_run.get('random_seed', 0) + hyperparams = _get_runtime_hyperparams_from_pipeline_run(fit_pipeline_run['pipeline'], fit_pipeline_run.get('steps', [])) + # Currently, "is_standard_pipeline" is not yet required. + is_standard_pipeline = fit_pipeline_run['run'].get('is_standard_pipeline', True) + + else: + pipeline = pipeline_resolver( + arguments.pipeline, + strict_resolving=getattr(arguments, 'strict_resolving', False), + strict_digest=getattr(arguments, 'strict_digest', False), + pipeline_search_paths=getattr(arguments, 'pipeline_search_paths', []), + ) + + if getattr(arguments, 'problem', None) is not None: + problem_description = problem_resolver(arguments.problem, strict_digest=getattr(arguments, 'strict_digest', False)) + else: + problem_description = None + + inputs = [ + dataset_resolver( + input_uri, compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + for input_uri in getattr(arguments, 'inputs', []) + ] + + random_seed = getattr(arguments, 'random_seed', 0) + # We use default hyper-parameter values for now. 
+ hyperparams = None + is_standard_pipeline = getattr(arguments, 'standard_pipeline', True) + + expose_produced_outputs = getattr(arguments, 'expose_produced_outputs_dir', None) is not None + + fitted_pipeline, predictions, result = fit( + pipeline, inputs, + problem_description=problem_description, + context=context, + hyperparams=hyperparams, + random_seed=random_seed, + volumes_dir=getattr(arguments, 'volumes_dir', None), + scratch_dir=getattr(arguments, 'scratch_dir', None), + runtime_environment=runtime_environment, + is_standard_pipeline=is_standard_pipeline, + expose_produced_outputs=expose_produced_outputs, + ) + + if expose_produced_outputs: + save_steps_outputs(result, arguments.expose_produced_outputs_dir) + + _output_pipeline_runs(arguments, [result.pipeline_run]) + + result.check_success() + + if getattr(arguments, 'save', None) is not None: + pickle.dump(fitted_pipeline, arguments.save) + # Make sure the handle is flushed so that no data is lost. CLI file handles are generally + # used outside of a context manager which would otherwise handle that. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/436 + arguments.save.flush() + + if getattr(arguments, 'output', None) is not None: + assert is_standard_pipeline + predictions.to_csv(arguments.output) + + +# We have "pipeline_resolver" and "problem_resolver" as arguments (even if we are not +# using them in this function) so that the signature is the same for all handlers. +def produce_handler( + arguments: argparse.Namespace, *, pipeline_resolver: typing.Callable = None, + pipeline_run_parser: typing.Callable = None, dataset_resolver: typing.Callable = None, + problem_resolver: typing.Callable = None, +) -> None: + if pipeline_run_parser is None: + pipeline_run_parser = parse_pipeline_run + if dataset_resolver is None: + dataset_resolver = dataset_module.get_dataset + + compute_digest = dataset_module.ComputeDigest[getattr(arguments, 'compute_digest', dataset_module.ComputeDigest.ONLY_IF_MISSING.name)] + + fitted_pipeline = pickle.load(arguments.fitted_pipeline) + + if not fitted_pipeline.is_standard_pipeline and getattr(arguments, 'output', None) is not None: + raise exceptions.InvalidArgumentValueError("You cannot save predictions for a non-standard pipeline.") + + if getattr(arguments, 'input_run', None) is not None: + parsed_pipeline_runs = pipeline_run_parser( + arguments.input_run, getattr(arguments, 'pipeline_search_paths', []), getattr(arguments, 'datasets_dir', None), + pipeline_resolver=pipeline_resolver, dataset_resolver=dataset_resolver, problem_resolver=problem_resolver, + strict_resolving=getattr(arguments, 'strict_resolving', False), + compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + + if len(parsed_pipeline_runs) != 1: + raise exceptions.InvalidArgumentValueError( + "Produce requires exactly one pipeline run. {pipeline_runs} provided.".format(pipeline_runs=len(parsed_pipeline_runs)) + ) + if parsed_pipeline_runs[0]['run']['phase'] != metadata_base.PipelineRunPhase.PRODUCE.name: + raise exceptions.InvalidArgumentValueError( + "Produce requires a PRODUCE phase pipeline run. {phase} phase provided.".format(phase=parsed_pipeline_runs[0]['run']['phase']) + ) + produce_pipeline_run = parsed_pipeline_runs[0] + + # TODO: Check that pipeline (and hyperparams, is_standard_pipeline flag) and problem match those in the fitted_pipeline. 
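# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this patch). The "fit_handler" and
# "produce_handler" in this file drive the module-level "fit" and "produce"
# helpers from CLI arguments; the snippet below sketches the same flow called
# programmatically. The import paths follow the conventions used elsewhere in
# this package, and the Context value and all file names/URIs are assumptions.

import pickle

from d3m import runtime
from d3m.container import dataset as dataset_module
from d3m.metadata import base as metadata_base, pipeline as pipeline_module, problem


def fit_and_produce_sketch() -> None:
    pipeline = pipeline_module.get_pipeline('pipeline.json')  # placeholder path
    problem_description = problem.get_problem('problemDoc.json')  # placeholder path
    train_inputs = [dataset_module.get_dataset('file:///data/TRAIN/datasetDoc.json')]  # placeholder URI
    test_inputs = [dataset_module.get_dataset('file:///data/TEST/datasetDoc.json')]  # placeholder URI

    # "fit" returns the fitted pipeline, predictions on the fit inputs, and a Result.
    fitted_pipeline, _, fit_result = runtime.fit(
        pipeline, train_inputs,
        problem_description=problem_description,
        context=metadata_base.Context.TESTING,  # assumed Context value
    )
    fit_result.check_success()

    # The fitted pipeline can be pickled, mirroring what "--save" does in "fit_handler".
    with open('fitted_pipeline.pickle', 'wb') as save_file:  # placeholder path
        pickle.dump(fitted_pipeline, save_file)

    # "produce" returns predictions and a Result for the PRODUCE phase.
    predictions, produce_result = runtime.produce(fitted_pipeline, test_inputs)
    produce_result.check_success()
    predictions.to_csv('predictions.csv')  # extended container DataFrame "to_csv"
# ---------------------------------------------------------------------------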
+ + test_inputs = produce_pipeline_run['datasets'] + + else: + test_inputs = [ + dataset_resolver( + input_uri, compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + for input_uri in getattr(arguments, 'test_inputs', []) + ] + + expose_produced_outputs = getattr(arguments, 'expose_produced_outputs_dir', None) is not None + + predictions, result = produce(fitted_pipeline, test_inputs, expose_produced_outputs=expose_produced_outputs) + + if expose_produced_outputs: + save_steps_outputs(result, arguments.expose_produced_outputs_dir) + + _output_pipeline_runs(arguments, [result.pipeline_run]) + + result.check_success() + + if getattr(arguments, 'output', None) is not None: + assert fitted_pipeline.is_standard_pipeline + predictions.to_csv(arguments.output) + + +# We have "problem_resolver" as an arguments (even if we are not +# using it in this function) so that the signature is the same for all handlers. +def score_handler( + arguments: argparse.Namespace, *, pipeline_resolver: typing.Callable = None, + pipeline_run_parser: typing.Callable = None, dataset_resolver: typing.Callable = None, + problem_resolver: typing.Callable = None, +) -> None: + if pipeline_resolver is None: + pipeline_resolver = pipeline_module.get_pipeline + if pipeline_run_parser is None: + pipeline_run_parser = parse_pipeline_run + if dataset_resolver is None: + dataset_resolver = dataset_module.get_dataset + + context = metadata_base.Context[arguments.context] + compute_digest = dataset_module.ComputeDigest[getattr(arguments, 'compute_digest', dataset_module.ComputeDigest.ONLY_IF_MISSING.name)] + runtime_environment = pipeline_run_module.RuntimeEnvironment( + worker_id=getattr(arguments, 'worker_id', None), + ) + + fitted_pipeline = pickle.load(arguments.fitted_pipeline) + + if not fitted_pipeline.is_standard_pipeline: + raise exceptions.InvalidArgumentValueError("You cannot score a non-standard pipeline.") + + if getattr(arguments, 'input_run', None) is not None: + parsed_pipeline_runs = pipeline_run_parser( + arguments.input_run, getattr(arguments, 'pipeline_search_paths', []), getattr(arguments, 'datasets_dir', None), + pipeline_resolver=pipeline_resolver, dataset_resolver=dataset_resolver, problem_resolver=problem_resolver, + strict_resolving=getattr(arguments, 'strict_resolving', False), + compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + + if len(parsed_pipeline_runs) != 1: + raise exceptions.InvalidArgumentValueError( + "Score requires exactly one pipeline run. {pipeline_runs} provided.".format(pipeline_runs=len(parsed_pipeline_runs)) + ) + if parsed_pipeline_runs[0]['run']['phase'] != metadata_base.PipelineRunPhase.PRODUCE.name: + raise exceptions.InvalidArgumentValueError( + "Score requires a PRODUCE phase pipeline run. {phase} phase provided.".format(phase=parsed_pipeline_runs[0]['run']['phase']) + ) + produce_pipeline_run = parsed_pipeline_runs[0] + + if 'scoring' not in produce_pipeline_run['run']: + raise exceptions.InvalidArgumentValueError("Score requires a pipeline run with scoring.") + if 'datasets' not in produce_pipeline_run['run']['scoring']: + raise exceptions.InvalidArgumentValueError("Score requires scoring datasets to be referenced in the PRODUCE phase pipeline run.") + + # TODO: Check that pipeline (and hyperparams, is_standard_pipeline flag) and problem match those in the fitted_pipeline. 
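# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this patch). "score_handler" below
# builds its metric descriptions with the helpers defined earlier in this file.
# Each description is a plain dict keyed by 'metric'; duplicate metric names
# are rejected because score rows are later matched back to metrics by name.
# The import path and the metric names used here are assumptions.

from d3m import runtime

metric_descriptions = runtime.get_metrics_from_list(['ACCURACY', 'F1_MACRO'])
# Each entry looks like {'metric': problem.PerformanceMetric.ACCURACY}; listing
# the same metric twice raises InvalidArgumentValueError.
# ---------------------------------------------------------------------------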
+ + scoring_pipeline = produce_pipeline_run['run']['scoring']['pipeline'] + test_inputs = produce_pipeline_run['datasets'] + score_inputs = produce_pipeline_run['run']['scoring']['datasets'] + # Currently, "random_seed" is not yet required. + random_seed = produce_pipeline_run['run']['scoring'].get('random_seed', 0) + # We do not have to set metrics, because they should already be included in hyper-paramters. + metrics: typing.Sequence[typing.Dict] = [] + scoring_params = _get_data_and_scoring_params_from_pipeline_run(produce_pipeline_run['run']['scoring'].get('steps', [])) + + else: + scoring_pipeline = pipeline_resolver( + arguments.scoring_pipeline, + strict_resolving=getattr(arguments, 'strict_resolving', False), + strict_digest=getattr(arguments, 'strict_digest', False), + pipeline_search_paths=getattr(arguments, 'pipeline_search_paths', []), + ) + + test_inputs = [ + dataset_resolver( + input_uri, compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + for input_uri in getattr(arguments, 'test_inputs', []) + ] + score_inputs = [ + dataset_resolver( + score_input_uri, compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + for score_input_uri in getattr(arguments, 'score_inputs', []) + ] + + random_seed = getattr(arguments, 'random_seed', 0) + + if getattr(arguments, 'metrics', None) is not None: + metrics = get_metrics_from_list(arguments.metrics) + else: + metrics = get_metrics_from_problem_description(fitted_pipeline.problem_description) + + if getattr(arguments, 'scoring_params', None) is not None: + scoring_params = {name: value for name, value in arguments.scoring_params} + else: + scoring_params = {} + + expose_produced_outputs = getattr(arguments, 'expose_produced_outputs_dir', None) is not None + + predictions, produce_result = produce(fitted_pipeline, test_inputs, expose_produced_outputs=expose_produced_outputs) + + if expose_produced_outputs: + save_steps_outputs(produce_result, arguments.expose_produced_outputs_dir) + + if produce_result.has_error(): + _output_pipeline_runs(arguments, [produce_result.pipeline_run]) + + produce_result.check_success() + + assert False + + if getattr(arguments, 'output', None) is not None: + predictions.to_csv(arguments.output) + + scores, score_result = score( + predictions, + score_inputs, + scoring_pipeline=scoring_pipeline, + problem_description=fitted_pipeline.problem_description, + metrics=metrics, + predictions_random_seed=fitted_pipeline.random_seed, + scoring_params=scoring_params, + context=context, + random_seed=random_seed, + volumes_dir=getattr(arguments, 'volumes_dir', None), + scratch_dir=getattr(arguments, 'scratch_dir', None), + runtime_environment=runtime_environment, + ) + + # Modifies "produce_result.pipeline_run" in-place. + combine_pipeline_runs( + produce_result.pipeline_run, scoring_pipeline_run=score_result.pipeline_run, score_inputs=score_inputs, + ) + + if score_result.has_error(): + _output_pipeline_runs(arguments, [produce_result.pipeline_run]) + + score_result.check_success() + + assert False + + # Modifies "produce_pipeline_run" in-place. 
+ combine_pipeline_runs( + produce_result.pipeline_run, metrics=metrics, scores=scores, + ) + + _output_pipeline_runs(arguments, [produce_result.pipeline_run]) + + if getattr(arguments, 'scores', None) is not None: + scores.to_csv(arguments.scores) + + +def fit_produce_handler( + arguments: argparse.Namespace, *, pipeline_resolver: typing.Callable = None, + pipeline_run_parser: typing.Callable = None, dataset_resolver: typing.Callable = None, + problem_resolver: typing.Callable = None, +) -> None: + if pipeline_resolver is None: + pipeline_resolver = pipeline_module.get_pipeline + if pipeline_run_parser is None: + pipeline_run_parser = parse_pipeline_run + if dataset_resolver is None: + dataset_resolver = dataset_module.get_dataset + if problem_resolver is None: + problem_resolver = problem.get_problem + + context = metadata_base.Context[arguments.context] + compute_digest = dataset_module.ComputeDigest[getattr(arguments, 'compute_digest', dataset_module.ComputeDigest.ONLY_IF_MISSING.name)] + runtime_environment = pipeline_run_module.RuntimeEnvironment( + worker_id=getattr(arguments, 'worker_id', None), + ) + + if getattr(arguments, 'input_run', None) is not None: + parsed_pipeline_runs = pipeline_run_parser( + arguments.input_run, getattr(arguments, 'pipeline_search_paths', []), getattr(arguments, 'datasets_dir', None), + pipeline_resolver=pipeline_resolver, dataset_resolver=dataset_resolver, problem_resolver=problem_resolver, + strict_resolving=getattr(arguments, 'strict_resolving', False), + compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + + if len(parsed_pipeline_runs) != 2: + raise exceptions.InvalidArgumentValueError( + "Fit-produce requires exactly two pipeline runs. {pipeline_runs} provided.".format(pipeline_runs=len(parsed_pipeline_runs)) + ) + # TODO: We might not want to require that the order in the file is strict. + # We could just require that pipeline runs belong together (using previous_pipeline_run) + # and are of FIT and PRODUCE phase and then run them in the correct order. + pipeline_run_0_phase = parsed_pipeline_runs[0]['run']['phase'] + if pipeline_run_0_phase != metadata_base.PipelineRunPhase.FIT.name: + raise exceptions.InvalidArgumentValueError( + "Fit-produce requires the first pipeline run to be a FIT phase. {phase} phase provided.".format(phase=pipeline_run_0_phase) + ) + pipeline_run_1_phase = parsed_pipeline_runs[1]['run']['phase'] + if pipeline_run_1_phase != metadata_base.PipelineRunPhase.PRODUCE.name: + raise exceptions.InvalidArgumentValueError( + "Fit-produce requires the second pipeline run to be a PRODUCE phase. 
{phase} phase provided.".format(phase=pipeline_run_1_phase) + ) + fit_pipeline_run = parsed_pipeline_runs[0] + produce_pipeline_run = parsed_pipeline_runs[1] + + if produce_pipeline_run['previous_pipeline_run']['id'] != fit_pipeline_run['id']: + raise exceptions.InvalidArgumentValueError("Fit-produce requires that the PRODUCE phase pipeline run must reference FIT phase pipeline run in \"previous_pipeline_run\".") + if fit_pipeline_run['pipeline'].id != produce_pipeline_run['pipeline'].id or fit_pipeline_run['pipeline'].get_digest() != produce_pipeline_run['pipeline'].get_digest(): + raise exceptions.InvalidArgumentValueError("Fit-produce requires that both the FIT phase and PRODUCE phase pipeline runs reference the same pipeline.") + if fit_pipeline_run['problem']['id'] != produce_pipeline_run['problem']['id'] or fit_pipeline_run['problem'].get_digest() != produce_pipeline_run['problem'].get_digest(): + raise exceptions.InvalidArgumentValueError("Fit-produce requires that both the FIT phase and PRODUCE phase pipeline runs reference the same problem description.") + + # TODO: Check that hyperparams match between both pipeline runs (but allow failed runs). + # TODO: Check that inputs match between both pipeline runs. + + pipeline = fit_pipeline_run['pipeline'] + problem_description = fit_pipeline_run.get('problem', None) + inputs = fit_pipeline_run['datasets'] + test_inputs = produce_pipeline_run['datasets'] + # Currently, "random_seed" is not yet required. + random_seed = fit_pipeline_run.get('random_seed', 0) + hyperparams = _get_runtime_hyperparams_from_pipeline_run(fit_pipeline_run['pipeline'], fit_pipeline_run.get('steps', [])) + # Currently, "is_standard_pipeline" is not yet required. + is_standard_pipeline = fit_pipeline_run['run'].get('is_standard_pipeline', True) + + else: + pipeline = pipeline_resolver( + arguments.pipeline, + strict_resolving=getattr(arguments, 'strict_resolving', False), + strict_digest=getattr(arguments, 'strict_digest', False), + pipeline_search_paths=getattr(arguments, 'pipeline_search_paths', []), + ) + + if getattr(arguments, 'problem', None) is not None: + problem_description = problem_resolver(arguments.problem, strict_digest=getattr(arguments, 'strict_digest', False)) + else: + problem_description = None + + inputs = [ + dataset_resolver( + input_uri, compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + for input_uri in getattr(arguments, 'inputs', []) + ] + test_inputs = [ + dataset_resolver( + input_uri, compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + for input_uri in getattr(arguments, 'test_inputs', []) + ] + + random_seed = getattr(arguments, 'random_seed', 0) + # We use default hyper-parameter values for now. 
+ hyperparams = None + is_standard_pipeline = getattr(arguments, 'standard_pipeline', True) + + fitted_pipeline, predictions, fit_result = fit( + pipeline, inputs, + problem_description=problem_description, + context=context, + hyperparams=hyperparams, + random_seed=random_seed, + volumes_dir=getattr(arguments, 'volumes_dir', None), + scratch_dir=getattr(arguments, 'scratch_dir', None), + runtime_environment=runtime_environment, + is_standard_pipeline=is_standard_pipeline, + ) + + if fit_result.has_error(): + _output_pipeline_runs(arguments, [fit_result.pipeline_run]) + + fit_result.check_success() + + assert False + + if getattr(arguments, 'save', None) is not None: + pickle.dump(fitted_pipeline, arguments.save) + # Make sure the handle is flushed so that no data is lost. CLI file handles are generally + # used outside of a context manager which would otherwise handle that. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/436 + arguments.save.flush() + + expose_produced_outputs = getattr(arguments, 'expose_produced_outputs_dir', None) is not None + + predictions, produce_result = produce(fitted_pipeline, test_inputs, expose_produced_outputs=expose_produced_outputs) + + if expose_produced_outputs: + save_steps_outputs(produce_result, arguments.expose_produced_outputs_dir) + + _output_pipeline_runs(arguments, [fit_result.pipeline_run, produce_result.pipeline_run]) + + produce_result.check_success() + + if getattr(arguments, 'output', None) is not None: + assert is_standard_pipeline + predictions.to_csv(arguments.output) + + +def fit_score_handler( + arguments: argparse.Namespace, *, pipeline_resolver: typing.Callable = None, + pipeline_run_parser: typing.Callable = None, dataset_resolver: typing.Callable = None, + problem_resolver: typing.Callable = None, +) -> None: + if pipeline_resolver is None: + pipeline_resolver = pipeline_module.get_pipeline + if pipeline_run_parser is None: + pipeline_run_parser = parse_pipeline_run + if dataset_resolver is None: + dataset_resolver = dataset_module.get_dataset + if problem_resolver is None: + problem_resolver = problem.get_problem + + context = metadata_base.Context[arguments.context] + compute_digest = dataset_module.ComputeDigest[getattr(arguments, 'compute_digest', dataset_module.ComputeDigest.ONLY_IF_MISSING.name)] + runtime_environment = pipeline_run_module.RuntimeEnvironment( + worker_id=getattr(arguments, 'worker_id', None), + ) + + if getattr(arguments, 'input_run', None) is not None: + parsed_pipeline_runs = pipeline_run_parser( + arguments.input_run, getattr(arguments, 'pipeline_search_paths', []), getattr(arguments, 'datasets_dir', None), + pipeline_resolver=pipeline_resolver, dataset_resolver=dataset_resolver, problem_resolver=problem_resolver, + strict_resolving=getattr(arguments, 'strict_resolving', False), + compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + + if len(parsed_pipeline_runs) != 2: + raise exceptions.InvalidArgumentValueError( + "Fit-score requires exactly two pipeline runs. {pipeline_runs} provided.".format(pipeline_runs=len(parsed_pipeline_runs)) + ) + # TODO: We might not want to require that the order in the file is strict. + # We could just require that pipeline runs belong together (using previous_pipeline_run) + # and are of FIT and PRODUCE phase and then run them in the correct order. 
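# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this patch). "fit_score_handler"
# here and "evaluate_handler" further below assemble the same building blocks:
# resolve pipelines and datasets, run them, and merge per-fold score tables
# with "combine_folds". The sketch below shows the evaluation flow using the
# module-level "evaluate" helper; the data preparation pipeline, its
# "number_of_folds" parameter name, the Context value, and all paths are
# assumptions that depend on the splitting pipeline actually used.

from d3m import runtime
from d3m.container import dataset as dataset_module
from d3m.metadata import base as metadata_base, pipeline as pipeline_module, problem


def evaluate_sketch():
    pipeline = pipeline_module.get_pipeline('pipeline.json')  # placeholder path
    data_pipeline = pipeline_module.get_pipeline('kfold_split_pipeline.yml')  # placeholder path
    scoring_pipeline = pipeline_module.get_pipeline('scoring_pipeline.yml')  # placeholder path
    problem_description = problem.get_problem('problemDoc.json')  # placeholder path
    inputs = [dataset_module.get_dataset('file:///data/datasetDoc.json')]  # placeholder URI

    metrics = runtime.get_metrics_from_problem_description(problem_description)

    # "evaluate" returns one scores table and one pipeline run result per fold.
    scores_list, results_list = runtime.evaluate(
        pipeline, inputs,
        data_pipeline=data_pipeline,
        scoring_pipeline=scoring_pipeline,
        problem_description=problem_description,
        data_params={'number_of_folds': '2'},  # values are JSON-serialized strings; name is assumed
        metrics=metrics,
        context=metadata_base.Context.TESTING,  # assumed Context value
    )
    results_list.check_success()

    # "combine_folds" appends a "fold" column and concatenates the per-fold tables.
    return runtime.combine_folds(list(scores_list))
# ---------------------------------------------------------------------------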
+ pipeline_run_0_phase = parsed_pipeline_runs[0]['run']['phase'] + if pipeline_run_0_phase != metadata_base.PipelineRunPhase.FIT.name: + raise exceptions.InvalidArgumentValueError( + "Fit-score requires the first pipeline run to be a FIT phase. {phase} phase provided.".format(phase=pipeline_run_0_phase) + ) + pipeline_run_1_phase = parsed_pipeline_runs[1]['run']['phase'] + if pipeline_run_1_phase != metadata_base.PipelineRunPhase.PRODUCE.name: + raise exceptions.InvalidArgumentValueError( + "Fit-score requires the second pipeline run to be a PRODUCE phase. {phase} phase provided.".format(phase=pipeline_run_1_phase) + ) + fit_pipeline_run = parsed_pipeline_runs[0] + produce_pipeline_run = parsed_pipeline_runs[1] + + if produce_pipeline_run['previous_pipeline_run']['id'] != fit_pipeline_run['id']: + raise exceptions.InvalidArgumentValueError("Fit-produce requires that the PRODUCE phase pipeline run must reference FIT phase pipeline run in \"previous_pipeline_run\".") + if fit_pipeline_run['pipeline'].id != produce_pipeline_run['pipeline'].id or fit_pipeline_run['pipeline'].get_digest() != produce_pipeline_run['pipeline'].get_digest(): + raise exceptions.InvalidArgumentValueError("Fit-produce requires that both the FIT phase and PRODUCE phase pipeline runs reference the same pipeline.") + if fit_pipeline_run['problem']['id'] != produce_pipeline_run['problem']['id'] or fit_pipeline_run['problem'].get_digest() != produce_pipeline_run['problem'].get_digest(): + raise exceptions.InvalidArgumentValueError("Fit-produce requires that both the FIT phase and PRODUCE phase pipeline runs reference the same problem description.") + if 'scoring' not in produce_pipeline_run['run']: + raise exceptions.InvalidArgumentValueError("Fit-score requires the PRODUCE phase pipeline run to be a pipeline run with scoring.") + if 'datasets' not in produce_pipeline_run['run']['scoring']: + raise exceptions.InvalidArgumentValueError("Fit-score requires scoring datasets to be referenced in the PRODUCE phase pipeline run.") + + # TODO: Check that hyperparams match between both pipeline runs (but allow failed runs). + # TODO: Check that inputs match between both pipeline runs. + # TODO: Check that scoring pipelines match between both pipeline runs. + + pipeline = fit_pipeline_run['pipeline'] + scoring_pipeline = produce_pipeline_run['run']['scoring']['pipeline'] + problem_description = fit_pipeline_run.get('problem', None) + inputs = fit_pipeline_run['datasets'] + test_inputs = produce_pipeline_run['datasets'] + score_inputs = produce_pipeline_run['run']['scoring']['datasets'] + # Currently, "random_seed" is not yet required. + random_seed = fit_pipeline_run.get('random_seed', 0) + hyperparams = _get_runtime_hyperparams_from_pipeline_run(fit_pipeline_run['pipeline'], fit_pipeline_run.get('steps', [])) + # Currently, "random_seed" is not yet required. + scoring_random_seed = produce_pipeline_run['run']['scoring'].get('random_seed', 0) + # We do not have to set metrics, because they should already be included in hyper-paramters. 
+ metrics: typing.Sequence[typing.Dict] = [] + scoring_params = _get_data_and_scoring_params_from_pipeline_run(produce_pipeline_run['run']['scoring'].get('steps', [])) + + else: + pipeline = pipeline_resolver( + arguments.pipeline, + strict_resolving=getattr(arguments, 'strict_resolving', False), + strict_digest=getattr(arguments, 'strict_digest', False), + pipeline_search_paths=getattr(arguments, 'pipeline_search_paths', []), + ) + scoring_pipeline = pipeline_resolver( + arguments.scoring_pipeline, + strict_resolving=getattr(arguments, 'strict_resolving', False), + strict_digest=getattr(arguments, 'strict_digest', False), + pipeline_search_paths=getattr(arguments, 'pipeline_search_paths', []), + ) + + if getattr(arguments, 'problem', None) is not None: + problem_description = problem_resolver(arguments.problem, strict_digest=getattr(arguments, 'strict_digest', False)) + else: + problem_description = None + + inputs = [ + dataset_resolver( + input_uri, compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + for input_uri in getattr(arguments, 'inputs', []) + ] + test_inputs = [ + dataset_resolver( + input_uri, compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + for input_uri in getattr(arguments, 'test_inputs', []) + ] + score_inputs = [ + dataset_resolver( + score_input_uri, compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + for score_input_uri in getattr(arguments, 'score_inputs', []) + ] + + random_seed = getattr(arguments, 'random_seed', 0) + hyperparams = None + scoring_random_seed = getattr(arguments, 'scoring_random_seed', 0) + + if getattr(arguments, 'metrics', None) is not None: + metrics = get_metrics_from_list(arguments.metrics) + else: + metrics = get_metrics_from_problem_description(problem_description) + + if getattr(arguments, 'scoring_params', None) is not None: + scoring_params = {name: value for name, value in arguments.scoring_params} + else: + scoring_params = {} + + fitted_pipeline, predictions, fit_result = fit( + pipeline, inputs, + problem_description=problem_description, + context=context, + hyperparams=hyperparams, + random_seed=random_seed, + volumes_dir=getattr(arguments, 'volumes_dir', None), + scratch_dir=getattr(arguments, 'scratch_dir', None), + runtime_environment=runtime_environment, + ) + + if fit_result.has_error(): + _output_pipeline_runs(arguments, [fit_result.pipeline_run]) + + fit_result.check_success() + + assert False + + if getattr(arguments, 'save', None) is not None: + pickle.dump(fitted_pipeline, arguments.save) + # Make sure the handle is flushed so that no data is lost. CLI file handles are generally + # used outside of a context manager which would otherwise handle that. 
+ # See: https://gitlab.com/datadrivendiscovery/d3m/issues/436 + arguments.save.flush() + + expose_produced_outputs = getattr(arguments, 'expose_produced_outputs_dir', None) is not None + + predictions, produce_result = produce(fitted_pipeline, test_inputs, expose_produced_outputs=expose_produced_outputs) + + if expose_produced_outputs: + save_steps_outputs(produce_result, arguments.expose_produced_outputs_dir) + + if produce_result.has_error(): + _output_pipeline_runs(arguments, [fit_result.pipeline_run, produce_result.pipeline_run]) + + produce_result.check_success() + + assert False + + if getattr(arguments, 'output', None) is not None: + predictions.to_csv(arguments.output) + + scores, score_result = score( + predictions, score_inputs, + scoring_pipeline=scoring_pipeline, + problem_description=problem_description, + metrics=metrics, + predictions_random_seed=fitted_pipeline.random_seed, + scoring_params=scoring_params, context=context, + random_seed=scoring_random_seed, + volumes_dir=getattr(arguments, 'volumes_dir', None), + scratch_dir=getattr(arguments, 'scratch_dir', None), + runtime_environment=runtime_environment, + ) + + # Modifies "produce_result.pipeline_run" in-place. + combine_pipeline_runs( + produce_result.pipeline_run, scoring_pipeline_run=score_result.pipeline_run, score_inputs=score_inputs, + ) + + if score_result.has_error(): + _output_pipeline_runs(arguments, [fit_result.pipeline_run, produce_result.pipeline_run]) + + score_result.check_success() + + assert False + + # Modifies "produce_result.pipeline_run" in-place. + combine_pipeline_runs( + produce_result.pipeline_run, metrics=metrics, scores=scores, + ) + + _output_pipeline_runs(arguments, [fit_result.pipeline_run, produce_result.pipeline_run]) + + if getattr(arguments, 'scores', None) is not None: + scores.to_csv(arguments.scores) + + +# We have "pipeline_run_parser" as an arguments (even if we are not +# using it in this function) so that the signature is the same for all handlers. 
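# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this patch). The handler that
# follows, "score_predictions_handler", scores an already produced predictions
# file. A hedged sketch of the same steps using the module-level "score"
# helper; the import paths, the Context value, and all file names are
# assumptions.

import pandas

from d3m import container, runtime
from d3m.container import dataset as dataset_module
from d3m.metadata import base as metadata_base, pipeline as pipeline_module, problem


def score_predictions_sketch():
    # Read predictions as strings, as the handler does, and leave value
    # conversion to the primitives in the scoring pipeline.
    predictions_dataframe = pandas.read_csv(
        'predictions.csv',  # placeholder path
        dtype=str, header=0, na_filter=False, encoding='utf8',
    )
    predictions = container.DataFrame(predictions_dataframe, generate_metadata=True)

    scoring_pipeline = pipeline_module.get_pipeline('scoring_pipeline.yml')  # placeholder path
    problem_description = problem.get_problem('problemDoc.json')  # placeholder path
    score_inputs = [dataset_module.get_dataset('file:///data/SCORE/datasetDoc.json')]  # placeholder URI

    scores, score_result = runtime.score(
        predictions, score_inputs,
        scoring_pipeline=scoring_pipeline,
        problem_description=problem_description,
        metrics=runtime.get_metrics_from_problem_description(problem_description),
        context=metadata_base.Context.TESTING,  # assumed Context value
    )
    score_result.check_success()
    return scores
# ---------------------------------------------------------------------------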
+def score_predictions_handler( + arguments: argparse.Namespace, *, pipeline_resolver: typing.Callable = None, + pipeline_run_parser: typing.Callable = None, dataset_resolver: typing.Callable = None, + problem_resolver: typing.Callable = None, +) -> None: + if pipeline_resolver is None: + pipeline_resolver = pipeline_module.get_pipeline + if dataset_resolver is None: + dataset_resolver = dataset_module.get_dataset + if problem_resolver is None: + problem_resolver = problem.get_problem + + context = metadata_base.Context[arguments.context] + compute_digest = dataset_module.ComputeDigest[getattr(arguments, 'compute_digest', dataset_module.ComputeDigest.ONLY_IF_MISSING.name)] + runtime_environment = pipeline_run_module.RuntimeEnvironment( + worker_id=getattr(arguments, 'worker_id', None), + ) + + scoring_pipeline = pipeline_resolver( + arguments.scoring_pipeline, + strict_resolving=getattr(arguments, 'strict_resolving', False), + strict_digest=getattr(arguments, 'strict_digest', False), + pipeline_search_paths=getattr(arguments, 'pipeline_search_paths', []), + ) + + if getattr(arguments, 'problem', None) is not None: + problem_description = problem_resolver(arguments.problem, strict_digest=getattr(arguments, 'strict_digest', False)) + else: + problem_description = None + + score_inputs = [ + dataset_resolver( + score_input_uri, compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + for score_input_uri in getattr(arguments, 'score_inputs', []) + ] + + predictions_dataframe = pandas.read_csv( + arguments.predictions, + # We do not want to do any conversion of values at this point. + # This should be done by primitives later on. + dtype=str, + # We always expect one row header. + header=0, + # We want empty strings and not NaNs. + na_filter=False, + encoding='utf8', + low_memory=False, + memory_map=True, + ) + predictions_random_seed = getattr(arguments, 'predictions_random_seed', None) + scoring_random_seed = getattr(arguments, 'scoring_random_seed', 0) + + if getattr(arguments, 'metrics', None) is not None: + metrics = get_metrics_from_list(arguments.metrics) + else: + metrics = get_metrics_from_problem_description(problem_description) + + if getattr(arguments, 'scoring_params', None) is not None: + scoring_params = {name: value for name, value in arguments.scoring_params} + else: + scoring_params = {} + + # Convert pandas DataFrame to container DataFrame. 
+ predictions = container.DataFrame(predictions_dataframe, generate_metadata=True) + + if getattr(arguments, 'output', None) is not None: + predictions.to_csv(arguments.output) + + scores, score_result = score( + predictions, score_inputs, + scoring_pipeline=scoring_pipeline, + problem_description=problem_description, + metrics=metrics, + predictions_random_seed=predictions_random_seed, + scoring_params=scoring_params, + context=context, + random_seed=scoring_random_seed, + volumes_dir=getattr(arguments, 'volumes_dir', None), + scratch_dir=getattr(arguments, 'scratch_dir', None), + runtime_environment=runtime_environment, + ) + + score_result.check_success() + + if getattr(arguments, 'scores', None) is not None: + scores.to_csv(arguments.scores) + + +def evaluate_handler( + arguments: argparse.Namespace, *, pipeline_resolver: typing.Callable = None, pipeline_run_parser: typing.Callable = None, + dataset_resolver: typing.Callable = None, problem_resolver: typing.Callable = None, +) -> None: + if pipeline_resolver is None: + pipeline_resolver = pipeline_module.get_pipeline + if pipeline_run_parser is None: + pipeline_run_parser = parse_pipeline_run + if dataset_resolver is None: + dataset_resolver = dataset_module.get_dataset + if problem_resolver is None: + problem_resolver = problem.get_problem + + context = metadata_base.Context[arguments.context] + compute_digest = dataset_module.ComputeDigest[getattr(arguments, 'compute_digest', dataset_module.ComputeDigest.ONLY_IF_MISSING.name)] + runtime_environment = pipeline_run_module.RuntimeEnvironment( + worker_id=getattr(arguments, 'worker_id', None), + ) + + if getattr(arguments, 'input_run', None) is not None: + parsed_pipeline_runs = pipeline_run_parser( + arguments.input_run, getattr(arguments, 'pipeline_search_paths', []), getattr(arguments, 'datasets_dir', None), + pipeline_resolver=pipeline_resolver, dataset_resolver=dataset_resolver, problem_resolver=problem_resolver, + strict_resolving=getattr(arguments, 'strict_resolving', False), + compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + + # TODO: Support more than 2 pipeline runs (cross validation). + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/407 + if len(parsed_pipeline_runs) != 2: + raise exceptions.InvalidArgumentValueError( + "Evaluate requires exactly two pipeline runs. {pipeline_runs} provided.".format(pipeline_runs=len(parsed_pipeline_runs)) + ) + # TODO: We might not want to require that the order in the file is strict. + # We could just require that pipeline runs belong together (using previous_pipeline_run) + # and are of FIT and PRODUCE phase and then run them in the correct order. + pipeline_run_0_phase = parsed_pipeline_runs[0]['run']['phase'] + if pipeline_run_0_phase != metadata_base.PipelineRunPhase.FIT.name: + raise exceptions.InvalidArgumentValueError( + "Evaluate requires the first pipeline run to be a FIT phase. {phase} phase provided.".format(phase=pipeline_run_0_phase) + ) + pipeline_run_1_phase = parsed_pipeline_runs[1]['run']['phase'] + if pipeline_run_1_phase != metadata_base.PipelineRunPhase.PRODUCE.name: + raise exceptions.InvalidArgumentValueError( + "Evaluate requires the second pipeline run to be a PRODUCE phase. 
{phase} phase provided.".format(phase=pipeline_run_1_phase) + ) + fit_pipeline_run = parsed_pipeline_runs[0] + produce_pipeline_run = parsed_pipeline_runs[1] + + if produce_pipeline_run['previous_pipeline_run']['id'] != fit_pipeline_run['id']: + raise exceptions.InvalidArgumentValueError("Evaluate requires that the PRODUCE phase pipeline run must reference FIT phase pipeline run in \"previous_pipeline_run\".") + if fit_pipeline_run['pipeline'].id != produce_pipeline_run['pipeline'].id or fit_pipeline_run['pipeline'].get_digest() != produce_pipeline_run['pipeline'].get_digest(): + raise exceptions.InvalidArgumentValueError("Evaluate requires that both the FIT phase and PRODUCE phase pipeline runs reference the same pipeline.") + if fit_pipeline_run['problem']['id'] != produce_pipeline_run['problem']['id'] or fit_pipeline_run['problem'].get_digest() != produce_pipeline_run['problem'].get_digest(): + raise exceptions.InvalidArgumentValueError("Evaluate requires that both the FIT phase and PRODUCE phase pipeline runs reference the same problem description.") + if 'scoring' not in produce_pipeline_run['run']: + raise exceptions.InvalidArgumentValueError("Evaluate requires the PRODUCE phase pipeline run to be a pipeline run with scoring.") + if 'data_preparation' not in produce_pipeline_run['run']: + raise exceptions.InvalidArgumentValueError("Evaluate requires the FIT phase pipeline run to be a pipeline run with data preparation.") + + # TODO: Check that hyperparams match between both pipeline runs (but allow failed runs). + # TODO: Check that inputs match between both pipeline runs. + # TODO: Check that data preparation pipelines match between both pipeline runs. + # TODO: Check that scoring pipelines match between both pipeline runs. + + pipeline = fit_pipeline_run['pipeline'] + data_pipeline = fit_pipeline_run['run']['data_preparation']['pipeline'] + scoring_pipeline = produce_pipeline_run['run']['scoring']['pipeline'] + problem_description = fit_pipeline_run.get('problem', None) + inputs = fit_pipeline_run['datasets'] + # Currently, "random_seed" is not yet required. + random_seed = fit_pipeline_run.get('random_seed', 0) + hyperparams = _get_runtime_hyperparams_from_pipeline_run(fit_pipeline_run['pipeline'], fit_pipeline_run.get('steps', [])) + # Currently, "random_seed" is not yet required. + data_random_seed = fit_pipeline_run['run']['data_preparation'].get('random_seed', 0) + # Currently, "random_seed" is not yet required. + scoring_random_seed = produce_pipeline_run['run']['scoring'].get('random_seed', 0) + # We do not have to set metrics, because they should already be included in hyper-paramters. 
+ metrics: typing.Sequence[typing.Dict] = [] + data_params = _get_data_and_scoring_params_from_pipeline_run(fit_pipeline_run['run']['data_preparation'].get('steps', [])) + scoring_params = _get_data_and_scoring_params_from_pipeline_run(produce_pipeline_run['run']['scoring'].get('steps', [])) + + else: + pipeline = pipeline_resolver( + arguments.pipeline, + strict_resolving=getattr(arguments, 'strict_resolving', False), + strict_digest=getattr(arguments, 'strict_digest', False), + pipeline_search_paths=getattr(arguments, 'pipeline_search_paths', []), + ) + data_pipeline = pipeline_resolver( + arguments.data_pipeline, + strict_resolving=getattr(arguments, 'strict_resolving', False), + strict_digest=getattr(arguments, 'strict_digest', False), + pipeline_search_paths=getattr(arguments, 'pipeline_search_paths', []), + ) + scoring_pipeline = pipeline_resolver( + arguments.scoring_pipeline, + strict_resolving=getattr(arguments, 'strict_resolving', False), + strict_digest=getattr(arguments, 'strict_digest', False), + pipeline_search_paths=getattr(arguments, 'pipeline_search_paths', []), + ) + + if getattr(arguments, 'problem', None) is not None: + problem_description = problem_resolver(arguments.problem, strict_digest=getattr(arguments, 'strict_digest', False)) + else: + problem_description = None + + inputs = [ + dataset_resolver( + input_uri, compute_digest=compute_digest, strict_digest=getattr(arguments, 'strict_digest', False), + ) + for input_uri in getattr(arguments, 'inputs', []) + ] + + random_seed = getattr(arguments, 'random_seed', 0) + hyperparams = None + data_random_seed = getattr(arguments, 'data_random_seed', 0) + scoring_random_seed = getattr(arguments, 'scoring_random_seed', 0) + + if getattr(arguments, 'metrics', None) is not None: + metrics = get_metrics_from_list(arguments.metrics) + else: + metrics = get_metrics_from_problem_description(problem_description) + + if getattr(arguments, 'data_params', None) is not None: + data_params = {name: value for name, value in arguments.data_params} + else: + data_params = {} + + if getattr(arguments, 'data_split_file', None) is not None: + split_file = pandas.read_csv( + arguments.data_split_file, + # We do not want to do any conversion of values at this point. + # This should be done by primitives later on. + dtype=str, + # We always expect one row header. + header=0, + # We want empty strings and not NaNs. + na_filter=False, + encoding='utf8', + low_memory=False, + memory_map=True, + ) + + # We use just the "d3mIndex" column and ignore multi-key indices. + # This works for now because it seems that every current multi-key + # dataset in fact has an unique value in "d3mIndex" alone. + # See: https://gitlab.com/datadrivendiscovery/data-supply/issues/117 + # Hyper-parameter value has to be JSON-serialized. 
+ data_params['primary_index_values'] = json.dumps(list(split_file.loc[split_file['type'] == 'TEST']['d3mIndex'])) + + if getattr(arguments, 'scoring_params', None) is not None: + scoring_params = {name: value for name, value in arguments.scoring_params} + else: + scoring_params = {} + + scores_list, results_list = evaluate( + pipeline, inputs, + data_pipeline=data_pipeline, + scoring_pipeline=scoring_pipeline, + problem_description=problem_description, + data_params=data_params, + metrics=metrics, + scoring_params=scoring_params, + context=context, + hyperparams=hyperparams, + random_seed=random_seed, + data_random_seed=data_random_seed, + scoring_random_seed=scoring_random_seed, + volumes_dir=getattr(arguments, 'volumes_dir', None), + scratch_dir=getattr(arguments, 'scratch_dir', None), + runtime_environment=runtime_environment, + ) + + _output_pipeline_runs(arguments, results_list.pipeline_runs) + + results_list.check_success() + + scores = combine_folds(scores_list) + + if getattr(arguments, 'scores', None) is not None: + scores.to_csv(arguments.scores) + + +def save_steps_outputs(results: typing.Union[Result, MultiResult], output_dir: str) -> None: + if isinstance(results, Result): + for key, step_output in results.values.items(): + container_utils.save_container(step_output, os.path.join(output_dir, key)) + elif isinstance(results, MultiResult): + for i, result in enumerate(results): + for key, step_output in result.values.items(): + container_utils.save_container(step_output, os.path.join(output_dir, str(i), key)) + else: + raise exceptions.UnexpectedTypeError("Type: {results_type}".format(results_type=type(results))) + + +def main(argv: typing.Sequence) -> None: + # We have to disable importing while type checking because it makes + # an import cycle in mypy which makes many typing errors. + if not typing.TYPE_CHECKING: + # Importing here to prevent import cycle. + from d3m import cli + + logging.basicConfig() + + logger.warning("This CLI is deprecated. Use \"python3 -m d3m runtime\" instead.") + + parser = argparse.ArgumentParser(description="Run D3M pipelines.") + cli.runtime_configure_parser(parser) + + arguments = parser.parse_args(argv[1:]) + + cli.runtime_handler(arguments, parser) + + +if __name__ == '__main__': + main(sys.argv) diff --git a/d3m/d3m/types.py b/d3m/d3m/types.py new file mode 100644 index 0000000..d1b3bd6 --- /dev/null +++ b/d3m/d3m/types.py @@ -0,0 +1,24 @@ +import numpy # type: ignore + +from d3m import container + +__all__ = ('Data', 'Container') + +# Open an issue if these standard types are too restrictive for you, +# but the idea is that callers should know in advance which data types +# are being passed in and out of primitives to be able to implement +# their introspection, serialization, and so on. + +simple_data_types = ( + str, bytes, bool, float, int, numpy.integer, numpy.float64, numpy.bool_, type(None), +) + +# A tuple representing all standard container types. +Container = ( + container.ndarray, container.DataFrame, + container.List, container.Dataset, +) + +# A tuple representing all standard data types. Data types are those which +# can be contained inside container types. 
+Data = Container + simple_data_types + (dict,) diff --git a/d3m/d3m/utils.py b/d3m/d3m/utils.py new file mode 100644 index 0000000..e3a1624 --- /dev/null +++ b/d3m/d3m/utils.py @@ -0,0 +1,1823 @@ +import abc +import argparse +import base64 +import collections +import contextlib +import copy +import datetime +import decimal +import enum +import functools +import gzip +import hashlib +import inspect +import json +import logging +import numbers +import operator +import os +import os.path +import pickle +import random +import re +import types +import typing +import sys +import unittest +import uuid +from urllib import parse as url_parse + +import custom_inherit # type: ignore +import frozendict # type: ignore +import git # type: ignore +import jsonpath_ng # type: ignore +import jsonschema # type: ignore +import numpy # type: ignore +import pandas # type: ignore +import typing_inspect # type: ignore +import yaml # type: ignore +import pyrsistent # type: ignore +from jsonschema import validators # type: ignore +from numpy import random as numpy_random # type: ignore +from pytypes import type_util # type: ignore + +import d3m +from d3m import deprecate, exceptions + +if yaml.__with_libyaml__: + from yaml import CSafeLoader as SafeLoader, CSafeDumper as SafeDumper # type: ignore +else: + from yaml import SafeLoader, SafeDumper + +logger = logging.getLogger(__name__) + +NONE_TYPE: typing.Type = type(None) + +# Only types without elements can be listed here. If they are elements, we have to +# check all elements as well. +KNOWN_IMMUTABLE_TYPES = ( + str, int, float, bool, numbers.Integral, decimal.Decimal, + numbers.Real, numpy.integer, numpy.float32, numpy.float64, numpy.bool_, bytes, + datetime.date, datetime.time, datetime.datetime, NONE_TYPE, enum.Enum, +) + +HASH_ID_NAMESPACE = uuid.UUID('8614b2cc-89ef-498e-9254-833233b3959b') + +PACKAGE_BASE = os.path.dirname(d3m.__file__) + + +def current_git_commit(path: str, search_parent_directories: bool = True) -> str: + """ + Returns a git commit hash of the repo at ``path`` or above if ``search_parent_directories`` is ``True``. + + When used to get a commit hash of a Python package, for this to work, the package has + to be installed in "editable" mode (``pip install -e``). + + Parameters + ---------- + path: + A path to repo or somewhere under the repo. + search_parent_directories: + Whether to search for a git repository in directories above ``path``. + + Returns + ------- + A git commit hash. + """ + + repo = git.Repo(path=path, search_parent_directories=search_parent_directories) + return repo.head.object.hexsha + + +# Using typing.TypeVar in type signature does not really work, so we are using type instead. +# See: https://github.com/python/typing/issues/520 +def get_type_arguments(cls: type, *, unique_names: bool = False) -> typing.Dict[type, type]: + """ + Returns a mapping between type arguments and their types of a given class ``cls``. + + Parameters + ---------- + cls: + A class to return mapping for. + unique_names: + Should we force unique names of type parameters. + + Returns + ------- + A mapping from type argument to its type. + """ + + # Using typing.TypeVar in type signature does not really work, so we are using type instead. 
+ # See: https://github.com/python/typing/issues/520 + result: typing.Dict[type, type] = {} + + for base_class in inspect.getmro(typing_inspect.get_origin(cls)): + if base_class == typing.Generic: + break + + if not typing_inspect.is_generic_type(base_class): + continue + + parameters = typing_inspect.get_parameters(base_class) + + # We are using _select_Generic_superclass_parameters and not get_Generic_parameters + # so that we can handle the case where the result is None. + # See: https://github.com/Stewori/pytypes/issues/20 + arguments = type_util._select_Generic_superclass_parameters(cls, base_class) + + if arguments is None: + arguments = [typing.Any] * len(parameters) + + if len(parameters) != len(arguments): + raise TypeError("Number of parameters does not match number of arguments.") + + for parameter, argument in zip(parameters, arguments): + if type_util.resolve_fw_decl(argument, module_name=base_class.__module__, globs=dir(sys.modules[base_class.__module__]))[1]: + argument = argument.__forward_value__ + + visited: typing.Set[type] = set() + while typing_inspect.is_typevar(argument) and argument in result: + if argument in visited: + raise RuntimeError("Loop while resolving type variables.") + visited.add(argument) + + argument = result[argument] + + if parameter == argument: + argument = typing.Any + + if parameter in result: + if result[parameter] != argument: + raise TypeError("Different types for same parameter across class bases: {type1} vs. {type2}".format( + type1=result[parameter], + type2=argument, + )) + else: + result[parameter] = argument + + if unique_names: + type_parameter_names = [parameter.__name__ for parameter in result.keys()] + + type_parameter_names_set = set(type_parameter_names) + + if len(type_parameter_names) != len(type_parameter_names_set): + for name in type_parameter_names_set: + type_parameter_names.remove(name) + raise TypeError("Same name reused across different type parameters: {extra_names}".format(extra_names=type_parameter_names)) + + return result + + +def is_instance(obj: typing.Any, cls: typing.Union[type, typing.Tuple[type]]) -> bool: + # We do not want really to check generators. A workaround. + # See: https://github.com/Stewori/pytypes/issues/49 + if isinstance(obj, types.GeneratorType): + return False + + if isinstance(cls, tuple): + cls = typing.Union[cls] # type: ignore + + # "bound_typevars" argument has to be passed for this function to + # correctly work with type variables. + # See: https://github.com/Stewori/pytypes/issues/24 + return type_util._issubclass(type_util.deep_type(obj), cls, bound_typevars={}) + + +def is_subclass(subclass: type, superclass: typing.Union[type, typing.Tuple[type]]) -> bool: + # "bound_typevars" argument has to be passed for this function to + # correctly work with type variables. 
+ # See: https://github.com/Stewori/pytypes/issues/24 + return type_util._issubclass(subclass, superclass, bound_typevars={}) + + +def get_type(obj: typing.Any) -> type: + typ = type_util.deep_type(obj, depth=1) + + if is_subclass(typ, type_util.Empty): + typ = typing_inspect.get_last_args(typ)[0] + + return typ + + +def is_instance_method_on_class(method: typing.Any) -> bool: + if is_class_method_on_class(method): + return False + + if inspect.isfunction(method): + return True + + if getattr(method, '__func__', None): + return True + + return False + + +def is_class_method_on_class(method: typing.Any) -> bool: + return inspect.ismethod(method) + + +def is_instance_method_on_object(method: typing.Any, object: typing.Any) -> bool: + if not inspect.ismethod(method): + return False + + if method.__self__ is object: + return True + + return False + + +def is_class_method_on_object(method: typing.Any, object: typing.Any) -> bool: + if not inspect.ismethod(method): + return False + + if method.__self__ is type(object): + return True + + return False + + +def is_type(obj: typing.Any) -> bool: + return isinstance(obj, type) or obj is typing.Any or typing_inspect.is_tuple_type(obj) or typing_inspect.is_union_type(obj) + + +def type_to_str(obj: type) -> str: + return type_util.type_str(obj, assumed_globals={}, update_assumed_globals=False) + + +def get_type_hints(func: typing.Callable) -> typing.Dict[str, typing.Any]: + # To skip decorators. Same stop function as used in "inspect.signature". + func = inspect.unwrap(func, stop=(lambda f: hasattr(f, '__signature__'))) + return type_util.get_type_hints(func) + + +yaml_warning_issued = False + + +def yaml_dump_all(documents: typing.Sequence[typing.Any], stream: typing.IO[typing.Any] = None, **kwds: typing.Any) -> typing.Any: + global yaml_warning_issued + + if not yaml.__with_libyaml__ and not yaml_warning_issued: + yaml_warning_issued = True + logger.warning("cyaml not found, using a slower pure Python YAML implementation.") + + return yaml.dump_all(documents, stream, Dumper=SafeDumper, **kwds) + + +def yaml_dump(data: typing.Any, stream: typing.IO[typing.Any] = None, **kwds: typing.Any) -> typing.Any: + global yaml_warning_issued + + if not yaml.__with_libyaml__ and not yaml_warning_issued: + yaml_warning_issued = True + logger.warning("cyaml not found, using a slower pure Python YAML implementation.") + + return yaml.dump_all([data], stream, Dumper=SafeDumper, **kwds) + + +def yaml_load_all(stream: typing.Union[str, typing.IO[typing.Any]]) -> typing.Any: + global yaml_warning_issued + + if not yaml.__with_libyaml__ and not yaml_warning_issued: + yaml_warning_issued = True + logger.warning("cyaml not found, using a slower pure Python YAML implementation.") + + return yaml.load_all(stream, SafeLoader) + + +def yaml_load(stream: typing.Union[str, typing.IO[typing.Any]]) -> typing.Any: + global yaml_warning_issued + + if not yaml.__with_libyaml__ and not yaml_warning_issued: + yaml_warning_issued = True + logger.warning("cyaml not found, using a slower pure Python YAML implementation.") + + return yaml.load(stream, SafeLoader) + + +def yaml_add_representer(value_type: typing.Type, represented: typing.Callable) -> None: + yaml.Dumper.add_representer(value_type, represented) + yaml.SafeDumper.add_representer(value_type, represented) + + if yaml.__with_libyaml__: + yaml.CDumper.add_representer(value_type, represented) # type: ignore + yaml.CSafeDumper.add_representer(value_type, represented) # type: ignore + + +class EnumMeta(enum.EnumMeta): + def 
__new__(mcls, class_name, bases, namespace, **kwargs): # type: ignore + def __reduce_ex__(self: typing.Any, proto: int) -> typing.Any: + return self.__class__, (self._value_,) + + if '__reduce_ex__' not in namespace: + namespace['__reduce_ex__'] = __reduce_ex__ + + cls = super().__new__(mcls, class_name, bases, namespace, **kwargs) + + def yaml_representer(dumper, data): # type: ignore + return yaml.ScalarNode('tag:yaml.org,2002:str', data.name) + + yaml_add_representer(cls, yaml_representer) + + return cls + + +class Enum(enum.Enum, metaclass=EnumMeta): + """ + An extension of `Enum` base class where: + + * Instances are equal to their string names, too. + * It registers itself with "yaml" module to serialize itself as a string. + * Allows dynamic registration of additional values using ``register_value``. + """ + + def __eq__(self, other): # type: ignore + if isinstance(other, str): + return self.name == other + + return super().__eq__(other) + + # It must hold a == b => hash(a) == hash(b). Because we allow enums to be equal to names, + # the easiest way to assure the condition is to hash everything according to their names. + def __hash__(self): # type: ignore + return hash(self.name) + + @classmethod + def register_value(cls, name: str, value: typing.Any) -> typing.Any: + # This code is based on Python's "EnumMeta.__new__" code, see + # comments there for more information about the code. + # It uses internals of Python's Enum so it is potentially fragile. + + __new__, save_new, use_args = type(cls)._find_new_({}, cls._member_type_, cls) # type: ignore + + dynamic_attributes = { + k for c in cls.mro() + for k, v in c.__dict__.items() + if isinstance(v, types.DynamicClassAttribute) + } + + if not isinstance(value, tuple): + args: typing.Tuple[typing.Any, ...] = (value,) + else: + args = value + if cls._member_type_ is tuple: # type: ignore + args = (args,) + + if not use_args: + enum_member = __new__(cls) + if not hasattr(enum_member, '_value_'): + enum_member._value_ = value + else: + enum_member = __new__(cls, *args) + if not hasattr(enum_member, '_value_'): + if cls._member_type_ is object: # type: ignore + enum_member._value_ = value + else: + enum_member._value_ = cls._member_type_(*args) # type: ignore + value = enum_member._value_ + enum_member._name_ = name + enum_member.__objclass__ = cls + enum_member.__init__(*args) + for canonical_member in cls._member_map_.values(): # type: ignore + if canonical_member._value_ == enum_member._value_: + enum_member = canonical_member + break + else: + cls._member_names_.append(name) # type: ignore + if name not in dynamic_attributes: + setattr(cls, name, enum_member) + cls._member_map_[name] = enum_member # type: ignore + try: + cls._value2member_map_[value] = enum_member # type: ignore + except TypeError: + pass + + +# Return type has to be "Any" because mypy does not support enums generated dynamically +# and complains about missing attributes when trying to access them. +def create_enum_from_json_schema_enum( + class_name: str, obj: typing.Dict, json_paths: typing.Union[typing.Sequence[str], str], *, + module: str = None, qualname: str = None, base_class: type = None +) -> typing.Any: + if qualname is None: + qualname = class_name + + if isinstance(json_paths, str): + names = _get_names(obj, json_paths) + else: + names = [] + for path in json_paths: + names += _get_names(obj, path) + + # Make the list contain unique names. It keeps the original order in Python 3.6+ + # because dicts are ordered. 
We use the same string for both the name and the value. + pairs = [(name, name) for name in dict.fromkeys(names).keys()] + + return Enum(value=class_name, names=pairs, module=module, qualname=qualname, type=base_class) # type: ignore + + +def _get_names(obj: typing.Dict, path: str) -> typing.List: + json_path_expression = jsonpath_ng.parse(path) + return [match.value for match in json_path_expression.find(obj)] + + +# This allows other modules to register additional immutable values and types. +# We are doing it this way to overcome issues with import cycles. +additional_immutable_values: typing.Tuple[typing.Any, ...] = () +additional_immutable_types: typing.Tuple[type, ...] = () + + +def make_immutable_copy(obj: typing.Any) -> typing.Any: + """ + Converts a given ``obj`` into an immutable copy of it, if possible. + + Parameters + ---------- + obj: + Object to convert. + + Returns + ------- + An immutable copy of ``obj``. + """ + + if any(obj is immutable_value for immutable_value in additional_immutable_values): + return obj + + if isinstance(obj, numpy.matrix): + # One cannot iterate over a matrix segment by segment. You always get back + # a matrix (2D structure) and not an array of rows or columns. By converting + # it to an array such iteration segment by segment works. + obj = numpy.array(obj) + + if isinstance(obj, KNOWN_IMMUTABLE_TYPES): + # Because str is among known immutable types, it will not be picked apart as a sequence. + return obj + if additional_immutable_types and isinstance(obj, additional_immutable_types): + return obj + if is_type(obj): + # Assume all types are immutable. + return obj + if isinstance(obj, typing.Mapping): + # We simply always preserve order of the mapping. Because we want to make sure also mapping's + # values are converted to immutable values, we cannot simply use MappingProxyType. + return frozendict.FrozenOrderedDict((make_immutable_copy(k), make_immutable_copy(v)) for k, v in obj.items()) + if isinstance(obj, typing.Set): + return frozenset(make_immutable_copy(o) for o in obj) + if isinstance(obj, tuple): + # To preserve named tuples. + return type(obj)(make_immutable_copy(o) for o in obj) + if isinstance(obj, pandas.DataFrame): + return tuple(make_immutable_copy(o) for o in obj.itertuples(index=False, name=None)) + if isinstance(obj, (typing.Sequence, numpy.ndarray)): + return tuple(make_immutable_copy(o) for o in obj) + + raise TypeError("{obj} is not known to be immutable.".format(obj=obj)) + + +def check_immutable(obj: typing.Any) -> None: + """ + Checks that ``obj`` is immutable. Raises an exception if this is not true. + + Parameters + ---------- + obj: + Object to check. + """ + + obj_type = type(obj) + + # First check common cases. + if any(obj is immutable_value for immutable_value in additional_immutable_values): + return + if obj_type in KNOWN_IMMUTABLE_TYPES: + return + if obj_type is frozendict.FrozenOrderedDict: + for k, v in obj.items(): + check_immutable(k) + check_immutable(v) + return + if obj_type is tuple: + for o in obj: + check_immutable(o) + return + + if isinstance(obj, KNOWN_IMMUTABLE_TYPES): + return + if additional_immutable_types and isinstance(obj, additional_immutable_types): + return + if isinstance(obj, tuple): + # To support named tuples. + for o in obj: + check_immutable(o) + return + if is_type(obj): + # Assume all types are immutable. 
+ return + if obj_type is frozenset: + for o in obj: + check_immutable(o) + return + + raise TypeError("{obj} is not known to be immutable.".format(obj=obj)) + + +class Metaclass(custom_inherit._DocInheritorBase): + """ + A metaclass which makes sure docstrings are inherited. + + It knows how to merge numpy-style docstrings and merge parent sections with + child sections. For example, then it is not necessary to repeat documentation + for parameters if they have not changed. + """ + + @staticmethod + def class_doc_inherit(prnt_doc: str = None, child_doc: str = None) -> typing.Optional[str]: + return custom_inherit.store['numpy'](prnt_doc, child_doc) + + @staticmethod + def attr_doc_inherit(prnt_doc: str = None, child_doc: str = None) -> typing.Optional[str]: + return custom_inherit.store['numpy'](prnt_doc, child_doc) + + +class AbstractMetaclass(abc.ABCMeta, Metaclass): + """ + A metaclass which makes sure docstrings are inherited. For use with abstract classes. + """ + + +class GenericMetaclass(typing.GenericMeta, Metaclass): + """ + A metaclass which makes sure docstrings are inherited. For use with generic classes (which are also abstract). + """ + + +class RefResolverNoRemote(validators.RefResolver): + def resolve_remote(self, uri: str) -> typing.Any: + raise exceptions.NotSupportedError("Remote resolving disabled: {uri}".format(uri=uri)) + + +def enum_validator(validator, enums, instance, schema): # type: ignore + if isinstance(instance, Enum): + instance = instance.name + + yield from validators.Draft7Validator.VALIDATORS['enum'](validator, enums, instance, schema) + + +def json_schema_is_string(checker: jsonschema.TypeChecker, instance: typing.Any) -> bool: + if isinstance(instance, Enum): + return True + else: + return validators.Draft7Validator.TYPE_CHECKER.is_type(instance, 'string') + + +def json_schema_is_object(checker: jsonschema.TypeChecker, instance: typing.Any) -> bool: + if isinstance(instance, (frozendict.frozendict, frozendict.FrozenOrderedDict)): + return True + else: + return validators.Draft7Validator.TYPE_CHECKER.is_type(instance, 'object') + + +def json_schema_is_array(checker: jsonschema.TypeChecker, instance: typing.Any) -> bool: + if isinstance(instance, (tuple, set)): + return True + else: + return validators.Draft7Validator.TYPE_CHECKER.is_type(instance, 'array') + + +JsonSchemaTypeChecker = validators.Draft7Validator.TYPE_CHECKER.redefine_many({ + 'string': json_schema_is_string, + 'object': json_schema_is_object, + 'array': json_schema_is_array, +}) + + +# JSON schema validator with the following extension: +# +# * If a value is an instance of Python enumeration, its name is checked against JSON +# schema enumeration, instead of the value itself. When converting to a proper JSON +# these values should be enumeration's name. +Draft7Validator = validators.extend( + validators.Draft7Validator, + validators={ + 'enum': enum_validator, + }, + type_checker=JsonSchemaTypeChecker, +) + + +draft7_format_checker = copy.deepcopy(jsonschema.draft7_format_checker) + + +@draft7_format_checker.checks('python-type') +def json_schema_is_python_type(instance: typing.Any) -> bool: + return is_type(instance) or isinstance(instance, str) + + +# We cannot use "Draft7Validator" as a type (MyPy complains), so we are using +# "validators.Draft7Validator", which has the same interface. 
+def load_schema_validators(schemas: typing.Dict, load_validators: typing.Sequence[str]) -> typing.List[validators.Draft7Validator]: + schema_validators = [] + + for schema_filename in load_validators: + for schema_uri, schema_json in schemas.items(): + if os.path.basename(schema_uri) == schema_filename: + break + else: + raise exceptions.InvalidArgumentValueError("Cannot find schema '{schema_filename}'.".format(schema_filename=schema_filename)) + + # We validate schemas using unmodified validator. + validators.Draft7Validator.check_schema(schema_json) + + validator = Draft7Validator( + schema=schema_json, + resolver=RefResolverNoRemote(schema_json['id'], schema_json, schemas), + format_checker=draft7_format_checker, + ) + + schema_validators.append(validator) + + return schema_validators + + +def datetime_for_json(timestamp: datetime.datetime) -> str: + # Since Python 3.6 "astimezone" can be called on naive instances + # that are presumed to represent system local time. + # We remove timezone information before formatting to not have "+00:00" added and + # we then manually add "Z" instead (which has equivalent meaning). + return timestamp.astimezone(datetime.timezone.utc).replace(tzinfo=None).isoformat('T') + 'Z' + + +class JsonEncoder(json.JSONEncoder): + """ + JSON encoder with extensions, among them the main ones are: + + * Frozen dict is encoded as a dict. + * Python types are encoded into strings describing them. + * Python enumerations are encoded into their string names. + * Sets are encoded into lists. + * Encodes ndarray and DataFrame as nested lists. + * Encodes datetime into ISO format with UTC timezone. + * Everything else which cannot be encoded is converted to a string. + + You probably want to use `to_json_structure` and not this class, because `to_json_structure` + also encodes ``NaN`, ``Infinity``, and ``-Infinity`` as strings. + + It does not necessary make a JSON which can then be parsed back to reconstruct original value. + """ + + def default(self, o: typing.Any) -> typing.Any: + # Importing here to prevent import cycle. + from d3m.metadata import base + + if isinstance(o, numpy.matrix): + # One cannot iterate over a matrix segment by segment. You always get back + # a matrix (2D structure) and not an array of rows or columns. By converting + # it to an array such iteration segment by segment works. + o = numpy.array(o) + + if isinstance(o, frozendict.frozendict): + return dict(o) + if isinstance(o, frozendict.FrozenOrderedDict): + return collections.OrderedDict(o) + if is_type(o): + return type_to_str(o) + if isinstance(o, Enum): + return o.name + if o is base.ALL_ELEMENTS: + return repr(o) + if o is base.NO_VALUE: + return repr(o) + # For encoding numpy.int64, numpy.float64 already works. 
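+        # (Clarifying note: numpy.float64 is a subclass of Python's float, so the base JSON
+        # encoder already serializes it; numpy integer and boolean types are not subclasses of
+        # int/bool, hence the explicit conversions below.)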
+ if isinstance(o, numpy.integer): + return int(o) + if isinstance(o, numpy.bool_): + return bool(o) + if isinstance(o, typing.Mapping): + return collections.OrderedDict(o) + if isinstance(o, typing.Set): + return sorted(o, key=str) + if isinstance(o, pandas.DataFrame): + return list(o.itertuples(index=False, name=None)) + if isinstance(o, (typing.Sequence, numpy.ndarray)): + return list(o) + if isinstance(o, decimal.Decimal): + return float(o) + if isinstance(o, bytes): + return base64.b64encode(o).decode('utf8') + if isinstance(o, datetime.datetime): + return datetime_for_json(o) + + try: + return super().default(o) + except TypeError: + return str(o) + + +def normalize_numbers(obj: typing.Dict) -> typing.Dict: + return json.loads(json.dumps(obj), parse_int=float) + + +json_constant_map = { + '-Infinity': str(float('-Infinity')), + 'Infinity': str(float('Infinity')), + 'NaN': str(float('NaN')), +} + + +def to_json_structure(obj: typing.Any) -> typing.Any: + """ + In addition to what `JsonEncoder` encodes, this function also encodes as strings + float ``NaN``, ``Infinity``, and ``-Infinity``. + + It does not necessary make a JSON structure which can then be parsed back to reconstruct + original value. For that use ``to_reversible_json_structure``. + """ + + # We do not use "allow_nan=False" here because we will handle those values during loading. + # "JsonEncoder.default" is not called for float values so we cannot handle them there. + # See: https://bugs.python.org/issue36841 + json_string = json.dumps(obj, cls=JsonEncoder) + + return json.loads( + json_string, + parse_constant=lambda constant: json_constant_map[constant], + ) + + +def _json_key(key: typing.Any) -> str: + if isinstance(key, str): + return key + else: + raise TypeError("Key must be a string, not '{key_type}'.".format(key_type=type(key))) + + +def to_reversible_json_structure(obj: typing.Any) -> typing.Any: + """ + Operation is not idempotent. + """ + + if isinstance(obj, (str, bool, NONE_TYPE)): + return obj + + obj_type = type(obj) + + if _is_int(obj_type): + # To make sure it is Python int. + obj = int(obj) + + return obj + + elif _is_float(obj_type): + # To make sure it is Python float. + obj = float(obj) + + if not numpy.isfinite(obj): + return { + 'encoding': 'pickle', + 'description': str(obj), + 'value': base64.b64encode(pickle.dumps(obj)).decode('utf8'), + } + else: + return obj + + elif isinstance(obj, typing.Mapping): + if 'encoding' in obj and 'value' in obj: + return { + 'encoding': 'escape', + 'value': {_json_key(k): to_reversible_json_structure(v) for k, v in obj.items()}, + } + else: + return {_json_key(k): to_reversible_json_structure(v) for k, v in obj.items()} + + # We do not use "is_sequence" because we do not want to convert all sequences, + # because it can be loosing important information. + elif isinstance(obj, (tuple, list)): + return [to_reversible_json_structure(v) for v in obj] + + else: + return { + 'encoding': 'pickle', + 'description': str(obj), + 'value': base64.b64encode(pickle.dumps(obj)).decode('utf8'), + } + + +def from_reversible_json_structure(obj: typing.Any) -> typing.Any: + if is_instance(obj, typing.Union[str, int, float, bool, NONE_TYPE]): + return obj + + elif isinstance(obj, typing.Mapping): + if 'encoding' in obj and 'value' in obj: + if obj['encoding'] == 'pickle': + # TODO: Limit the types of values being able to load to prevent arbitrary code execution by a malicious pickle. 
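+                    # Illustrative round trip: "to_reversible_json_structure" encodes a non-finite
+                    # float above as {'encoding': 'pickle', 'description': 'nan', 'value': '<base64>'},
+                    # and this branch decodes such mappings back into the original Python objects.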
+ return pickle.loads(base64.b64decode(obj['value'].encode('utf8'))) + if obj['encoding'] == 'escape': + return {_json_key(k): from_reversible_json_structure(v) for k, v in obj['value'].items()} + else: + raise ValueError("Unsupported encoding '{encoding}'.".format(encoding=obj['encoding'])) + else: + return {_json_key(k): from_reversible_json_structure(v) for k, v in obj.items()} + + # We do not use "is_sequence" because we do not want to convert all sequences, + # because it can be loosing important information. + elif isinstance(obj, (tuple, list)): + return [from_reversible_json_structure(v) for v in obj] + + else: + raise TypeError("Unsupported type '{value_type}'.".format(value_type=type(obj))) + + +class StreamToLogger: + def __init__(self, logger: logging.Logger, level: typing.Union[str, int], pass_through_stream: typing.TextIO = None) -> None: + self.logger = logger + self.level = logging._checkLevel(level) # type: ignore + self.pending_line = "" + self.closed = False + self.pass_through_stream = pass_through_stream + + # Here we are trying to test for the case of a recursive loop which can happen + # if you are using "logging.StreamHandler" in your logging configuration (e.g., to + # output logging to a console) and configure it after "redirect_to_logging' context + # manager has been entered. + def _check_recursion(self) -> bool: + # We start at "2" so that we start from outside of this file. + frame = sys._getframe(2) + line_number = None + try: + i = 0 + # If loop is happening, it is generally looping inside less than 10 frames, + # so we exit after 20 frames (just to make sure, all these values are ballpark + # values) to optimize. + while frame and i < 20: + if frame.f_code.co_filename == __file__: + # The first (in fact the last from call perspective) time we are + # in this file. + if line_number is None: + line_number = frame.f_lineno + # If we were in the same file and line already higher in the stack, + # we are in a recursive loop. + elif line_number == frame.f_lineno: + return True + frame = frame.f_back + i += 1 + finally: + del frame + + return False + + def write(self, buffer: str) -> int: + if self.closed: + raise ValueError("Stream is closed.") + + if self._check_recursion(): + # We are being called by a logger in a recursive loop. Because this message has already been logged, + # it is safe for us to just drop it to break a recursive loop. + return 0 + + # We only write complete lines to the logger. Any incomplete line will be saved to "pending_line", and flushed + # if "flush" is called or the context manager is closed. + bytes_written = 0 + lines = (self.pending_line + buffer).split('\n') + # Since we split on "\n", the last string in the list of lines will be an empty string if the last character + # in the buffer is a newline, which is what we want in this case as it resets the "pending_line" to empty. + # Otherwise the last string in the list of lines are characters after the last "\n", which is again what we + # want, setting the "pending_line" to characters not logged this time. + self.pending_line = lines[-1] + for line in lines[:-1]: + # Whitespace lines should not be logged. 
+ if line.strip(): + self.logger.log(self.level, line.rstrip()) + bytes_written += len(line) + + if self.pass_through_stream is not None: + self.pass_through_stream.write(buffer) + + return bytes_written + + def writelines(self, lines: typing.List[str]) -> None: + if self.closed: + raise ValueError("Stream is closed.") + + if self._check_recursion(): + # We are being called by a logger in a recursive loop. Because this message has already been logged, + # it is safe for us to just drop it to break a recursive loop. + return + + for line in lines: + if line.strip(): + self.logger.log(self.level, line.rstrip()) + + if self.pass_through_stream is not None: + if hasattr(self.pass_through_stream, 'writelines'): + self.pass_through_stream.writelines(lines) + else: + for line in lines: + self.pass_through_stream.write(line) + + def flush(self) -> None: + if self.closed: + raise ValueError("Stream is closed.") + + if self.pending_line.strip(): + self.logger.log(self.level, self.pending_line.rstrip()) + + if self.pass_through_stream is not None: + self.pass_through_stream.flush() + + def close(self) -> None: + if self.closed: + return + + if self.pending_line.strip(): + self.logger.log(self.level, self.pending_line.rstrip()) + self.closed = True + + def seekable(self) -> bool: + return False + + def seek(self, offset: int, whence: int = 0) -> int: + raise OSError("Stream is not seekable.") + + def tell(self) -> int: + raise OSError("Stream is not seekable.") + + def truncate(self, size: int = None) -> int: + raise OSError("Stream is not seekable.") + + def writable(self) -> bool: + return True + + def isatty(self) -> bool: + return False + + def readable(self) -> bool: + return False + + def read(self, n: int = -1) -> typing.AnyStr: + raise OSError("Stream is write-only.") + + def readline(self, limit: int = -1) -> typing.AnyStr: + raise OSError("Stream is write-only.") + + def readlines(self, hint: int = -1) -> typing.List[typing.AnyStr]: + raise OSError("Stream is write-only.") + + def fileno(self) -> int: + raise OSError("Stream does not use a file descriptor.") + + +class redirect_to_logging(contextlib.AbstractContextManager): + """ + A Python context manager which redirects all writes to stdout and stderr + to Python logging. + + Primitives should use logging to log messages, but maybe they are not doing + that or there are other libraries they are using which are not doing that. + One can then use this context manager to assure that (at least all Python) + writes to stdout and stderr by primitives are redirected to logging:: + + with redirect_to_logging(logger=PrimitiveClass.logger): + primitive = PrimitiveClass(...) + primitive.set_training_data(...) + primitive.fit(...) + primitive.produce(...) + """ + + # These are class variables to ensure that they are shared among all instances. + # We use a list to make this context manager re-entrant. 
+ _old_stdouts: typing.List[typing.TextIO] = [] + _old_stderrs: typing.List[typing.TextIO] = [] + + def __init__(self, logger: logging.Logger = None, stdout_level: typing.Union[int, str] = 'INFO', stderr_level: typing.Union[int, str] = 'ERROR', pass_through: bool = True) -> None: + if logger is None: + self.logger = logging.getLogger('redirect') + else: + self.logger = logger + + self.stdout_level = logging._checkLevel(stdout_level) # type: ignore + self.stderr_level = logging._checkLevel(stderr_level) # type: ignore + self.pass_through = pass_through + + def __enter__(self) -> logging.Logger: + self._old_stdouts.append(sys.stdout) + self._old_stderrs.append(sys.stderr) + if self.pass_through: + stdout_pass_through = self._old_stdouts[0] + stderr_pass_through = self._old_stderrs[0] + else: + stdout_pass_through = None + stderr_pass_through = None + sys.stdout = typing.cast(typing.TextIO, StreamToLogger(self.logger, self.stdout_level, stdout_pass_through)) + sys.stderr = typing.cast(typing.TextIO, StreamToLogger(self.logger, self.stdout_level, stderr_pass_through)) + return self.logger + + def __exit__(self, exc_type: typing.Optional[typing.Type[BaseException]], + exc_value: typing.Optional[BaseException], + traceback: typing.Optional[types.TracebackType]) -> typing.Optional[bool]: + sys.stdout.close() + sys.stderr.close() + sys.stdout = self._old_stdouts.pop() + sys.stderr = self._old_stderrs.pop() + return None + + +class CallbackHandler(logging.Handler): + """ + Calls a ``callback`` with logging records as they are without any conversion except for: + + * formatting the logging message and adding it to the record object + * assuring ``asctime`` is set + * converts exception ``exc_info`` into exception's name + * making sure ``args`` are JSON-compatible or removing it + * making sure there are no null values + """ + + def __init__(self, callback: typing.Callable) -> None: + super().__init__(logging.DEBUG) + + self.callback = callback + + def emit(self, record: logging.LogRecord) -> None: + try: + self.callback(self.prepare(record)) + except Exception: + self.handleError(record) + + def prepare(self, record: logging.LogRecord) -> typing.Dict: + self.format(record) + + # If "asctime" is not set, we do it ourselves. + if not hasattr(record, 'asctime'): + if self.formatter: + fmt = self.formatter + else: + fmt = logging._defaultFormatter # type: ignore + record.asctime = fmt.formatTime(record, fmt.datefmt) + + output = copy.copy(record.__dict__) + + # Exceptions are not JSON compatible. + if 'exc_info' in output: + if output['exc_info']: + if isinstance(output['exc_info'], BaseException): + output['exc_type'] = type_to_str(type(output['exc_info'])) + else: + output['exc_type'] = type_to_str(type(output['exc_info'][1])) + del output['exc_info'] + + if 'args' in output: + try: + output['args'] = to_json_structure(output['args']) + except Exception: + # We assume this means "args" is not JSON compatible. + del output['args'] + + # We iterate over a list so that we can change dict while iterating. 
+ for key, value in list(output.items()): + if value is None: + del output[key] + + return output + + +def _called_from_outside(modules: typing.Sequence[types.ModuleType]) -> bool: + # 0 == this function, 1 == wrapper, 2 == caller + frame = sys._getframe(2) + try: + if not frame: + caller_module_name = None + else: + caller_module_name = frame.f_globals.get('__name__', None) + finally: + del frame + + return all(caller_module_name != module.__name__ for module in modules) + + +def _decorate_all_methods(modules: typing.Sequence[types.ModuleType], src_obj: typing.Any, dst_obj: typing.Any, decorator: typing.Callable, ignore: typing.Set) -> None: + for name, function in inspect.getmembers(src_obj): + if name.startswith('_'): + continue + + if name in ignore: + continue + + # Wrap the method with the decorator. + if isinstance(function, (types.FunctionType, types.MethodType, types.BuiltinFunctionType, types.BuiltinMethodType)): + # For simplicity we use the name of the first module. + decorated_function = decorator(modules, modules[0].__name__, name, function) + setattr(dst_obj, name, decorated_function) + + # When functions are imported to other modules, we have to update those imported functions as well. + # Here we iterate over known modules and check if original function was copied over. If it was, + # we set it to the new decorated function. + for module in modules: + if getattr(module, name, None) == function: + setattr(module, name, decorated_function) + + +_random_warnings_enabled: typing.List[bool] = [] +_random_sources_patched = False + + +def _random_warning_decorator(modules: typing.Sequence[types.ModuleType], module_path: str, function_name: str, f: typing.Callable) -> typing.Callable: + @functools.wraps(f) + def wrapper(*args: typing.Any, **kwargs: typing.Any) -> typing.Any: + global _random_warnings_enabled + + # Some methods call into other methods. We do not want to issue a warning in such cases. + if _random_warnings_enabled and _random_warnings_enabled[-1] and _called_from_outside(modules): + log_once( + logger, + logging.WARNING, + "Using global/shared random source using '%(module_path)s.%(function_name)s' can make execution not reproducible.", + { + 'module_path': module_path, + 'function_name': function_name, + }, + stack_info=True, + ) + + return f(*args, **kwargs) + + return wrapper + + +class _RandomState(numpy_random.RandomState): + """ + A subclass just so that we can set somewhere decorated methods. The original class is read-only. + """ + + +def _patch_random_sources() -> None: + global _random_sources_patched + + if _random_sources_patched: + return + _random_sources_patched = True + + # We patch the global Python random number generator instance by decorating all methods. + # Used to support "global_randomness_warning" context manager. + # We do not issue warning for calling "getstate". + _decorate_all_methods([random], random._inst, random._inst, _random_warning_decorator, {'getstate'}) # type: ignore + + # For global NumPy random number generator we create a new random state instance first (of our subclass), + # and copy the state over. This is necessary because original random state instance has read-only methods. + old_rand = numpy.random.mtrand._rand + numpy.random.mtrand._rand = _RandomState() + numpy.random.mtrand._rand.set_state(old_rand.get_state()) + + # We do not issue warning for calling "get_state". 
+ _decorate_all_methods([numpy.random, numpy.random.mtrand], old_rand, numpy.random.mtrand._rand, _random_warning_decorator, {'get_state'}) # type: ignore + + if hasattr(numpy_random, 'default_rng'): + old_default_rng = numpy_random.default_rng + + def default_rng(seed: typing.Any = None) -> typing.Any: + if seed is None: + log_once( + logger, + logging.WARNING, + "Using 'numpy.random.default_rng' without a seed can make execution not reproducible.", + stack_info=True, + ) + + return old_default_rng(seed) + + numpy_random.default_rng = default_rng + + +class global_randomness_warning(contextlib.AbstractContextManager): + """ + A Python context manager which issues a warning if global sources of + randomness are used. Currently it checks Python built-in global random + source, NumPy global random source, and NumPy ``default_rng`` being + used without a seed. + """ + + def __init__(self, enable: bool = True) -> None: + self.enable = enable + _patch_random_sources() + + def __enter__(self) -> None: + _random_warnings_enabled.append(self.enable) + + def __exit__(self, exc_type: typing.Optional[typing.Type[BaseException]], + exc_value: typing.Optional[BaseException], + traceback: typing.Optional[types.TracebackType]) -> typing.Optional[bool]: + _random_warnings_enabled.pop() + return None + + +def get_full_name(value: typing.Any) -> str: + return '{module}.{name}'.format(module=value.__module__, name=value.__name__) + + +def has_duplicates(data: typing.Sequence) -> bool: + """ + Returns ``True`` if ``data`` has duplicate elements. + + It works both with hashable and not-hashable elements. + """ + + try: + return len(set(data)) != len(data) + except TypeError: + n = len(data) + for i in range(n): + for j in range(i + 1, n): + if data[i] == data[j]: + return True + return False + + +@contextlib.contextmanager +def silence() -> typing.Generator: + """ + Hides logging and stdout output. + """ + + with unittest.TestCase().assertLogs(level=logging.DEBUG): + with redirect_to_logging(pass_through=False): + # Just to log something, otherwise "assertLogs" can fail. + logging.getLogger().debug("Silence.") + + yield + + +@deprecate.arguments('source', message="argument ignored") +def columns_sum(inputs: typing.Any, *, source: typing.Any = None) -> typing.Any: + """ + Computes sum per column. + """ + + # Importing here to prevent import cycle. + from d3m import container + + if isinstance(inputs, container.DataFrame): # type: ignore + results = container.DataFrame(inputs.agg(['sum']).reset_index(drop=True), generate_metadata=True) # type: ignore + return results + + elif isinstance(inputs, container.ndarray) and len(inputs.shape) == 2: + return numpy.sum(inputs, axis=0, keepdims=True) + + else: + raise exceptions.InvalidArgumentTypeError("Unsupported container type to sum: {type}".format( + type=type(inputs), + )) + + +def list_files(base_directory: str) -> typing.Sequence[str]: + files = [] + + base_directory = base_directory.rstrip(os.path.sep) + base_directory_prefix_length = len(base_directory) + 1 + for dirpath, dirnames, filenames in os.walk(base_directory): + for filename in filenames: + filepath = os.path.join(dirpath, filename) + + # We do not use "os.path.relpath" because it is to general + # and it first try to construct absolute path which is slow. + files.append(filepath[base_directory_prefix_length:]) + + # We sort to have a canonical order. 
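+    # Illustrative output: for a base directory "/data" (hypothetical) containing "/data/a/b.csv",
+    # the returned list holds the relative path "a/b.csv".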
+ files = sorted(files) + + return files + + +def _is_int(typ: type) -> bool: + # We support more types than those listed in "d3m.types.simple_data_types". + return issubclass(typ, (int, numpy.integer, numbers.Integral)) + + +def is_int(typ: type) -> bool: + return _is_int(typ) and not issubclass(typ, bool) + + +def _is_float(typ: type) -> bool: + # We support more types than those listed in "d3m.types.simple_data_types". + return issubclass(typ, (float, numpy.float32, numpy.float64, decimal.Decimal, numbers.Real)) + + +def is_float(typ: type) -> bool: + return _is_float(typ) and not is_int(typ) + + +def is_numeric(typ: type) -> bool: + return is_int(typ) or _is_float(typ) + + +def compute_hash_id(obj: typing.Dict) -> str: + """ + Input should be a JSON compatible structure. + """ + + obj = copy.copy(obj) + + if 'id' in obj: + del obj['id'] + + # We iterate over a list so that we can change dict while iterating. + for key in list(obj.keys()): + # Do not count any private field into hash. + if key.startswith('_'): + del obj[key] + + # We have to use "normalize_numbers" first so that we normalize numbers. + # We cannot do this just with a custom encoder because encoders are not + # called for float values so we cannot handle them there. + # See: https://bugs.python.org/issue36841 + to_hash_id = json.dumps(normalize_numbers(obj), sort_keys=True) + + return str(uuid.uuid5(HASH_ID_NAMESPACE, to_hash_id)) + + +def compute_digest(obj: typing.Dict, extra_data: bytes = None) -> str: + """ + Input should be a JSON compatible structure. + """ + + obj = copy.copy(obj) + + if 'digest' in obj: + del obj['digest'] + + # We iterate over a list so that we can change dict while iterating. + for key in list(obj.keys()): + # Do not count any private field into digest. + if key.startswith('_'): + del obj[key] + + # We have to use "normalize_numbers" first so that we normalize numbers. + # We cannot do this just with a custom encoder because encoders are not + # called for float values so we cannot handle them there. 
+ # See: https://bugs.python.org/issue36841 + to_digest = json.dumps(normalize_numbers(obj), sort_keys=True) + + digest = hashlib.sha256(to_digest.encode('utf8')) + + if extra_data is not None: + digest.update(extra_data) + + return digest.hexdigest() + + +def is_sequence(value: typing.Any) -> bool: + return isinstance(value, typing.Sequence) and not isinstance(value, (str, bytes)) + + +def get_dict_path(input_dict: typing.Dict, path: typing.Sequence[typing.Any]) -> typing.Any: + value: typing.Any = input_dict + + for segment in path: + value = value.get(segment, None) + + if value is None: + return None + + return value + + +def set_dict_path(input_dict: typing.Dict, path: typing.Sequence[typing.Any], value: typing.Any) -> None: + if not path: + raise exceptions.InvalidArgumentValueError("\"path\" has to be non-empty.") + + for segment in path[:-1]: + if segment not in input_dict: + input_dict[segment] = {} + input_dict = input_dict[segment] + + input_dict[path[-1]] = value + + +def register_yaml_representers() -> None: + def yaml_representer_numpy_float(dumper: yaml.Dumper, data: typing.Any) -> typing.Any: + return dumper.represent_float(float(data)) + + def yaml_representer_numpy_int(dumper: yaml.Dumper, data: typing.Any) -> typing.Any: + return dumper.represent_int(int(data)) + + def yaml_representer_numpy_bool(dumper: yaml.Dumper, data: typing.Any) -> typing.Any: + return dumper.represent_bool(bool(data)) + + representers = [ + {'type': numpy.float32, 'representer': yaml_representer_numpy_float}, + {'type': numpy.float64, 'representer': yaml_representer_numpy_float}, + {'type': numpy.int32, 'representer': yaml_representer_numpy_int}, + {'type': numpy.int64, 'representer': yaml_representer_numpy_int}, + {'type': numpy.integer, 'representer': yaml_representer_numpy_int}, + {'type': numpy.bool_, 'representer': yaml_representer_numpy_bool}, + ] + + for representer in representers: + yaml_add_representer(representer['type'], representer['representer']) + + +# Registers additional regexp for floating point resolver. +# See: https://github.com/yaml/pyyaml/issues/173 +def register_yaml_resolvers() -> None: + tag = 'tag:yaml.org,2002:float' + regexp = re.compile(r'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+]?[0-9]+)? + |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+) + |\.[0-9_]+(?:[eE][-+]?[0-9]+)?)$''', re.X) + first = list(u'-+0123456789.') + + yaml.Dumper.add_implicit_resolver(tag, regexp, first) + yaml.SafeDumper.add_implicit_resolver(tag, regexp, first) + yaml.Loader.add_implicit_resolver(tag, regexp, first) + yaml.SafeLoader.add_implicit_resolver(tag, regexp, first) + + if yaml.__with_libyaml__: + yaml.CDumper.add_implicit_resolver(tag, regexp, first) # type: ignore + yaml.CSafeDumper.add_implicit_resolver(tag, regexp, first) # type: ignore + yaml.CLoader.add_implicit_resolver(tag, regexp, first) # type: ignore + yaml.CSafeLoader.add_implicit_resolver(tag, regexp, first) # type: ignore + + +def matches_structural_type(source_structural_type: type, target_structural_type: typing.Union[str, type]) -> bool: + if isinstance(target_structural_type, str): + return type_to_str(source_structural_type) == target_structural_type + else: + return is_subclass(source_structural_type, target_structural_type) + + +# Register YAML representers and resolvers. +register_yaml_representers() +register_yaml_resolvers() + + +class PMap(pyrsistent.PMap): + """ + Extends `pyrsistent.PMap` to (by default) iterate over its items in sorted order. 
+ """ + + def iterkeys(self, *, sort: bool = True, reverse: bool = False) -> typing.Iterable: + for k, _ in self.iteritems(sort=sort, reverse=reverse): + yield k + + def itervalues(self, *, sort: bool = True, reverse: bool = False) -> typing.Iterable: + for _, v in self.iteritems(sort=sort, reverse=reverse): + yield v + + def iteritems(self, *, sort: bool = True, reverse: bool = False) -> typing.Iterable: + if sort: + yield from sorted(super().iteritems(), key=operator.itemgetter(0), reverse=reverse) + else: + yield from super().iteritems() + + # In Python 3 this is also an iterable. + def values(self, *, sort: bool = True, reverse: bool = False) -> typing.Iterable: + return self.itervalues(sort=sort, reverse=reverse) + + # In Python 3 this is also an iterable. + def keys(self, *, sort: bool = True, reverse: bool = False) -> typing.Iterable: + return self.iterkeys(sort=sort, reverse=reverse) + + # In Python 3 this is also an iterable. + def items(self, *, sort: bool = True, reverse: bool = False) -> typing.Iterable: + return self.iteritems(sort=sort, reverse=reverse) + + def evolver(self) -> 'Evolver': + return Evolver(self) + + def __reduce__(self) -> typing.Tuple[typing.Callable, typing.Tuple[typing.Dict]]: + return pmap, (dict(self),) + + +class Evolver(pyrsistent.PMap._Evolver): + def persistent(self) -> PMap: + if self.is_dirty(): + self._original_pmap = PMap(self._size, self._buckets_evolver.persistent()) + + return self._original_pmap + + +# It is OK to use a mutable default value here because it is never changed in-place. +def pmap(initial: typing.Mapping = {}, pre_size: int = 0) -> PMap: + super_pmap = pyrsistent.pmap(initial, pre_size) + + return PMap(super_pmap._size, super_pmap._buckets) + + +EMPTY_PMAP = pmap() + + +def is_uri(uri: str) -> bool: + """ + Test if a given string is an URI. + + Parameters + ---------- + uri: + A potential URI to test. + + Returns + ------- + ``True`` if string is an URI, ``False`` otherwise. + """ + + try: + parsed_uri = url_parse.urlparse(uri, allow_fragments=False) + except Exception: + return False + + return parsed_uri.scheme != '' + + +def fix_uri(uri: str, *, allow_relative_path: bool = True) -> str: + """ + Make a real file URI from a path. + + Parameters + ---------- + uri: + An input URI. + allow_relative_path: + Allow path to be relative? + + Returns + ------- + A fixed URI. + """ + + if is_uri(uri): + return uri + + if not uri.startswith('/') and not allow_relative_path: + raise exceptions.InvalidArgumentValueError(f"Path cannot be relative: {uri}") + + # Make absolute and normalize at the same time. 
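+    # For example, a relative path like "datasets/foo.csv" (hypothetical) becomes
+    # "file:///<current working directory>/datasets/foo.csv" after the code below.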
+ uri = os.path.abspath(uri) + + return 'file://{uri}'.format(uri=uri) + + +def outside_package_context() -> typing.Optional[deprecate.Context]: + frame = sys._getframe(1) + try: + while frame: + if frame.f_code.co_filename == '' or os.path.commonpath([PACKAGE_BASE, frame.f_code.co_filename]) != PACKAGE_BASE: + return deprecate.Context(None, None, frame.f_code.co_filename, frame.f_globals.get('__name__', None), frame.f_lineno) + + frame = frame.f_back + + finally: + del frame + + return None + + +already_logged: typing.Set[typing.Tuple[deprecate.Context, deprecate.Context]] = set() + + +def log_once(logger: logging.Logger, level: int, msg: str, *args: typing.Any, **kwargs: typing.Any) -> None: + frame = sys._getframe(1) + try: + if not frame: + function_context = None + else: + function_context = deprecate.Context(str(level), msg, frame.f_code.co_filename, frame.f_globals.get('__name__', None), frame.f_lineno) + finally: + del frame + + module_context = outside_package_context() + + context = (module_context, function_context) + + if context in already_logged: + return + + if module_context is not None and function_context is not None: + already_logged.add(context) + + logger.log(level, msg, *args, **kwargs) + + +# A workaround to handle also binary stdin/stdout. +# See: https://gitlab.com/datadrivendiscovery/d3m/issues/353 +# See: https://bugs.python.org/issue14156 +# Moreover, if filename ends in ".gz" it decompresses the file as well. +class FileType(argparse.FileType): + def __call__(self, string: str) -> typing.IO[typing.Any]: + if string.endswith('.gz'): + # "gzip.open" has as a default binary mode, + # but we want text mode as a default. + if 't' not in self._mode and 'b' not in self._mode: # type: ignore + mode = self._mode + 't' # type: ignore + else: + mode = self._mode # type: ignore + + try: + return gzip.open(string, mode=mode, encoding=self._encoding, errors=self._errors) # type: ignore + except OSError as error: + message = argparse._("can't open '%s': %s") # type: ignore + raise argparse.ArgumentTypeError(message % (string, error)) + + handle = super().__call__(string) + + if string == '-' and 'b' in self._mode: # type: ignore + handle = handle.buffer # type: ignore + + return handle + + +def open(file: str, mode: str = 'r', buffering: int = -1, encoding: str = None, errors: str = None) -> typing.IO[typing.Any]: + try: + return FileType(mode=mode, bufsize=buffering, encoding=encoding, errors=errors)(file) + except argparse.ArgumentTypeError as error: + original_error = error.__context__ + + # So that we are outside of the except clause. 
+ raise original_error + + +def filter_local_location_uris(doc: typing.Dict, *, empty_value: typing.Any = None) -> None: + if 'location_uris' in doc: + location_uris = [] + for location_uri in doc['location_uris']: + try: + parsed_uri = url_parse.urlparse(location_uri, allow_fragments=False) + except Exception: + continue + + if parsed_uri.scheme == 'file': + continue + + location_uris.append(location_uri) + + if location_uris: + doc['location_uris'] = location_uris + elif empty_value is not None: + doc['location_uris'] = empty_value + else: + del doc['location_uris'] + + if 'location_base_uris' in doc: + location_base_uris = [] + for location_base_uri in doc['location_base_uris']: + try: + parsed_uri = url_parse.urlparse(location_base_uri, allow_fragments=False) + except Exception: + continue + + if parsed_uri.scheme == 'file': + continue + + location_base_uris.append(location_base_uri) + + if location_base_uris: + doc['location_base_uris'] = location_base_uris + elif empty_value is not None: + doc['location_base_uris'] = empty_value + else: + del doc['location_base_uris'] + + +def json_structure_equals( + obj1: typing.Any, obj2: typing.Any, ignore_keys: typing.Set = None, +) -> bool: + """ + Parameters + ---------- + obj1: + JSON serializable object to compare with ``obj2``. + obj2: + JSON serializable object to compare with ``obj1``. + ignore_keys: + If ``obj1`` and ``obj2`` are of type ``Mapping``, any keys found in this set will not be considered to + determine whether ``obj1`` and ``obj2`` are equal. + + Returns + ------- + A boolean indicating whether ``obj1`` and ``obj2`` are equal. + """ + + if ignore_keys is None: + ignore_keys = set() + + if isinstance(obj1, collections.Mapping) and isinstance(obj2, collections.Mapping): + for key1 in obj1: + if key1 in ignore_keys: + continue + if key1 not in obj2: + return False + if not json_structure_equals(obj1[key1], obj2[key1], ignore_keys): + return False + + for key2 in obj2: + if key2 in ignore_keys: + continue + if key2 not in obj1: + return False + # Already checked if values are equal. + + return True + + elif is_sequence(obj1) and is_sequence(obj2): + if len(obj1) != len(obj2): + return False + for i, (item1, item2) in enumerate(zip(obj1, obj2)): + if not json_structure_equals(item1, item2, ignore_keys): + return False + return True + + else: + return obj1 == obj2 + + +@functools.lru_cache() +def get_datasets_and_problems( + datasets_dir: str, handle_score_split: bool = True, +) -> typing.Tuple[typing.Dict[str, str], typing.Dict[str, str]]: + if datasets_dir is None: + raise exceptions.InvalidArgumentValueError("Datasets directory has to be provided.") + + datasets: typing.Dict[str, str] = {} + problem_descriptions: typing.Dict[str, str] = {} + problem_description_contents: typing.Dict[str, typing.Dict] = {} + + for dirpath, dirnames, filenames in os.walk(datasets_dir, followlinks=True): + if 'datasetDoc.json' in filenames: + # Do not traverse further (to not parse "datasetDoc.json" or "problemDoc.json" if they + # exists in raw data filename). + dirnames[:] = [] + + dataset_path = os.path.join(os.path.abspath(dirpath), 'datasetDoc.json') + + try: + with open(dataset_path, 'r', encoding='utf8') as dataset_file: + dataset_doc = json.load(dataset_file) + + dataset_id = dataset_doc['about']['datasetID'] + # Handle a special case for SCORE dataset splits (those which have "targets.csv" file). + # They are the same as TEST dataset splits, but we present them differently, so that + # SCORE dataset splits have targets as part of data. 
Because of this we also update + # corresponding dataset ID. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/176 + if handle_score_split and os.path.exists(os.path.join(dirpath, '..', 'targets.csv')) and dataset_id.endswith('_TEST'): + dataset_id = dataset_id[:-5] + '_SCORE' + + if dataset_id in datasets: + logger.warning( + "Duplicate dataset ID '%(dataset_id)s': '%(old_dataset)s' and '%(dataset)s'", { + 'dataset_id': dataset_id, + 'dataset': dataset_path, + 'old_dataset': datasets[dataset_id], + }, + ) + else: + datasets[dataset_id] = dataset_path + + except (ValueError, KeyError): + logger.exception( + "Unable to read dataset '%(dataset)s'.", { + 'dataset': dataset_path, + }, + ) + + if 'problemDoc.json' in filenames: + # We continue traversing further in this case. + + problem_path = os.path.join(os.path.abspath(dirpath), 'problemDoc.json') + + try: + with open(problem_path, 'r', encoding='utf8') as problem_file: + problem_doc = json.load(problem_file) + + problem_id = problem_doc['about']['problemID'] + # Handle a special case for SCORE dataset splits (those which have "targets.csv" file). + # They are the same as TEST dataset splits, but we present them differently, so that + # SCORE dataset splits have targets as part of data. Because of this we also update + # corresponding problem ID. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/176 + if handle_score_split and os.path.exists(os.path.join(dirpath, '..', 'targets.csv')) and problem_id.endswith('_TEST'): + problem_id = problem_id[:-5] + '_SCORE' + + # Also update dataset references. + for data in problem_doc.get('inputs', {}).get('data', []): + if data['datasetID'].endswith('_TEST'): + data['datasetID'] = data['datasetID'][:-5] + '_SCORE' + + with open(problem_path, 'r', encoding='utf8') as problem_file: + problem_description = json.load(problem_file) + + if problem_id in problem_descriptions and problem_description != problem_description_contents[problem_id]: + logger.warning( + "Duplicate problem ID '%(problem_id)s': '%(old_problem)s' and '%(problem)s'", { + 'problem_id': problem_id, + 'problem': problem_path, + 'old_problem': problem_descriptions[problem_id], + }, + ) + else: + problem_descriptions[problem_id] = problem_path + problem_description_contents[problem_id] = problem_description + + except (ValueError, KeyError): + logger.exception( + "Unable to read problem description '%(problem)s'.", { + 'problem': problem_path, + }, + ) + + return datasets, problem_descriptions diff --git a/d3m/docs/_static/custom.css b/d3m/docs/_static/custom.css new file mode 100644 index 0000000..966f953 --- /dev/null +++ b/d3m/docs/_static/custom.css @@ -0,0 +1,38 @@ +/* Making index have only one column. */ +.genindextable td { + display: table-row; +} + +/* No need to make space on the right of the TOC smaller and smaller for every level. */ +.sphinxsidebar ul ul { + margin-right: 0; +} + +/* Let sidebar sticky to the top of the viewport when scrolling down. */ +.sphinxsidebar { + position: sticky; + top: 0; +} + +@media only screen and (min-width: 1250px) { + /* Wider sidebar on large screens. */ + .sphinxsidebar { + width: 350px !important; + } + + .document .bodywrapper { + margin-left: 350px !important; + } + + /* Increase the header height by factor 1.25. */ + body > .related { + line-height: 40px; + font-size: 1.125em; + } + + /* Keep the footer height as it was. 
*/ + body > .related ~ .related { + line-height: 32px; + font-size: 0.9em; + } +} diff --git a/d3m/docs/_templates/toc.html b/d3m/docs/_templates/toc.html new file mode 100644 index 0000000..2acc35a --- /dev/null +++ b/d3m/docs/_templates/toc.html @@ -0,0 +1,8 @@ +{# + Similar to "localtoc.html" but without a link in the heading so + that the color of the heading matches other headings in the sidebar. +#} +{%- if display_toc %} +

  <h3>{{ _('Table of Contents') }}</h3>
+  {{ toc }}
+{%- endif %}
diff --git a/d3m/docs/_templates/versions.html b/d3m/docs/_templates/versions.html new file mode 100644 index 0000000..1475a6a --- /dev/null +++ b/d3m/docs/_templates/versions.html @@ -0,0 +1,11 @@ +
      diff --git a/d3m/docs/about.rst b/d3m/docs/about.rst new file mode 100644 index 0000000..85a43b1 --- /dev/null +++ b/d3m/docs/about.rst @@ -0,0 +1,12 @@ +:orphan: + +.. _about: + +About Data Driven Discovery program +----------------------------------- + +DARPA Data Driven Discovery (D3M) Program is researching ways to get +machines to build machine learning pipelines automatically. It is split +into three layers: TA1 (primitives), TA2 (systems which combine +primitives automatically into pipelines and executes them), and TA3 +(end-users interfaces). diff --git a/d3m/docs/conf.py b/d3m/docs/conf.py new file mode 100644 index 0000000..447d502 --- /dev/null +++ b/d3m/docs/conf.py @@ -0,0 +1,210 @@ +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import datetime +import os +import sys + +sys.path.insert(0, os.path.abspath('.')) +import d3m + + +# -- Project information ----------------------------------------------------- + +project = 'D3M' +project_lowercase = project.lower() + +# The short X.Y version +version = d3m.__version__ +# The full version, including alpha/beta/rc tags +release = version + +author = d3m.__author__ +copyright = '2017-{year}, {author}'.format(year=datetime.datetime.now().year, author=author) + + +# -- General configuration --------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + 'sphinx.ext.mathjax', + 'sphinx.ext.ifconfig', + 'sphinx.ext.napoleon', + 'sphinx_autodoc_typehints', + 'sphinxcontrib.fulltoc', + 'recommonmark', + 'sphinx.ext.linkcode', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] + +# The master toctree document. +master_doc = 'index' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path . +exclude_patterns = [] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. 
+# +html_theme = 'nature' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +html_sidebars = { + '**': [ + 'toc.html', + 'versions.html', + 'searchbox.html', + ] +} + +html_title = "{project} {version}".format(project=project, version=version) +html_show_sourcelink = False +html_copy_source = False +modindex_common_prefix = ['d3m.'] + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = project_lowercase + + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, '{name}.tex'.format(name=project_lowercase), project, author, 'manual'), +] + + +# -- Options for manual page output ------------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, project_lowercase, d3m.__description__, [author], 1) +] + + +# -- Options for Texinfo output ---------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, project_lowercase, project, author, project, d3m.__description__, 'Miscellaneous'), +] + + +# -- Extension configuration ------------------------------------------------- + +# -- Options for intersphinx extension --------------------------------------- + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = { + 'https://docs.python.org/': None, + 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), + 'numpy': ('https://docs.scipy.org/doc/numpy/', None), + #'numpy': ('https://numpydoc.readthedocs.io/en/latest/', None), + 'scikit-learn': ('https://scikit-learn.org/stable/', None), + 'mypy': ('https://mypy.readthedocs.io/en/stable/', None), + 'setuptools': ('https://setuptools.readthedocs.io/en/latest/', None), +} + +# -- Options for todo extension ---------------------------------------------- + +# If true, `todo` and `todoList` produce output, else they produce nothing. 
+todo_include_todos = True + + +def setup(app): + app.add_stylesheet('custom.css') + + +def linkcode_resolve(domain, info): + if domain != 'py': + return None + if not info['module']: + return None + return 'https://gitlab.com/datadrivendiscovery/d3m/blob/{version}/{path}.py'.format(version=version, path=info['module'].replace('.', '/')) diff --git a/d3m/docs/discovery.rst b/d3m/docs/discovery.rst new file mode 100644 index 0000000..7e246f4 --- /dev/null +++ b/d3m/docs/discovery.rst @@ -0,0 +1,140 @@ +Primitives discovery +================================ + +Primitives D3M namespace +------------------------ + +The :mod:`d3m.primitives` module exposes all primitives under the same +``d3m.primitives`` namespace. + +This is achieved using :ref:`Python entry points `. +Python packages containing primitives should register them and expose +them under the common namespace by adding an entry like the following to +package's ``setup.py``: + +.. code:: python + + entry_points = { + 'd3m.primitives': [ + 'primitive_namespace.PrimitiveName = my_package.my_module:PrimitiveClassName', + ], + }, + +The example above would expose the +``my_package.my_module.PrimitiveClassName`` primitive under +``d3m.primitives.primitive_namespace.PrimitiveName``. + +Configuring ``entry_points`` in your ``setup.py`` does not just put +primitives into a common namespace, but also helps with discovery of +your primitives on the system. Then your package with primitives just +have to be installed on the system and can be automatically discovered +and used by any other Python code. + + **Note:** Only primitive classes are available through the + ``d3m.primitives`` namespace, no other symbols from a source + module. In the example above, only ``PrimitiveClassName`` is + available, not other symbols inside ``my_module`` (except if they + are other classes also added to entry points). + + **Note:** Modules under ``d3m.primitives`` are created dynamically + at run-time based on information from entry points. So some tools + (IDEs, code inspectors, etc.) might not find them because there are + no corresponding files and directories under ``d3m.primitives`` + module. You have to execute Python code for modules to be available. + Static analysis cannot find them. + +Primitives discovery on PyPi +---------------------------- + +To facilitate automatic discovery of primitives on PyPi (or any other +compatible Python Package Index), publish a package with a keyword +``d3m_primitive`` in its ``setup.py`` configuration: + +.. code:: python + + keywords='d3m_primitive' + + **Note:** Be careful when automatically discovering, installing, and + using primitives from unknown sources. While primitives are designed + to be bootstrapable and automatically installable without human + involvement, there are no isolation mechanisms yet in place for + running potentially malicious primitives. Currently recommended way + is to use manually curated lists of known primitives. + +d3m.index API +-------------------------- + +The :mod:`d3m.index` module exposes the following Python utility functions. + +``search`` +~~~~~~~~~~ + +Returns a list of primitive paths (Python paths under ``d3m.primitives`` +namespace) for all known (discoverable through entry points) primitives, +or limited by the ``primitive_path_prefix`` search argument. + +``get_primitive`` +~~~~~~~~~~~~~~~~~ + +Loads (if not already) a primitive class and returns it. 
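+
+For illustration, a minimal sketch of using ``search`` and ``get_primitive``
+(assuming a package providing the referenced primitive, for example the common
+primitives package, is installed; the primitive path is just an example):
+
+.. code:: python
+
+    from d3m import index
+
+    # List Python paths (under the ``d3m.primitives`` namespace) of all
+    # primitives discoverable through entry points.
+    primitive_paths = index.search()
+
+    # Load (if not already loaded) the primitive class and return it.
+    primitive_class = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')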
+ +``get_primitive_by_id`` +~~~~~~~~~~~~~~~~~~~~~~~ + +Returns a primitive class based on its ID from all currently loaded +primitives. + +``get_loaded_primitives`` +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Returns a list of all currently loaded primitives. + +``load_all`` +~~~~~~~~~~~~ + +Loads all primitives available and populates ``d3m.primitives`` +namespace with them. + +``register_primitive`` +~~~~~~~~~~~~~~~~~~~~~~ + +Registers a primitive under ``d3m.primitives`` namespace. + +This is useful to register primitives not necessary installed on the +system or which are generated at runtime. It is also useful for testing +purposes. + +``discover`` +~~~~~~~~~~~~ + +Returns package names from PyPi which provide D3M primitives. + +This is determined by them having a ``d3m_primitive`` among package +keywords. + +Command line +------------ + +The :mod:`d3m.index` module also provides a command line interface by +running ``python3 -m d3m index``. The following commands are currently +available. + +Use ``-h`` or ``--help`` argument to obtain more information about each +command and its arguments. + +``python3 -m d3m index search`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Searches locally available primitives. Lists registered Python paths for +primitives installed on the system. + +``python3 -m d3m index discover`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Discovers primitives available on PyPi. Lists package names containing +D3M primitives on PyPi. + +``python3 -m d3m index describe`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Generates a JSON description of a primitive. diff --git a/d3m/docs/index.rst b/d3m/docs/index.rst new file mode 100644 index 0000000..84179b9 --- /dev/null +++ b/d3m/docs/index.rst @@ -0,0 +1,34 @@ +D3M core package's documentation +================================ + +:Version: |version| + +This is documentation for the common code for D3M project, +the ``d3m`` core package. + +.. toctree:: + :maxdepth: 2 + + installation + quickstart + tutorial + interfaces + discovery + metadata + primitives_base_classes + pipeline + reference + primitive-checklist + +Miscellaneous pages +------------------- + +* :ref:`about` +* :ref:`repostructure` + +Indices and tables +------------------ + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/d3m/docs/installation.rst b/d3m/docs/installation.rst new file mode 100644 index 0000000..a9fe9b0 --- /dev/null +++ b/d3m/docs/installation.rst @@ -0,0 +1,112 @@ +Installation +------------ + +This package works with Python 3.6+ and pip 19+. You need to have the following +packages installed on the system (for Debian/Ubuntu): + +- ``libssl-dev`` +- ``libcurl4-openssl-dev`` +- ``libyaml-dev`` + +You can install latest stable version from `PyPI `__: + +:: + + $ pip3 install d3m + +To install latest development version: + +:: + + $ pip3 install -e git+https://gitlab.com/datadrivendiscovery/d3m.git@devel#egg=d3m + +When cloning a repository, clone it recursively to get also git +submodules: + +:: + + $ git clone --recursive https://gitlab.com/datadrivendiscovery/d3m.git + +Testing +------- + +To ensure consistent performance of the D3M package a test suite and performance benchmarks are ran in the CI pipeline after every commit. +If a commit fails tests or introduces significant performance regression the pipeline fails. 
+ +Running tests +~~~~~~~~~~~~~ + +To run the test suite locally run: + +:: + + $ ./run_tests.py + +Running benchmarks +~~~~~~~~~~~~~~~~~~ + +If you want to run benchmarks locally you first need to install asv: + +:: + + $ pip install asv + +then clone the D3M repository: + +:: + + $ git clone git@gitlab.com:datadrivendiscovery/d3m.git + $ cd d3m/tests + +and run the benchmarks on a set of git commits. The following command: + +:: + + asv continuous --config asv.conf.json -f 1.1 devel HEAD + +will benchmarks changes between last commit to `devel` and latest commit to currently active feature branch. +Make sure the code you want to benchmark is commited into active git branch. + +To inspect performance changes between last two commits in the active branch run: + +:: + + $ asv continuous --config asv.conf.json -f 1.1 HEAD + · Creating environments + · Discovering benchmarks + ·· Uninstalling from virtualenv-py3.6 + ·· Installing a1bb2749 into virtualenv-py3.6. + · Running 4 total benchmarks (2 commits * 1 environments * 2 benchmarks) + [ 0.00%] · For d3m commit 3759f7a7 (round 1/2): + [ 0.00%] ·· Building for virtualenv-py3.6. + [ 0.00%] ·· Benchmarking virtualenv-py3.6 + [ 12.50%] ··· Running (metadata.DatasetMetadata.time_update_0k--).. + [ 25.00%] · For d3m commit a1bb2749 (round 1/2): + [ 25.00%] ·· Building for virtualenv-py3.6. + [ 25.00%] ·· Benchmarking virtualenv-py3.6 + [ 37.50%] ··· Running (metadata.DatasetMetadata.time_update_0k--).. + [ 50.00%] · For d3m commit a1bb2749 (round 2/2): + [ 50.00%] ·· Benchmarking virtualenv-py3.6 + [ 62.50%] ··· metadata.DatasetMetadata.time_update_0k 2.84±0.4ms + [ 75.00%] ··· metadata.DatasetMetadata.time_update_1k 174±4ms + [ 75.00%] · For d3m commit 3759f7a7 (round 2/2): + [ 75.00%] ·· Building for virtualenv-py3.6. + [ 75.00%] ·· Benchmarking virtualenv-py3.6 + [ 87.50%] ··· metadata.DatasetMetadata.time_update_0k 5.59±0.5ms + [100.00%] ··· metadata.DatasetMetadata.time_update_1k 714±10ms + before after ratio + [3759f7a7] [a1bb2749] + + - 5.59±0.5ms 2.84±0.4ms 0.51 metadata.DatasetMetadata.time_update_0k + - 714±10ms 174±4ms 0.24 metadata.DatasetMetadata.time_update_1k + + +During development, you can run a particular benchmark using the current environment and code by:: + + $ asv dev --config asv.conf.json --bench 'metadata.DatasetToJsonStructure.time_to_json_structure.*' + +For additional reference the following resources can be useful: + +- `Pandas performance test suite guide __` +- `Asv usage guide __` +- `Astropy benchmarks __` diff --git a/d3m/docs/interfaces.rst b/d3m/docs/interfaces.rst new file mode 100644 index 0000000..559697c --- /dev/null +++ b/d3m/docs/interfaces.rst @@ -0,0 +1,248 @@ +TA1 API for primitives +==================================== + +A collection of standard Python interfaces for TA1 primitives. All +primitives should extend one of the base classes available and +optionally implement available mixins. + +Design principles +----------------- + +Standard TA1 primitive interfaces have been designed to be possible for +TA2 systems to call primitives automatically and combine them into +pipelines. + +Some design principles applied: + +- Use of a de facto standard language for "glue" between different + components and libraries, Python. +- Use of keyword-only arguments for all methods so that caller does not + have to worry about the order of arguments. +- Every primitive should implement only one functionality, more or less + a function, with clear inputs and outputs. 
All parameters of the + function do not have to be known in advance and function can be + "fitted" as part of the training step of the pipeline. +- Use of Python 3 typing extensions to annotate methods and classes + with typing information to make it easier for TA2 systems to prune + incompatible combinations of inputs and outputs and to reuse existing + Python type-checking tooling. +- Typing information can serve both detecting issues and + incompatibilities in primitive implementations and help with pipeline + construction. +- All values being passed through a primitive have metadata associated + with them. +- Primitives can operate only at a metadata level to help guide the + pipeline construction process without having to operate on data + itself. +- Primitive metadata is close to the source, primitive code, and not in + separate files to minimize chances that it is goes out of sync. + Metadata which can be automatically determined from the code should + be automatically determined from the code. Similarly for data + metadata. +- All randomness of primitives is captured by a random seed argument to + assure reproducibility. +- Operations can work in iterations, under time budgets, and caller + might not always want to compute values fully. +- Through use of mixins primitives can signal which capabilities they + support. +- Primitives are to be composed and executed in a data-flow manner. + +Main concepts +------------- + +Interface classes, mixins, and methods are documented in detail through +use of docstrings and typing annotations. Here we note some higher-level +concept which can help understand basic ideas behind interfaces and what +they are trying to achieve, the big picture. This section is not +normative. + +A primitive should extend one of the base classes available and +optionally mixins as well. Not all mixins apply to all primitives. That +being said, you probably do not want to subclass ``PrimitiveBase`` +directly, but instead one of other base classes to signal to a caller +more about what your primitive is doing. If your primitive belong to a +larger set of primitives no exiting non-\ ``PrimitiveBase`` base class +suits well, consider suggesting that a new base class is created by +opening an issue or making a merge request. + +Base class and mixins have generally four type arguments you have to +provide: ``Inputs``, ``Outpus``, ``Params``, and ``Hyperparams``. One +can see a primitive as parameterized by those four type arguments. You +can access them at runtime through metadata: + +.. code:: python + + FooBarPrimitive.metadata.query()['class_type_arguments'] + +``Inputs`` should be set to a primary input type of a primitive. +Primary, because you can define additional inputs your primitive might +need, but we will go into these details later. Similarly for +``Outputs``. ``produce`` method then produces outputs from inputs. Other +primitive methods help the primitive (and its ``produce`` method) +achieve that, or help the runtime execute the primitive as a whole, or +optimize its behavior. + +Both ``Inputs`` and ``Outputs`` should be of a +:ref:`container_types`. We allow a limited set of value types being +passed between primitives so that both TA2 and TA3 systems can +implement introspection for those values if needed, or user interface +for them, etc. Moreover this allows us also to assure that they can be +efficiently used with Arrow/Plasma store. + +Container values can then in turn contain values of an :ref:`extended but +still limited set of data types `. 
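+
+For example, a primitive operating on tabular data would typically define its
+type arguments as aliases of container types (a sketch only; the names and the
+base class mentioned in the comment are illustrative):
+
+.. code:: python
+
+    from d3m import container
+
+    # These aliases are then used as the ``Inputs`` and ``Outputs`` type
+    # arguments of the chosen base class, e.g.
+    # ``TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]``.
+    Inputs = container.DataFrame
+    Outputs = container.DataFrame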
+ +Those values being passed between primitives also hold metadata. +Metadata is available on their ``metadata`` attribute. Metadata on +values is stored in an instance of +:class:`~d3m.metadata.base.DataMetadata` class. This is a +reason why we have :ref:`our own versions of some standard container +types `: to have the ``metadata`` attribute. + +All metadata is immutable and updating a metadata object returns a new, +updated, copy. Metadata internally remembers the history of changes, but +there is no API yet to access that. But the idea is that you will be +able to follow the whole history of change to data in a pipeline through +metadata. See :ref:`metadata API ` for more information +how to manipulate metadata. + +Primitives have a similar class ``PrimitiveMetadata``, which when +created automatically analyses its primitive and populates parts of +metadata based on that. In this way author does not have to have +information in two places (metadata and code) but just in code and +metadata is extracted from it. When possible. Some metadata author of +the primitive stil has to provide directly. + +Currently most standard interface base classes have only one ``produce`` +method, but design allows for multiple: their name has to be prefixed +with ``produce_``, have similar arguments and same semantics as all +produce methods. The main motivation for this is that some primitives +might be able to expose same results in different ways. Having multiple +produce methods allow the caller to pick which type of the result they +want. + +To keep primitive from outside simple and allow easier compositionality +in pipelines, primitives have arguments defined per primitive and not +per their method. The idea here is that once a caller satisfies +(computes a value to be passed to) an argument, any method which +requires that argument can be called on a primitive. + +There are three types of arguments: + +- pipeline – arguments which are provided by the pipeline, they are + required (otherwise caller would be able to trivially satisfy them by + always passing ``None`` or another default value) +- runtime – arguments which caller provides during pipeline execution + and they control various aspects of the execution +- hyper-parameter – a method can declare that primitive's + hyper-parameter can be overridden for the call of the method, they + have to match hyper-parameter definition + +Methods can accept additional pipeline and hyper-parameter arguments and +not just those from the standard interfaces. + +Produce methods and some other methods return results wrapped in +``CallResult``. In this way primitives can expose information about +internal iterative or optimization process and allow caller to decide +how long to run. + +When calling a primitive, to access ``Hyperparams`` class you can do: + +.. code:: python + + hyperparams_class = FooBarPrimitive.metadata.query()['class_type_arguments']['Hyperparams'] + +You can now create an instance of the class by directly providing values +for hyper-parameters, use available simple sampling, or just use default +values: + +.. code:: python + + hp1 = hyperparams_class({'threshold': 0.01}) + hp2 = hyperparams_class.sample(random_state=42) + hp3 = hyperparams_class.defaults + +You can then pass those instances as the ``hyperparams`` argument to +primitive's constructor. + +Author of a primitive has to define what internal parameters does the +primitive have, if any, by extending the ``Params`` class. 
It is just a +fancy dict, so you can both create an instance of it in the same way, +and access its values: + +.. code:: python + + class Params(params.Params): + coefficients: numpy.ndarray + + ps = Params({'coefficients': numpy.array[1, 2, 3]}) + ps['coefficients'] + +``Hyperparams`` class and ``Params`` class have to be pickable and +copyable so that instances of primitives can be serialized and restored +as needed. + +Primitives (and some other values) are uniquely identified by their ID +and version. ID does not change through versions. + +Primitives should not modify in-place any input argument but always +first make a copy before any modification. + +Checklist for creating a new primitive +-------------------------------------- +1. Implement as many interfaces as are applicable to your + primitive. An up-to-date list of mixins you can implement can be + found at + + +2. Create unit tests to test all methods you implement + +3. Include all relevant hyperparameters and use appropriate + ``Hyperparameter`` subclass for specifying the range of values a + hyperparameter can take. Try to provide good default values where + possible. Also include all relevant ``semantic_types`` + + +4. Include ``metadata`` and ``__author__`` fields in your class + definition. The ``__author__`` field should include a name or team + as well as email. The ``metadata`` object has many fields which should + be filled in: + + * id, this is a uuid unique to this primitive. It can be generated with :code:`import uuid; uuid.uuid4()` + * version + * python_path, the name you want to be import this primitive through + * keywords, keywords you want your primitive to be discovered by + * installation, how to install the package which has this primitive. This is easiest if this is just a python package on PyPI + * algorithm_types, specify which PrimitiveAlgorithmType the algorithm is, a complete list can be found in TODO + * primitive_family, specify the broad family a primitive falls under, a complete list can be found in TODO + * hyperparameters_to_tune, specify which hyperparameters you would prefer a TA2 system tune + +5. Make sure primitive uses the correct container type + +6. If container type is a dataframe, specify which column is the + target value, which columns are the input values, and which columns + are the output values. + +7. Create an example pipeline which includes this primitive and uses one of the seed datasets as input. + +Examples +-------- + +Examples of simple primitives using these interfaces can be found `in +this +repository `__: + +- `MonomialPrimitive `__ + is a simple regressor which shows how to use ``container.List``, + define and use ``Params`` and ``Hyperparams``, and implement multiple + methods needed by a supervised learner primitive +- `IncrementPrimitive `__ + is a transformer and shows how to have ``container.ndarray`` as + inputs and outputs, and how to set metadata for outputs +- `SumPrimitive `__ + is a transformer as well, but it is just a wrapper around a Docker + image, it shows how to define Docker image in metadata and how to + connect to a running Docker container, moreover, it also shows how + inputs can be a union type of multiple other types +- `RandomPrimitive `__ + is a generator which shows how to use ``random_seed``, too. diff --git a/d3m/docs/metadata.rst b/d3m/docs/metadata.rst new file mode 100644 index 0000000..b0e5f2f --- /dev/null +++ b/d3m/docs/metadata.rst @@ -0,0 +1,718 @@ +.. 
_metadata: + +Metadata for primitives and the values they process +=================================================== + +Metadata is a core component of any data-based system. This repository +is standardizing how we represent metadata in the D3M program and +focusing on three types of metadata: \* metadata associated with +primitives \* metadata associated with datasets \* metadata associated +with values passed inside pipelines + +This repository is also standardizing types of values being passed +between primitives in pipelines. While theoretically any value could be +passed between primitives, limiting them to a known set of values can +make primitives more compatible, efficient, and values easier to +introspect by TA3 systems. + +.. _container_types: + +Container types +--------------- + +All input and output (container) values passed between primitives should +expose a ``Sequence`` +`protocol `__ (sequence in +samples) and provide ``metadata`` attribute with metadata. + +``d3m.container`` module exposes such standard types: + +- ``Dataset`` – a class representing datasets, including D3M datasets, + implemented in + :mod:`d3m.container.dataset` module +- ``DataFrame`` – + :class:`pandas.DataFrame` + with support for ``metadata`` attribute, implemented in + :mod:`d3m.container.pandas` module +- ``ndarray`` – + :class:`numpy.ndarray` + with support for ``metadata`` attribute, implemented in + :mod:`d3m.container.numpy` module +- ``List`` – a standard :class:`list` with support for ``metadata`` + attribute, implemented in + :mod:`d3m.container.list` module + +``List`` can be used to create a simple list container. + +It is strongly encouraged to use the :class:`~d3m.container.pandas.DataFrame` container type for +primitives which do not have strong reasons to use something else +(:class:`~d3m.container.dataset.Dataset`\ s to operate on initial pipeline input, or optimized +high-dimensional packed data in :class:`~numpy.ndarray`\ s, or :class:`list`\ s to pass as +values to hyper-parameters). This makes it easier to operate just on +columns without type casting while the data is being transformed to make +it useful for models. + +When deciding which container type to use for inputs and outputs of a +primitive, consider as well where an expected place for your primitive +is in the pipeline. Generally, pipelines tend to have primitives +operating on :class:`~d3m.container.dataset.Dataset` at the beginning, then use :class:`~d3m.container.pandas.DataFrame` and +then convert to :class:`~numpy.ndarray`. + +.. _data_types: + +Data types +---------- + +Container types can contain values of the following types: + +* container types themselves +* Python builtin primitive types: + + * ``str`` + * ``bytes`` + * ``bool`` + * ``float`` + * ``int`` + * ``dict`` (consider using :class:`typing.Dict`, :class:`typing.NamedTuple`, or :ref:`TypedDict `) + * ``NoneType`` + +Metadata +-------- + +:mod:`d3m.metadata.base` module provides a +standard Python implementation for metadata object. + +When thinking about metadata, it is useful to keep in mind that metadata +can apply to different contexts: + +* primitives +* values being passed + between primitives, which we call containers (and are container types) +* datasets are a special case of a container +* to parts of data + contained inside a container +* for example, a cell in a table can have + its own metadata + +Containers and their data can be seen as multi-dimensional structures. 
+Dimensions can have numeric (arrays) or string indexes (string to value +maps, i.e., dicts). Moreover, even numeric indexes can still have names +associated with each index value, e.g., column names in a table. + +If a container type has a concept of *shape* +(:attr:`DataFrame.shape `, :attr:`ndarray.shape `), +dimensions go in that order. For tabular data and existing container +types this means that the first dimension of a container is always +traversing samples (e.g., rows in a table), and the second dimension +columns. + +Values can have nested other values and metadata dimensions go over all +of them until scalar values. So if a Pandas DataFrame contains +3-dimensional ndarrays, the whole value has 5 dimensions: two for rows +and columns of the DataFrame (even if there is only one column), and 3 +for the array. + +To tell to which part of data contained inside a container metadata +applies, we use a *selector*. Selector is a tuple of strings, integers, +or special values. Selector corresponds to a series of ``[...]`` item +getter Python operations on most values, except for Pandas DataFrame +where it corresponds to +:attr:`iloc ` +position-based selection. + +Special selector values: + +- ``ALL_ELEMENTS`` – makes metadata apply to all elements in a given + dimension (a wildcard) + +Metadata itself is represented as a (potentially nested) dict. If +multiple metadata dicts comes from different selectors for the same +resolved selector location, they are merged together in the order from +least specific to more specific, later overriding earlier. ``null`` +metadata value clears the key specified from a less specific selector. + +Example +~~~~~~~ + +To better understand how metadata is attached to various parts of the +value, A `simple tabular D3M +dataset `__ +could be represented as a multi-dimensional structure: + +.. code:: yaml + + { + "0": [ + [0, 5.1, 3.5, 1.4, 0.2, "Iris-setosa"], + [1, 4.9, 3, 1.4, 0.2, "Iris-setosa"], + ... + ] + } + +It contains one resource with ID ``"0"`` which is the first dimension +(using strings as index; it is a map not an array), then rows, which is +the second dimension, and then columns, which is the third dimension. +The last two dimensions are numeric. + +In Python, accessing third column of a second row would be +``["0"][1][2]`` which would be value ``3``. This is also the selector if +we would want to attach metadata to that cell. If this metadata is +description for this cell, we can thus describe this datum metadata as a +pair of a selector and a metadata dict: + +- selector: ``["0"][1][2]`` +- metadata: + ``{"description": "Measured personally by Ronald Fisher."}`` + +Dataset-level metadata have empty selector: + +- selector: ``[]`` +- metadata: ``{"id": "iris_dataset_1", "name": "Iris Dataset"}`` + +To describe first dimension itself, we set ``dimension`` metadata on the +dataset-level (container). ``dimension`` describes the next dimension at +that location in the data structure. + +- selector: ``[]`` +- metadata: ``{"dimension": {"name": "resources", "length": 1}}`` + +This means that the full dataset-level metadata is now: + +.. code:: json + + { + "id": "iris_dataset_1", + "name": "Iris Dataset", + "dimension": { + "name": "resources", + "length": 1 + } + } + +To attach metadata to the first (and only) resource, we can do: + +- selector: ``["0"]`` +- metadata: + ``{"structural_type": "pandas.core.frame.DataFrame", "dimension": {"length": 150, "name": "rows"}`` + +``dimension`` describes rows. 
+ +Columns dimension: + +- selector: ``["0"][ALL_ELEMENTS]`` +- metadata: ``{"dimension": {"length": 6, "name": "columns"}}`` + +Observe that there is no requirement that dimensions are aligned from +the perspective of metadata. But in this case they are, so we can use +``ALL_ELEMENTS`` wildcard to describe columns for all rows. + +Third column metadata: + +- selector: ``["0"][ALL_ELEMENTS][2]`` +- metadata: + ``{"name": "sepalWidth", "structural_type": "builtins.str", "semantic_types": ["http://schema.org/Float", "https://metadata.datadrivendiscovery.org/types/Attribute"]}`` + +Column names belong to each particular column and not all columns. Using +``name`` can serve to assign a string name to otherwise numeric +dimension. + +We attach names and types to datums themselves and not dimensions. +Because we use ``ALL_ELEMENTS`` selector, this is internally stored +efficiently. We see traditional approach of storing this information in +the header of a column as a special case of a ``ALL_ELEMENTS`` selector. + +Note that the name of a column belongs to the metadata because it is +just an alternative way to reference values in an otherwise numeric +dimension. This is different from a case where a dimension has +string-based index (a map/dict) where names of values are part of the +data structure at that dimension. Which approach is used depends on the +structure of the container for which metadata is attached to. + +Default D3M dataset loader found in this package parses all tabular +values as strings and add semantic types, if known, for what could those +strings be representing (a float) and its role (an attribute). This +allows primitives later in a pipeline to convert them to proper +structural types but also allows additional analysis on original values +before such conversion is done. + +Fetching all metadata for ``["0"][1][2]`` now returns: + +.. code:: json + + { + "name": "sepalWidth", + "structural_type": "builtins.str", + "semantic_types": [ + "http://schema.org/Float", + "https://metadata.datadrivendiscovery.org/types/Attribute" + ], + "description": "Measured personally by Ronald Fisher." + } + +.. _metadata_api: + +API +~~~ + +:mod:`d3m.metadata.base` module provides two +classes which serve for storing metadata on values: :class:`~d3m.metadata.base.DataMetadata` for +data values, and :class:`~d3m.metadata.base.PrimitiveMetadata` for primitives. It also exposes a +:const:`~d3m.metadata.base.ALL_ELEMENTS` constant to be used in selectors. + +You can see public methods available on classes documented in their +code. Some main ones are: + +- ``__init__(metadata)`` – constructs a new instance of the metadata + class and optionally initializes it with top-level metadata +- ``update(selector, metadata)`` – updates metadata at a given location + in data structure identified by a selector +- ``query(selector)`` – retrieves metadata at a given location +- ``query_with_exceptions(selector)`` – retrieves metadata at a given + location, but also returns metadata for selectors which have metadata + which differs from that of ``ALL_ELEMENTS`` +- ``remove(selector)`` – removes metadata at a given location +- ``get_elements(selector)`` – lists element names which exists at a + given location +- ``to_json()`` – converts metadata to a JSON representation +- ``pretty_print()`` – pretty-print all metadata + +``PrimitiveMetadata`` differs from ``DataMetadata`` that it does not +accept selector in its methods because there is no structure in +primitives. 
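+
+A small sketch of these methods, reusing the selectors from the example above
+(the values are illustrative):
+
+.. code:: python
+
+    from d3m.metadata import base as metadata_base
+
+    # Construct a new metadata object with top-level (dataset-level) metadata.
+    metadata = metadata_base.DataMetadata({'id': 'iris_dataset_1', 'name': 'Iris Dataset'})
+
+    # Updating returns a new, updated copy; metadata objects are immutable.
+    metadata = metadata.update(('0', metadata_base.ALL_ELEMENTS, 2), {'name': 'sepalWidth'})
+    metadata = metadata.update(('0', 1, 2), {'description': 'Measured personally by Ronald Fisher.'})
+
+    # Querying returns metadata merged from less specific to more specific selectors.
+    metadata.query(('0', 1, 2))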
+ +Standard metadata keys +~~~~~~~~~~~~~~~~~~~~~~ + +You can use custom keys for metadata, but the following keys are +standardized, so you should use those if you are trying to represent the +same metadata: +https://metadata.datadrivendiscovery.org/schemas/v0/definitions.json + +The same key always have the same meaning and we reuse the same key in +different contexts when we need the same meaning. So instead of having +both ``primitive_name`` and ``dataset_name`` we have just ``name``. + +Different keys are expected in different contexts: + +- ``primitive`` – + https://metadata.datadrivendiscovery.org/schemas/v0/primitive.json +- ``container`` – + https://metadata.datadrivendiscovery.org/schemas/v0/container.json +- ``data`` – + https://metadata.datadrivendiscovery.org/schemas/v0/data.json + +A more user friendly visualization of schemas listed above is available +at https://metadata.datadrivendiscovery.org/. + +Contribute: Standardizing metadata schemas are an ongoing process. Feel +free to contribute suggestions and merge requests with improvements. + +.. _primitive-metadata: + +Primitive metadata +~~~~~~~~~~~~~~~~~~ + +Part of primitive metadata can be automatically obtained from +primitive's code, some can be computed through evaluation of primitives, +but some has to be provided by primitive's author. Details of which +metadata is currently standardized and what values are possible can be +found in primitive's JSON schema. This section describes author's +metadata into more detail. Example of primitive's metadata provided by +an author from `Monomial test +primitive `__, +slightly modified: + +.. code:: python + + metadata = metadata_module.PrimitiveMetadata({ + 'id': '4a0336ae-63b9-4a42-860e-86c5b64afbdd', + 'version': '0.1.0', + 'name': "Monomial Regressor", + 'keywords': ['test primitive'], + 'source': { + 'name': 'Test team', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/monomial.py', + 'https://gitlab.com/datadrivendiscovery/tests-data.git', + ], + }, + 'installation': [{ + 'type': metadata_module.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'location_uris': [ + 'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/monomial.py'.format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)), + ), + ], + 'python_path': 'd3m.primitives.test.MonomialPrimitive', + 'algorithm_types': [ + metadata_module.PrimitiveAlgorithmType.LINEAR_REGRESSION, + ], + 'primitive_family': metadata_module.PrimitiveFamily.REGRESSION, + }) + +- Primitive's metadata provided by an author is defined as a class + attribute and instance of :class:`~d3m.metadata.base.PrimitiveMetadata`. +- When class is defined, class is automatically analyzed and metadata + is extended with automatically obtained values from class code. +- ``id`` can be simply generated using :func:`uuid.uuid4` in Python and + should never change. **Do not reuse IDs and do not use the ID from + this example.** +- When primitive's code changes you should update the version, a `PEP + 440 `__ compatible one. + Consider updating a version every time you change code, potentially + using `semantic versioning `__, but nothing of + this is enforced. +- ``name`` is a human-friendly name of the primitive. 
+- ``keywords`` can be anything you want to convey to users of the + primitive and which could help with primitive's discovery. +- ``source`` describes where the primitive is coming from. The required + value is ``name`` to tell information about the author, but you might + be interested also in ``contact`` where you can put an e-mail like + ``mailto:author@example.com`` as a way to contact the author. + ``uris`` can be anything. In above, one points to the code in GitLab, + and another to the repo. If there is a website for the primitive, you + might want to add it here as well. These URIs are not really meant + for automatic consumption but are more as a reference. See + ``location_uris`` for URIs to the code. +- ``installation`` is important because it describes how can your + primitive be automatically installed. Entries are installed in order + and currently the following types of entries are supported: +- A ``PIP`` package available on PyPI or some other package registry: + + :: + + ``` + { + 'type': metadata_module.PrimitiveInstallationType.PIP, + 'package': 'my-primitive-package', + 'version': '0.1.0', + } + ``` + +- A ``PIP`` package available at some URI. If this is a git repository, + then an exact git hash and ``egg`` name should be provided. ``egg`` + name should match the package name installed. Because here we have a + chicken and an egg problem: how can one commit a hash of code version + if this changes the hash, you can use a helper utility function to + provide you with a hash automatically at runtime. ``subdirectory`` + part of the URI suffix is not necessary and is here just because this + particular primitive happens to reside in a subdirectory of the + repository. +- A ``DOCKER`` image which should run while the primitive is operating. + Starting and stopping of a Docker container is managed by a caller, + which passes information about running container through primitive's + ``docker_containers`` ``__init__`` argument. The argument is a + mapping between the ``key`` value and address and ports at which the + running container is available. See `Sum test + primitive `__ + for an example: + + :: + + ``` + { + 'type': metadata_module.PrimitiveInstallationType.DOCKER, + 'key': 'summing', + 'image_name': 'registry.gitlab.com/datadrivendiscovery/tests-data/summing', + 'image_digest': 'sha256:07db5fef262c1172de5c1db5334944b2f58a679e4bb9ea6232234d71239deb64', + } + ``` + +- A ``UBUNTU`` entry can be used to describe a system library or + package required for installation or operation of your primitive. If + your other dependencies require a system library to be installed + before they can be installed, list this entry before them in + ``installation`` list. + + :: + + ``` + { + 'type': metadata_module.PrimitiveInstallationType.UBUNTU, + 'package': 'ffmpeg', + 'version': '7:3.3.4-2', + } + ``` + +- A ``FILE`` entry allows a primitive to specify a static file + dependency which should be provided by a caller to a primitive. + Caller passes information about the file path of downloaded file + through primitive's ``volumes`` ``__init__`` argument. The argument + is a mapping between the ``key`` value and file path. The filename + portion of the provided path does not necessary match the filename + portion of the file's URI. 
+ + :: + + ``` + { + 'type': metadata_module.PrimitiveInstallationType.FILE, + 'key': 'model', + 'file_uri': 'http://mmlab.ie.cuhk.edu.hk/datasets/comp_cars/googlenet_finetune_web_car_iter_10000.caffemodel', + 'file_digest': '6bdf72f703a504cd02d7c3efc6c67cbbaf506e1cbd9530937db6a698b330242e', + } + ``` + +- A ``TGZ`` entry allows a primitive to specify a static directory + dependency which should be provided by a caller to a primitive. + Caller passes information about the directory path of downloaded and + extracted file through primitive's ``volumes`` ``__init__`` argument. + The argument is a mapping between the ``key`` value and directory + path. + + :: + + ``` + { + 'type': metadata_module.PrimitiveInstallationType.TGZ, + 'key': 'mails', + 'file_uri': 'https://www.cs.cmu.edu/~enron/enron_mail_20150507.tar.gz', + 'file_digest': 'b3da1b3fe0369ec3140bb4fbce94702c33b7da810ec15d718b3fadf5cd748ca7', + } + ``` + +- If you can provide, ``location_uris`` points to an exact code used by + the primitive. This can be obtained through installing a primitive, + but it can be helpful to have an online resource as well. +- ``python_path`` is a path under which the primitive will get mapped + through ``setup.py`` entry points. This is very important to keep in + sync. +- ``algorithm_types`` and ``primitive_family`` help with discovery of a + primitive. They are required and if suitable values are not available + for you, make a merge request and propose new values. As you see in + the code here and in ``installation`` entries, you can use directly + Python enumerations to populate these values. + +Some other metadata you might be interested to provide to help callers +use your primitive better are ``preconditions`` (what preconditions +should exist on data for primitive to operate well), ``effects`` (what +changes does a primitive do to data), and a ``hyperparams_to_tune`` hint +to help callers know which hyper-parameters are most important to focus +on. + +Primitive metadata also includes descriptions of a primitive and its +methods. These descriptions are automatically obtained from primitive's +docstrings. Docstrings should be made according to :ref:`numpy docstring +format ` +(`examples `__). + +Data metadata +~~~~~~~~~~~~~ + +Every value passed around a pipeline has metadata associated with it. +Defined container types have an attribute ``metadata`` to contain it. +API available to manipulate metadata is still evolving because many +operations one can do on data are reasonable also on metadata (e.g., +slicing and combining data). Currently, every operation on data clears +and re-initializes associated metadata. + + **Note:** While part of primitive's metadata is obtained + automatically nothing like that is currently done for data metadata. + This means one has to manually populate with dimension and typing + information. This will be improved in the future with automatic + extraction of this metadata from data. + +Parameters +---------- + +A base class to be subclassed and used as a type for :class:`~d3m.metadata.params.Params` type +argument in primitive interfaces can be found in the +:mod:`d3m.metadata.params` module. An +instance of this subclass should be returned from primitive's +:meth:`~d3m.metadata.params.Params.get_params` method, and accepted in :meth:`~d3m.metadata.params.Params.set_params`. + +To define parameters a primitive has you should subclass this base class +and define parameters as class attributes with type annotations. +Example: + +.. 
code:: python + + import numpy + from d3m.metadata import params + + class Params(params.Params): + weights: numpy.ndarray + bias: float + +:class:`~d3m.metadata.params.Params` class is just a fancy Python dict which checks types of +parameters and requires all of them to be set. You can create it like: + +.. code:: python + + ps = Params({'weights': weights, 'bias': 0.1}) + ps['bias'] + +:: + + 0.01 + +``weights`` and ``bias`` do not exist as an attributes on the class or +instance. In the class definition, they are just type annotations to +configure which parameters are there. + + **Note:** :class:`~d3m.metadata.params.Params` class uses ``parameter_name: type`` syntax + while :class:`~d3m.metadata.hyperparams.Hyperparams` class uses + ``hyperparameter_name = Descriptor(...)`` syntax. Do not confuse + them. + +.. _hyperparameters: + +Hyper-parameters +---------------- + +A base class for hyper-parameters description for primitives can be +found in the +:mod:`d3m.metadata.hyperparams` module. + +To define a hyper-parameters space you should subclass this base class +and define hyper-parameters as class attributes. Example: + +.. code:: python + + from d3m.metadata import hyperparams + + class Hyperparams(hyperparams.Hyperparams): + learning_rate = hyperparams.Uniform(lower=0.0, upper=1.0, default=0.001, semantic_types=[ + 'https://metadata.datadrivendiscovery.org/types/TuningParameter' + ]) + clusters = hyperparams.UniformInt(lower=1, upper=100, default=10, semantic_types=[ + 'https://metadata.datadrivendiscovery.org/types/TuningParameter' + ]) + +To access hyper-parameters space configuration, you can now call: + +.. code:: python + + Hyperparams.configuration + +:: + + OrderedDict([('learning_rate', Uniform(lower=0.0, upper=1.0, q=None, default=0.001)), ('clusters', UniformInt(lower=1, upper=100, default=10))]) + +To get a random sample of all hyper-parameters, call: + +.. code:: python + + hp1 = Hyperparams.sample(random_state=42) + +:: + + Hyperparams({'learning_rate': 0.3745401188473625, 'clusters': 93}) + +To get an instance with all default values: + +.. code:: python + + hp2 = Hyperparams.defaults() + +:: + + Hyperparams({'learning_rate': 0.001, 'clusters': 10}) + +:class:`~d3m.metadata.hyperparams.Hyperparams` class is just a fancy read-only Python dict. You can +also manually create its instance: + +.. code:: python + + hp3 = Hyperparams({'learning_rate': 0.01, 'clusters': 20}) + hp3['learning_rate'] + +:: + + 0.01 + +If you want to use most of default values, but set some, you can thus +use this dict-construction approach: + +.. code:: python + + hp4 = Hyperparams(Hyperparams.defaults(), clusters=30) + +:: + + Hyperparams({'learning_rate': 0.001, 'clusters': 30}) + +There is no class- or instance-level attribute ``learning_rate`` or +``clusters``. In the class definition, they were used only for defining +the hyper-parameters space, but those attributes were extracted out and +put into ``configuration`` attribute. 
+ +There are four types of hyper-parameters: \* tuning parameters which +should be tuned during hyper-parameter optimization phase \* control +parameters which should be determined during pipeline construction phase +and are part of the logic of the pipeline \* parameters which control +the use of resources by the primitive \* parameters which control which +meta-features are computed by the primitive + +You can use hyper-parameter's semantic type to differentiate between +those types of hyper-parameters using the following URIs: + +* https://metadata.datadrivendiscovery.org/types/TuningParameter +* https://metadata.datadrivendiscovery.org/types/ControlParameter +* https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter +* https://metadata.datadrivendiscovery.org/types/MetafeatureParameter + +Once you define a :class:`~d3m.metadata.hyperparams.Hyperparams` class for your primitive you can pass +it as a class type argument in your primitive's class definition: + +.. code:: python + + class MyPrimitive(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): + ... + +Those class type arguments are then automatically extracted from the +class definition and made part of primitive's metadata. This allows the +caller to access the :class:`~d3m.metadata.hyperparams.Hyperparams` class to crete an instance to pass +to primitive's constructor: + +.. code:: python + + hyperparams_class = MyPrimitive.metadata.get_hyperparams() + primitive = MyPrimitive(hyperparams=hyperparams_class.defaults()) + + **Note:** :class:`~d3m.metadata.hyperparams.Hyperparams` class uses + ``hyperparameter_name = Descriptor(...)`` syntax while :class:`~d3m.metadata.params.Params` + class uses ``parameter_name: type`` syntax. Do not confuse them. + +Problem description +------------------- + +:mod:`d3m.metadata.problem` module provides +a parser for problem description into a normalized Python object. + +You can load a problem description and get the loaded object dumped back +by running: + +.. code:: bash + + python3 -m d3m problem describe + +Dataset +------- + +This package also provides a Python class to load and represent datasets +in Python in :mod:`d3m.container.dataset` +module. This container value can serve as an input to the whole pipeline +and be used as input for primitives which operate on a dataset as a +whole. It allows one to register multiple loaders to support different +formats of datasets. You pass an URI to a dataset and it automatically +picks the right loader. By default it supports: + +- D3M dataset. Only ``file://`` URI scheme is supported and URI should + point to the ``datasetDoc.json`` file. Example: + ``file:///path/to/datasetDoc.json`` +- CSV file. Many URI schemes are supported, including remote ones like + ``http://``. URI should point to a file with ``.csv`` extension. + Example: ``http://example.com/iris.csv`` +- Sample datasets from :mod:`sklearn.datasets`. + Example: ``sklearn://boston`` + +You can load a dataset and get the loaded object dumped back by running: + +.. code:: bash + + python3 -m d3m dataset describe diff --git a/d3m/docs/pipeline.rst b/d3m/docs/pipeline.rst new file mode 100644 index 0000000..733238d --- /dev/null +++ b/d3m/docs/pipeline.rst @@ -0,0 +1,443 @@ +Pipeline +======== + +Pipeline is described as a DAG consisting of interconnected steps, where +steps can be primitives, or (nested) other pipelines. 
Pipeline has +data-flow semantics, which means that steps are not necessary executed +in the order they are listed, but a step can be executed when all its +inputs are available. Some steps can even be executed in parallel. On +the other hand, each step can use only previously defined outputs from +steps coming before in the order they are listed. In JSON, the following +is a sketch of its representation: + +.. code:: yaml + + { + "id": , + "schema": , + "source": { + "name": , + "contact": , + "from": + ... # Any extra metadata author might want to add into the pipeline, like version, + # name, and config parameters of the system which produced this pipeline. + }, + "created": , + "name": , + "description": , + "users": [ + { + "id": , + "reason": , + "rationale": + } + ], + "inputs": [ + { + "name": + } + ], + "outputs": [ + { + "name": , + "data": + } + ], + "steps": [ + { + "type": "PRIMITIVE", + "primitive": { + "id": , + "version": , + "python_path": , + "name": , + "digest": + }, + # Constructor arguments should not be listed here, because they can be automatically created from other + # information. All these arguments are listed as kind "PIPELINE" in primitive's metadata. + "arguments": { + # A standard inputs argument used for both set_training_data and default "produce" method. + "inputs": { + "type": "CONTAINER", + "data": + }, + # A standard inputs argument, used for "set_training_data". + "outputs": { + "type": "CONTAINER", + "data": + }, + # An extra argument which takes as inputs outputs from another primitive in this pipeline. + "extra_data": { + "type": "CONTAINER", + "data": + }, + # An extra argument which takes as input a singleton output from another step in this pipeline. + "offset": { + "type": "DATA", + "data": + } + }, + "outputs": [ + { + # Data is made available by this step from default "produce" method. + "id": "produce" + }, + { + # Data is made available by this step from an extra "produce" method, too. + "id": "produce_score" + } + ], + # Some hyper-parameters are not really tunable and should be fixed as part of pipeline definition. This + # can be done here. Hyper-parameters listed here cannot be tuned or overridden during a run. Author of + # a pipeline decides which hyper-parameters are which, probably based on their semantic type. + # This is a map hyper-parameter names and their values using a similar format as arguments, but + # allowing also PRIMITIVE and VALUE types. + "hyperparams": { + "loss": { + "type": "PRIMITIVE", + "data": <0-based index from steps identifying a primitive to pass in> + }, + "column_to_operate_on": { + "type": "VALUE", + # Value is converted to a JSON-compatible value by hyper-parameter class. + # It also knows how to convert it back. + "data": 5 + }, + # A special case where a hyper-parameter can also be a list of primitives, + # which are then passed to the \"Set\" hyper-parameter class. + "ensemble": { + "type": "PRIMITIVE", + "data": [ + <0-based index from steps identifying a primitive to pass in>, + <0-based index from steps identifying a primitive to pass in> + ] + } + }, + "users": [ + { + "id": , + "reason": , + "rationale": + } + ] + }, + { + "type": "SUBPIPELINE", + "pipeline": { + "id": + }, + # For example: [{"data": "steps.0.produce"}] would map the data reference "steps.0.produce" of + # the outer pipeline to the first input of a sub-pipeline. 
+ "inputs": [ + { + "data": + } + ], + # For example: [{"id": "predictions"}] would map the first output of a sub-pipeline to a data + # reference "steps.X.predictions" where "X" is the step number of a given sub-pipeline step. + "outputs": [ + { + "id": + } + ] + }, + { + # Used to represent a pipeline template which can be used to generate full pipelines. Not to be used in + # the metalearning context. Additional properties to further specify the placeholder constraints are allowed. + "type": "PLACEHOLDER", + # A list of inputs which can be used as inputs to resulting sub-pipeline. + # Resulting sub-pipeline does not have to use all the inputs, but it cannot use any other inputs. + "inputs": [ + { + "data": + } + ], + # A list of outputs of the resulting sub-pipeline. + # Their (allowed) number and meaning are defined elsewhere. + "outputs": [ + { + "id": + } + ] + } + ] + } + +``id`` uniquely identifies this particular database document. + +Pipeline describes how inputs are computed into outputs. In most cases +inputs are :class:`~d3m.container.dataset.Dataset` container values and +outputs are predictions as Pandas :class:`~d3m.container.pandas.DataFrame` container +values in `Lincoln Labs predictions +format `__, +and, during training, potentially also internal losses/scores. The same +pipeline is used for both training and predicting. + +Pipeline description contains many *data references*. Data reference is +just a string which identifies an output of a step or a pipeline input +and forms a data-flow connection between data available and an input to +a step. It is recommended to be a string of the following forms: + +- ``steps..`` — ``number`` identifies the step in the list + of steps (0-based) and ``id`` identifies the name of a produce method + of the primitive, or the output of a pipeline step +- ``inputs.`` — ``number`` identifies the pipeline input + (0-based) +- ``outputs.`` — ``number`` identifies the pipeline output + (0-based) + +Inputs in the context of metalearning are expected to be datasets, and +the order of inputs match the order of datasets in a pipeline run. (In +other contexts, like TA2-TA3 API, inputs might be something else, for +example a pipeline can consist of just one primitive a TA3 wants to run +on a particular input.) + +Remember that each primitive has a set of arguments it takes as a whole, +combining all the arguments from all its methods. Each argument +(identified by its name) can have only one value associated with it and +any method accepting that argument receives that value. Once all values +for all arguments for a method are available, that method can be called. + +Remember as well that each primitive can have multiple "produce" +methods. These methods can be called after a primitive has been fitted. +In this way a primitive can have multiple outputs, for each "produce" +method one. + +Placeholders can be used to define pipeline templates to be used outside +of the metalearning context. A placeholder is replaced with a pipeline +step to form a pipeline. Restrictions of placeholders may apply on the +number of them, their position, allowed inputs and outputs, etc. + +.. _pipeline-description-example: + +Pipeline description example +---------------------------- + +The following example uses the core package and the `common primitives +repo `__, this +example provides the basic knowledge to build a pipeline in memory. This +specific example creates a pipeline for classification task. + +.. 
code:: python + + from d3m import index + from d3m.metadata.base import ArgumentType + from d3m.metadata.pipeline import Pipeline, PrimitiveStep + + # -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest + # extract_columns_by_semantic_types(targets) -> ^ + + # Creating pipeline + pipeline_description = Pipeline() + pipeline_description.add_input(name='inputs') + + # Step 1: dataset_to_dataframe + step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')) + step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') + step_0.add_output('produce') + pipeline_description.add_step(step_0) + + # Step 2: column_parser + step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')) + step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') + step_1.add_output('produce') + pipeline_description.add_step(step_1) + + # Step 3: extract_columns_by_semantic_types(attributes) + step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) + step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') + step_2.add_output('produce') + step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/Attribute']) + pipeline_description.add_step(step_2) + + # Step 4: extract_columns_by_semantic_types(targets) + step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) + step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') + step_3.add_output('produce') + step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) + pipeline_description.add_step(step_3) + + attributes = 'steps.2.produce' + targets = 'steps.3.produce' + + # Step 5: imputer + step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_cleaning.imputer.SKlearn')) + step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) + step_4.add_output('produce') + pipeline_description.add_step(step_4) + + # Step 6: random_forest + step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.regression.random_forest.SKlearn')) + step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce') + step_5.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets) + step_5.add_output('produce') + pipeline_description.add_step(step_5) + + # Final Output + pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') + + # Output to YAML + print(pipeline_description.to_yaml()) + +Pipeline Run +------------ + +:mod:`d3m.metadata.pipeline_run` module contains the classes that represent the Pipeline Run. The Pipeline Run was +introduced to ensure that pipeline execution could be captured and duplicated. To accomplish this, the problem doc, +hyperparameter settings and any other variables to the pipeline execution phases are captured by the Pipeline Run. 
+
+The Pipeline Run is generated during pipeline execution:
+
+::
+
+    $ python3 -m d3m runtime fit-produce -p pipeline.json -r problem/problemDoc.json -i dataset_TRAIN/datasetDoc.json \
+        -t dataset_TEST/datasetDoc.json -o results.csv -O pipeline_run.yml
+
+The following is a sketch of the Pipeline Run representation (in YAML) in two phases for the above fit-produce call:
+
+.. code:: yaml
+
+    context:
+    datasets:
+
+    end:
+    environment:
+
      + id: e3187585-cf8b-5e31-9435-69907912c3ca + pipeline: + + problem: + + random_seed: + run: + is_standard_pipeline: true + phase: FIT + results: + + schema: https://metadata.datadrivendiscovery.org/schemas/v0/pipeline_run.json + start: + status: + state: + steps: +
      + --- + context: + datasets: + + end: + environment: +
      + id: b2e9b591-c332-5bc5-815e-d1ec73ecdb06 + pipeline: + + previous_pipeline_run: + id: e3187585-cf8b-5e31-9435-69907912c3ca + problem: + + random_seed: + run: + is_standard_pipeline: true + phase: PRODUCE + results: + + scoring: + datasets: + + end: + pipeline: + + random_seed: + start: + status: + state: + steps: +
      + schema: https://metadata.datadrivendiscovery.org/schemas/v0/pipeline_run.json + start: + status: + state: + steps: +
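+
+Pipeline run files are ordinary (multi-document) YAML, so they can also be inspected programmatically.
+As an illustration only (this is not part of the d3m tooling), here is a minimal sketch assuming PyYAML
+is installed and the run was saved as ``pipeline_run.yml``:
+
+.. code:: python
+
+    import yaml
+
+    # A pipeline run file can contain several YAML documents (one per phase),
+    # separated by "---", so iterate over all of them.
+    with open('pipeline_run.yml') as run_file:
+        for run in yaml.safe_load_all(run_file):
+            # "run.phase" and "status.state" are among the fields sketched above.
+            print(run['run']['phase'], run['status']['state'])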
+
+The d3m module has a command that supports actions on Pipeline Runs:
+
+::
+
+    $ python3 -m d3m pipeline-run --help
+
+Currently there is only one command available, which validates a Pipeline Run:
+
+::
+
+    $ python3 -m d3m pipeline-run validate pipeline_run.yml
+
+The Reference Runtime offers a way to pass an existing Pipeline Run file to a runtime command to allow it to be rerun.
+Here is an example of this for the fit-produce call:
+
+::
+
+    $ python3 -m d3m runtime fit-produce -u pipeline_run.yml
+
+Here is the guidance from the help menu:
+
+::
+
+  -u INPUT_RUN, --input-run INPUT_RUN
+                        path to a pipeline run file with configuration, use
+                        "-" for stdin
+
+
+Reference runtime
+-----------------
+
+:mod:`d3m.runtime` module contains a reference runtime for pipelines. This
+module also has an extensive command line interface you can access
+through ``python3 -m d3m runtime``.
+
+Example of fitting and producing a pipeline with the Runtime:
+
+.. code:: python
+
+    import os
+
+    from d3m.metadata import base as metadata_base, hyperparams as hyperparams_module, pipeline as pipeline_module, problem
+    from d3m.container.dataset import Dataset
+    from d3m.runtime import Runtime
+
+    # Loading problem description.
+    problem_description = problem.parse_problem_description('problemDoc.json')
+
+    # Loading dataset.
+    path = 'file://{uri}'.format(uri=os.path.abspath('datasetDoc.json'))
+    dataset = Dataset.load(dataset_uri=path)
+
+    # Loading pipeline description file.
+    with open('pipeline_description.json', 'r') as file:
+        pipeline_description = pipeline_module.Pipeline.from_json(string_or_file=file)
+
+    # Creating an instance of the runtime with the pipeline description and problem description.
+    runtime = Runtime(pipeline=pipeline_description, problem_description=problem_description, context=metadata_base.Context.TESTING)
+
+    # Fitting pipeline on input dataset.
+    fit_results = runtime.fit(inputs=[dataset])
+    fit_results.check_success()
+
+    # Producing results using the fitted pipeline.
+    produce_results = runtime.produce(inputs=[dataset])
+    produce_results.check_success()
+
+    print(produce_results.values)
+
+Also, the Runtime provides a very useful set of tools to run pipelines
+on the terminal. Here is a basic example of how to fit and produce a
+pipeline like the previous example:
+
+::
+
+    $ python3 -m d3m runtime fit-produce -p pipeline.json -r problem/problemDoc.json -i dataset_TRAIN/datasetDoc.json -t dataset_TEST/datasetDoc.json -o results.csv -O pipeline_run.yml
+
+For more information about the usage:
+
+::
+
+    $ python3 -m d3m runtime --help
diff --git a/d3m/docs/primitive-checklist.rst b/d3m/docs/primitive-checklist.rst
new file mode 100644
index 0000000..5871fdb
--- /dev/null
+++ b/d3m/docs/primitive-checklist.rst
@@ -0,0 +1,139 @@
+.. _primitive-good-citizen:
+
+Primitive Good Citizen Checklist
+================================
+
+This is a list of dos, don'ts and things to consider when crafting a new primitive or updating an existing one. This
+list is not exhaustive so please add new items to the list as they are discovered! An example of a primitive that
+endeavors to adhere to all of the following guidance can be found `here`_.
+
+DO's
+
+* Do complete the documentation on the primitive such as:
+
+  * Primitive family, algorithm type.
+  * Docstring of the primitive's Python class.
+
+    * One line summary first:
+
+      * Primitive name should be close to this.
+      * Primitive path should be close to this as well.
+
+    * Longer documentation/description after, all in the main docstring of the class.
+ + * Provide pipeline examples together with the primitive annotation. + * Docstrings in `numpy style`_. + * Please use `reStructuredText`_ instead of markdown or other formats. + * Maintain a change-log of alterations to the primitive (somewhere in the primitive's repo, consider using a `standard format`_). + * One should also add point of contact information and the git repository link in primitive's metadata + (``source.name``, ``source.contact`` and ``source.uris`` metadata fields). + * Add your primitive name to the `list of primitive names`_ if it does not already + exist. Chances are that your generic primitive name is in that list and you should use that name for your primitive. + +* Do annotate your Primitive with Python types. + +* Do make sure the output from your produce method is a d3m container type. + + + +* If your primitive is operating on columns and rows: + + * Do include ``d3mIndex`` column in produced output if input has ``d3mIndex`` column. + * You can make this behavior controlled by the ``add_index_columns`` hyper-parameter. + + * If a primitive has a hyper-paramer to directly set which columns to operate on, do use column + indices and not column names to identify those columns. + + * Consider using a pair of hyper-parameters: ``use_columns`` and ``exclude_columns`` with standard logic. + + * When deciding on which columns to operate, when using semantic types, do use + ``https://metadata.datadrivendiscovery.org/types/TrueTarget`` + and ``https://metadata.datadrivendiscovery.org/types/RedactedPrivilegedData`` semantic types and not + ``https://metadata.datadrivendiscovery.org/types/SuggestedTarget`` and + ``https://metadata.datadrivendiscovery.org/types/SuggestedPrivilegedData``. + The latter are semantic types which come from the dataset, the former are those which come from the problem description. + While it is true that currently generally they always match, in fact primitives should just respect those coming from + the problem description. The dataset has them so that one can create problem descriptions on the fly, if needed. + +* Be mindful that data being passed through a pipeline also has metadata: + + * If your primitive generates new data (e.g., new columns), add metadata suitable for those columns: + + * Name the column appropriately for human consumption by setting column's ``name`` metadata. + + * Set semantic types appropriately. + + * If your primitive is producing target predictions, add ``https://metadata.datadrivendiscovery.org/types/PredictedTarget`` + to a column containing those predictions. + + * Remember metadata encountered on target columns during fitting, and reuse that metadata as much + as reasonable when producing target predictions. + + * If your primitive is transforming existing data (e.g., transforming columns), reuse as much metadata from + original data as reasonable, but do update metadata based on new data. + + * If structural type of the column changes, make sure you note this change in metadata as well. + + * Support also non-standard metadata and try to pass it through as-is if possible. + +* Do write unit tests for your primitives. This greatly aids porting to a new version of the core package. + + * Test pickle and unpickle of the primitive (both fitted and unfitted primitives). + * Test with use of semantic types to select columns to operate on, and without the use of semantic types. + * Test with all return types: ``append``, ``replace``, ``new``. + * Test all hyper-parameter values with their ``sample`` method. 
+ * Use/contribute to `tests data repository`_. + +* Do clearly define hyper-parameters (bounds, descriptions, semantic types). + + * Suggest new classes of hyper-parameters if needed. + * Consider if ``upper_inclusive`` and ``lower_inclusive`` values should be included or not for every hyper-parameter + * Define reasonable hyper-parameters which can be automatically populated/searched by TA2. + A hyper-parameter such as ``hyperparams.Hyperparameter[typing.Sequence[Any]]`` is not useful in this case. + * Ensure that your primitive can be run successfully with default settings for all hyper-parameters. + * If there are combinations of hyper-parameters settings that are suboptimal please note this in the documentation. For + example: "If hyper-parameter A is set to a True, hyper-parameter B must always be a positive integer". + +* Do bump primitive version when changing hyper-parameters, method signatures or params. + In short, on any API change of your primitive. + +* If your primitive can use GPUs if available, set ``can_use_gpus`` primitive's metadata to true. + +* If your primitive can use different number of CPUs/cores, expose a hyper-parameter with semantic types + `https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter` and `https://metadata.datadrivendiscovery.org/types/CPUResourcesUseParameter` + and allow caller to control the number of CPUs/cores used through it. + + * Make sure that the default value of such hyper-parameter is 1. + +DON'Ts + +* Don't change the input DataFrame! Make a copy and make changes to the copy instead. The original input DataFrame is + assumed never to change between primitives in the pipeline. +* Don't return DataFrames with a (non-default) Pandas DataFrame index. It can be utilized internally, but drop it before + returning. On output a default index should be provided. + +PLEASE CONSIDER + +* Consider using/supporting semantic types to select which columns to operate on, and use the `use_semantic_types` hyper-parameter. +* Consider allowing three types of outputs strategies: ``new``/``append``/``replace`` output, if operating on columns, + controlled by the ``return_result`` hyper-parameter. +* Consider picking the input and output format/structure of data to match other primitives of the same family/type. If + necessary, convert data to the format you need inside your primitive. Pipelines tend to start with datasets, then go + to dataframes, and then to ndarrays sometimes, returning predictions as a dataframe. + Consider where your primitive in a pipeline generally should be and + consider that when deciding on what are inputs and outputs of your primitive. Consider that your primitive will be + chosen dynamically by a TA2 and will be expected to behave in predictable ways based on family and base class. +* Consider using a specific hyper-parameter class instead of the hyper-parameter base class as it is not very useful for + TA2s. For example use ``hyperparams.Set`` instead of ``hyperparams.Hyperparameter[typing.Sequence[Any]]``. It is + better to use the former as it is far more descriptive. +* Use a base class for your primitive which makes sense based on semantics of the base class and not necessarily + how a human would understand the primitive. +* Consider that your primitive will be chosen dynamically by a TA2 and will + be expected to behave in predictable ways based on primitive family and base class. + +.. _here: https://gitlab.com/datadrivendiscovery/common-primitives/blob/master/common_primitives/random_forest.py +.. 
_numpy style: https://numpydoc.readthedocs.io/en/latest/format.html +.. _reStructuredText: http://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html +.. _tests data repository: https://gitlab.com/datadrivendiscovery/tests-data +.. _standard format: https://keepachangelog.com/en/1.0.0/ +.. _list of primitive names: https://gitlab.com/datadrivendiscovery/d3m/-/blob/devel/d3m/metadata/primitive_names.py diff --git a/d3m/docs/primitives_base_classes.rst b/d3m/docs/primitives_base_classes.rst new file mode 100644 index 0000000..c611db7 --- /dev/null +++ b/d3m/docs/primitives_base_classes.rst @@ -0,0 +1,40 @@ +High-level primitives base classes +================================== + +High-level primitives base classes provides tools to the developers +to easily create new primitives by abstracting some unnecessary and +repetitive work. + +Primitives base classes +----------------------- + +``FileReaderPrimitiveBase``: A primitive base class for reading files referenced in columns. + +``DatasetSplitPrimitiveBase``: A base class for primitives which fit on a +``Dataset`` object to produce splits of that ``Dataset`` when producing. + +``TabularSplitPrimitiveBase``: A primitive base class for splitting tabular datasets. + + +Examples +-------- + +Examples of primitives using these base classes can be found `in +this +repository `__: + +- `DataFrameImageReaderPrimitive `__ + A primitive which reads columns referencing image files. +- `FixedSplitDatasetSplitPrimitive `__ + A primitive which splits a tabular Dataset in a way that uses for the test + (score) split a fixed list of primary index values or row indices of the main + resource to be used. All other rows are added used for the train split. +- `KFoldDatasetSplitPrimitive `__ + A primitive which splits a tabular Dataset for k-fold cross-validation. +- `KFoldTimeSeriesSplitPrimitive `__ + A primitive which splits a tabular time-series Dataset for k-fold cross-validation. +- `NoSplitDatasetSplitPrimitive `__ + A primitive which splits a tabular Dataset in a way that for all splits it + produces the same (full) Dataset. +- `TrainScoreDatasetSplitPrimitive `__ + A primitive which splits a tabular Dataset into random train and score subsets. diff --git a/d3m/docs/quickstart.rst b/d3m/docs/quickstart.rst new file mode 100644 index 0000000..40cb826 --- /dev/null +++ b/d3m/docs/quickstart.rst @@ -0,0 +1,817 @@ +.. _quickstart: + +TA1 quick-start guide +===================== + +This aims to be a tutorial, or a quick-start guide, for +newcomers to the D3M project who are interested in writing TA1 primitives. +It is not meant to be a comprehensive +guide to everything about D3M, or even just TA1. The goal here is for +the reader to be able to write a new, simple, but working primitive by +the end of this tutorial. To achieve this goal, this tutorial is divided +into several sections: + +Important links +--------------- + +First, here is a list of some important links that should help you with +reference and instructional material beyond this quick start guide. Be +aware also that the d3m core package source code has extensive docstrings that +:ref:`you may find helpful `. 
+ +- Documentation of the whole D3M program: + `https://docs.datadrivendiscovery.org `__ +- Common primitives: + `https://gitlab.com/datadrivendiscovery/common-primitives `__ +- Public datasets: + `https://datasets.datadrivendiscovery.org/d3m/datasets `__ +- Docker images: + `https://docs.datadrivendiscovery.org/docker.html `__ +- Index of TA1, TA2, TA3 repositories: + `https://github.com/darpa-i2o/d3m-program-index `__ +- :ref:`primitive-good-citizen` + +.. _overview-of-primitives-and-pipelines: + +Overview of primitives and pipelines +------------------------------------ + +Let's start with basic definitions in order for us to understand a +little bit better what happens when we run a pipeline later in the +tutorial. + +A *pipeline* is basically a series of steps that are executed in order +to solve a particular *problem* (such as prediction based on historical +data). A step of a pipeline is usually a *primitive* (a step can be +something else, however, like a sub-pipeline, but for the purposes of +this tutorial, assume that each step is a primitive): something that +individually could, for example, transform data into another format, or +fit a model for prediction. There are many types of primitives (see the +`primitives index repo`_ for the full +list of available primitives). In a pipeline, the steps must be arranged +in a way such that each step must be able to read the data in the format +produced by the preceding step. + +.. _primitives index repo: https://gitlab.com/datadrivendiscovery/primitives + +For this tutorial, let's try to use the example pipeline that comes with +a primitive called +``d3m.primitives.classification.logistic_regression.SKlearn`` to predict +baseball hall-of-fame players, based on their stats (see the +`185_baseball dataset `__). + +Let's take a look at the example pipeline. Many example pipelines can be found +in `primitives index repo`_ where they demonstrate how to use particular primitives. +At the time of this writing, an example pipeline can be found `here +`__, +but this repository's directory names and files periodically change, so it is +prudent to see how to navigate to this file too. + +The index is organized as: +- ``v2020.1.9`` (version of the core package of the index, changes periodically) +- ``JPL`` (the organization that develops/maintains the primitive) +- ``d3m.primitives.classification.logistic_regression.SKlearn`` (the python path of the actual primitive) +- ``2019.11.13`` (the version of this primitive, changes periodically) +- ``pipelines`` +- ``862df0a2-2f87-450d-a6bd-24e9269a8ba6.json`` (actual pipeline description filename, changes periodically) + +Early on in this JSON document, you will see a list called ``steps``. This +is the actual list of primitive steps that run one after another in a +pipeline. Each step has the information about the primitive, as well as +arguments, outputs, and hyper-parameters, if any. This specific pipeline +has 5 steps (the ``d3m.primitives`` prefix is omitted in the following +list): + +- ``data_transformation.dataset_to_dataframe.Common`` +- ``data_transformation.column_parser.Common`` +- ``data_cleaning.imputer.SKlearn`` +- ``classification.logistic_regression.SKlearn`` +- ``data_transformation.construct_predictions.Common`` + +Now let's take a look at the first primitive step in that pipeline. We +can find the source code of this primitive in the common-primitives repo +(`common_primitives/dataset_to_dataframe.py +`__). +Take a look particularly at the ``produce`` method. 
This is essentially +what the primitive does. Try to do this for the other primitive steps in +the pipeline as well - take a cursory look at what each one essentially +does (note that for the actual classifier primitive, you should look at +the ``fit`` method as well to see how the model is trained). Primitives +whose python path suffix is ``*.Common`` is in the `common primitives `__ +repository, and those that have a ``*.SKlearn`` suffix is in the +`sklearn-wrap `__ repository (checkout the `dist `__ branch, +to which primitives are being generated). + +If you're having a hard time looking for the correct source file, you can try +taking the primitive ``id`` from the primitive step description in the +pipeline, and ``grep`` for it. For example, if you were +looking for the source code of the first primitive step in this +pipeline, first look at the primitive info in that step and get its +``id``: + +.. code:: + + "primitive": { + "id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", + "version": "0.3.0", + "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", + "name": "Extract a DataFrame from a Dataset" + }, + +Then, run this: + +.. code:: shell + + git clone https://gitlab.com/datadrivendiscovery/common-primitives.git + cd common-primitives + grep -r 4b42ce1e-9b98-4a25-b68e-fad13311eb65 . | grep -F .py + +However, this series of commands assumes that you know exactly which +specific repository is the primitive's source code located in (the ``git +clone`` command). Since this is probably not the case for an arbitrarily +given primitive, there is a method on how to find out the repository URL +of any primitive, and it requires using a d3m Docker image, which is +described in the next section. + +Setting up a local d3m environment +---------------------------------- + +In order to run a pipeline, you must have a Python environment where the +d3m core package is installed, as well as the packages of the primitives +installed as well. While it is possible to setup a Python virtual +environment and install the packages them through ``pip``, in this +tutorial, we're going to use the d3m Docker images instead (in many +cases, even beyond this tutorial, this will save you a lot of time and +effort trying to find the any missing primitive packages, manually +installing them, and troubleshooting installation errors). So, make sure +`Docker `__ is installed in your system. + +You can find the list of D3M docker images `here `__. +The one we're going to use in this tutorial is the v2020.1.9 +primitives image (feel free to use whatever the latest one instead +though - just modify the ``v2020.1.9`` part accordingly): + +.. code:: shell + + docker pull registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 + +Once you have downloaded the image, we can finally run the d3m package +(and hence run a pipeline). Before running a pipeline though, let's +first try to get a list of what primitives are installed in the image's +Python environment: + +.. code:: shell + + docker run --rm registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 python3 -m d3m index search + +You should get a big list of primitives. All of the known primitives to +D3M should be there. + +You can also run the docker container in interactive mode (to run +commands as if you have logged into the container machine provides) by +using the ``-it`` option: + +.. 
code:: shell + + docker run --rm -it registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 + +The previous section mentions a method of determining where the source +code of an arbitrarily given primitive can be found. We can do this +using the d3m python package within a d3m docker container. First get the +``python_path`` of the primitive step (see the JSON snippet above of the +primitive's info from the pipeline). Then, run this command: + +.. code:: shell + + docker run --rm registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 python3 -m d3m index describe d3m.primitives.data_transformation.dataset_to_dataframe.Common + +Near the top of the huge JSON string describing the primitive, you'll see +``"source"``, and inside it, ``"uris"``. To help read the JSON, you can use +the ``jq`` utility: + +.. code:: shell + + docker run --rm -it registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 + python3 -m d3m index describe d3m.primitives.data_transformation.dataset_to_dataframe.Common | jq .source.uris + +This should give the URI of the git repo where the source code of that primitive can be found. Also, You +can also substitute the primitive ``id`` for the ``python_path`` in that +command, but the command usually returns a result faster if you provide +the ``python_path``. Note also that you can only do this for primitives +that have been submitted for a particular image (primitives that are +contained in the `primitives index repo`_). + +It can be obscure at first how to use the d3m python package, but you can +always access the help string for each d3m command at every level of the +command chain by using the ``-h`` flag. This is useful especially for +the getting a list of all the possible arguments for the ``runtime`` +module. + +.. code:: shell + + docker run --rm registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 python3 -m d3m -h + docker run --rm registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 python3 -m d3m index -h + docker run --rm registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 python3 -m d3m runtime -h + docker run --rm registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 python3 -m d3m runtime fit-score -h + +One last point before we try running a pipeline. The docker container +must be able to access the dataset location and the pipeline location +from the host filesystem. We can do this by `bind-mounting +`__ a host directory that +contains both the ``datasets`` repo and the ``primitives`` index repo to +a container directory. Git clone these repos, and also make another empty directory called +``pipeline-outputs``. Now, if your directory structure looks like this:: + + /home/foo/d3m + ├── datasets + ├── pipeline-outputs + └── primitives + +Then you'll want to bind-mount ``/home/foo/d3m`` to a directory in the +container, say ``/mnt/d3m``. You can specify this mapping in the docker +command itself: + +.. 
code:: shell + + docker run \ + --rm \ + -v /home/foo/d3m:/mnt/d3m \ + registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2019.11.10 \ + ls /mnt/d3m + +If you're reading this tutorial from a text editor, it might be a good +idea at this point to find and replace ``/home/foo/d3m`` with the actual +path in your system where the ``datasets``, ``pipeline-outputs``, and +``primitives`` directories are all located. This will make it easier for +you to just copy and paste the commands from here on out, instead of +changing the faux path every time. + +.. _running-example-pipeline: + +Running an example pipeline +--------------------------- + +At this point, let's try running a pipeline. Again, we're going to run +the example pipeline that comes with +``d3m.primitives.classification.logistic_regression.SKlearn``. There are +two ways to run a pipeline: by specifying all the necessary paths of the +dataset, or by specifying and using a pipeline run file. Let's +make sure first though that the dataset is available, as described in the +next subsection. + +.. _preparing-dataset: + +Preparing the dataset +~~~~~~~~~~~~~~~~~~~~~ + +Towards the end of the previous section, you were asked to git clone the +``datasets`` repo to your machine. Most likely, you might have +accomplished that like this: + +.. code:: shell + + git clone https://datasets.datadrivendiscovery.org/d3m/datasets.git + +But unless you had `git LFS `__ +installed, the entire contents of the repo might not have been really +installed. + +The repo is organized such that all files larger than 100 +KB is stored in git LFS. Thus, if you cloned without git LFS installed, you +most likely have to do a one-time extra step before you can use a dataset, as +some files of that dataset that are over 100 KB will not have the actual +data in them (although they will still exist as files in the cloned +repo). This is true even for the dataset that we will use in this +exercise, ``185_baseball``. To verify this, open this file in a text +editor:: + + datasets/training_datasets/seed_datasets_archive/185_baseball/185_baseball_dataset/tables/learningData.csv + +Then, see if it contains text similar to this:: + + version https://git-lfs.github.com/spec/v1 + oid sha256:931943cc4a675ee3f46be945becb47f53e4297ec3e470c4e3e1f1db66ad3b8d6 + size 131187 + +If it does, then this dataset has not yet been fully downloaded from git +LFS (but if it looks like a normal CSV file, then you can skip the rest +of this subsection and move on). To download this dataset, simply run +this command inside the ``datasets`` directory: + +.. code:: shell + + git lfs pull -I training_datasets/seed_datasets_archive/185_baseball/ + +Inspect the file again, and you should see that it looks like a normal +CSV file now. + +In general, if you don't know which specific dataset does a certain +example pipeline in the ``primitives`` repo uses, inspect the pipeline +run output file of that primitive (whose file path is similar to that of +the pipeline JSON file, as described in the :ref:`overview-of-primitives-and-pipelines` section, but +instead of going to ``pipelines``, go to ``pipeline_runs``). The +pipeline run is initially gzipped in the ``primitives`` repo, so +decompress it first. Then open up the actual .yml file, look at +``datasets``, and under it should be ``id``. 
If you do that for the +example pipeline run of the SKlearn logistic regression primitive +that we're looking at for this exercise, you'll find that the dataset id +is ``185_baseball_dataset``. The name of the main dataset directory is this string, +without the ``_dataset`` part. + +Now, let's actually run the pipeline using the two ways mentioned +earlier. + +Specifying all the necessary paths of a dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can use this if there is no existing pipeline run yet for a +pipeline, or if you want to manually specify the dataset path (set the +paths for ``--problem``, ``--input``, ``--test-input``, ``--score-input``, ``--pipeline`` to your target dataset +location). + +Remember to change the bind mount paths as appropriate for your system +(specified by ``-v``). + +.. code:: shell + + docker run \ + --rm \ + -v /home/foo/d3m:/mnt/d3m \ + registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 \ + python3 -m d3m \ + runtime \ + fit-score \ + --problem /mnt/d3m/datasets/training_datasets/seed_datasets_archive/185_baseball/185_baseball_problem/problemDoc.json \ + --input /mnt/d3m/datasets/training_datasets/seed_datasets_archive/185_baseball/TRAIN/dataset_TRAIN/datasetDoc.json \ + --test-input /mnt/d3m/datasets/training_datasets/seed_datasets_archive/185_baseball/TEST/dataset_TEST/datasetDoc.json \ + --score-input /mnt/d3m/datasets/training_datasets/seed_datasets_archive/185_baseball/SCORE/dataset_TEST/datasetDoc.json \ + --pipeline /mnt/d3m/primitives/v2020.1.9/JPL/d3m.primitives.classification.logistic_regression.SKlearn/2019.11.13/pipelines/862df0a2-2f87-450d-a6bd-24e9269a8ba6.json \ + --output /mnt/d3m/pipeline-outputs/predictions.csv \ + --output-run /mnt/d3m/pipeline-outputs/run.yml + +The score is displayed after the pipeline run. The output predictions +will be stored on the path specified by ``--output``, and information about +the pipeline run is stored in the path specified by ``--output-run``. + +Again, you can use the ``-h`` flag on ``fit-score`` to access the help +string and read about the different arguments, as described earlier. + +If you get a python error that complains about missing columns, or +something that looks like this:: + + ValueError: Mismatch between column name in data 'version https://git-lfs.github.com/spec/v1' and column name in metadata 'd3mIndex'. + +Chances are that the ``185_baseball`` dataset has not yet been +downloaded through git LFS. See the :ref:`previous subsection +` for details on how to verify and do this. + +Using a pipeline run file +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Instead of specifying all the specific dataset paths, you can also use +an existing pipeline run to essentially "re-run" a previous run +of the pipeline: + +.. 
code:: shell + + docker run \ + --rm \ + -v /home/foo/d3m:/mnt/d3m \ + registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 \ + python3 -m d3m \ + --pipelines-path /mnt/d3m/primitives/v2020.1.9/JPL/d3m.primitives.classification.logistic_regression.SKlearn/2019.11.13/pipelines \ + runtime \ + --datasets /mnt/d3m/datasets \ + fit-score \ + --input-run /mnt/d3m/primitives/v2020.1.9/JPL/d3m.primitives.classification.logistic_regression.SKlearn/2019.11.13/pipeline_runs/pipeline_run.yml.gz \ + --output /mnt/d3m/pipeline-outputs/predictions.csv \ + --output-run /mnt/d3m/pipeline-outputs/run.yml + +In this case, ``--input-run`` is the pipeline run file that this pipeline +will re-run, and ``---output-run`` is the new pipeline run file that will be +generated. + +Note that if you choose ``fit-score`` for the d3m runtime option, the +pipeline actually runs in two phases: fit, and produce. You can verify +this by searching for ``phase`` in the pipeline run file. + +Lastly, if you want to run multiple commands in the docker container, +simply chain your commands with ``&&`` and wrap them double quotes +(``"``) for ``bash -c``. As an example: + +.. code:: shell + + docker run \ + --rm \ + -v /home/foo/d3m:/mnt/d3m \ + registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 \ + /bin/bash -c \ + "python3 -m d3m \ + --pipelines-path /mnt/d3m/primitives/v2020.1.9/JPL/d3m.primitives.classification.logistic_regression.SKlearn/2019.11.13/pipelines \ + runtime \ + --datasets /mnt/d3m/datasets/training_datasets/seed_datasets_archive/185_baseball \ + fit-score \ + --input-run /mnt/d3m/primitives/v2020.1.9/JPL/d3m.primitives.classification.logistic_regression.SKlearn/2019.11.13/pipeline_runs/pipeline_run.yml \ + --output /mnt/d3m/pipeline-outputs/predictions.csv \ + --output-run /mnt/d3m/pipeline-outputs/run.yml && \ + head /mnt/d3m/pipeline-outputs/predictions.csv" + +Writing a new primitive +----------------------- + +Let's now try to write a very simple new primitive - one that simply +passes whatever input data it receives from the previous step to the +next step in the pipeline. Let's call this primitive "Passthrough". + +We will use this `skeleton primitive repo +`__ +as a starting point +for this exercise. A d3m primitive repo does not have to follow the +exact same directory structure as this, but this is a good structure to +start with, at least. git clone the repo into ``docs-quickstart`` at the same place +where the other repos that we have used earlier are located +(``datasets``, ``pipeline-outputs``, ``primitives``). + +Alternatively, you can also use the `test primitives +`__ +as a model/starting point. ``test_primitives/null.py`` is essentially +the same primitive that we are trying to write. + +.. _primitive-source-code: + +Primitive source code +~~~~~~~~~~~~~~~~~~~~~ + +In the ``docs-quickstart`` directory, open +``quickstart_primitives/sample_primitive1/input_to_output.py``. The first +important thing to change here is the primitive metadata, which are the +first objects defined under the ``InputToOutputPrimitive`` class. Modify the +following fields (unless otherwise noted, the values you put in must be +strings): + +- ``id``: The primitive's UUID v4 number/identifier. To generate one, + you can run simply run this simple inline Python command: + + .. code:: shell + + python3 -c "import uuid; print(uuid.uuid4())" + +- ``version``: You can use semantic versioning for this or another style + of versioning. 
Write ``"0.1.0"`` for this exercise. You should bump + the version of the primitive at least every time public interfaces + of the primitive change (e.g. hyper-parameters). + +- ``name``: The primitive's name. Write ``"Passthrough primitive"`` for + this exercise. + +- ``description``: A short description of the primitive. Write ``"A + primitive which directly outputs the input."`` for this exercise. + +- ``python_path``: This follows this format:: + + d3m.primitives... + + Primitive families can be found in the `d3m metadata page + `__ + (wait a few seconds for the page to load completely), and primitive + names can be found in the `d3m core package source code + `__. + The last segment can be used to attribute the primitive to the author and/or + describe in which way it is different from other primitives with same + primitive family and primitive name, e.g., a different implementation with different + trade-offs. + + For this exercise, write + ``"d3m.primitives.operator.input_to_output.Quickstart"``. Note that + ``input_to_output`` is not currently registered as a standard primitive name + and using it will produce a warning. For primitives you intent on publishing + make a merge request to the d3m core package to add any primitive names + you need. + +- ``primitive_family``: This must be the same as used for ``python_path``, + as enumeration value. You can use a string or Python enumeration value. + Add this import statement (if not there already): + + .. code:: python + + from d3m.metadata import base as metadata_base + + Then write ``metadata_base.PrimitiveFamily.OPERATOR`` (as + a value, not a string, so do not put quotation marks) as the value of + this field. + +- ``algorithm_types``: Algorithm type(s) that the primitive implements. + This can be multiple values in an array. Values can be chosen from + the `d3m metadata page + `__ + as well. + Write ``[metadata_base.PrimitiveAlgorithmType.IDENTITY_FUNCTION]`` + here for this exercise (as a list that contains one element, not a + string). + +- ``source``: General info about the author of this primitive. ``name`` + is usually the name of the person or the team that wrote this + primitive. ``contact`` is a ``mailto`` URI to the email address of + whoever one should contact about this primitive. ``uris`` are usually + the git clone URL of the repo, and you can also add the URL of the + source file of this primitive. + + Write these for the exercise: + + .. code:: python + + "name": "My Name", + "contact": "mailto:myname@example.com", + "uris": ["https://gitlab.com/datadrivendiscovery/docs-quickstart.git"], + +- ``keywords``: Key words for what this primitive is or does. Write + ``["passthrough"]``. + +- ``installation``: Information about how to install this primitive. Add + these import statements first: + + .. code:: python + + import os.path + from d3m import utils + + Then replace the ``installation`` entry with this: + + .. code:: python + + "installation": [{ + "type": metadata_base.PrimitiveInstallationType.PIP, + "package_uri": "git+https://gitlab.com/datadrivendiscovery/docs-quickstart@{git_commit}#egg=quickstart_primitives".format( + git_commit=utils.current_git_commit(os.path.dirname(__file__)) + ), + }], + + In general, for your own actual primitives, you might only need to + substitute the git repo URL here as well as the python egg name. + +Next, let's take a look at the ``produce`` method. You can see that it +simply makes a new dataframe out of the input data, and returns it as +the output. 
To see for ourselves though that our primitive (and thus +this ``produce`` method) gets called during the pipeline run, let's add +a log statement here. The ``produce`` method should now look something +like this: + +.. code:: python + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + self.logger.warning('Hi, InputToOutputPrimitive.produce was called!') + return base.CallResult(value=inputs) + +Note that this is simply an example primitive that is intentionally +simple for the purposes of this tutorial. It does not necessarily model +a well-written primitive, by any means. For guidelines on how to write a +good primitive, take a look at the :ref:`primitive-good-citizen`. + +setup.py +~~~~~~~~ + +Next, we fill in the necessary information in ``setup.py`` so that +``pip`` can correctly install our primitive in our local d3m +environment. Open ``setup.py`` (in the project root), and modify the +following fields: + +- ``name``: Same as the egg name you used in ``package_uri`` + +- ``version``: Same as the primitive metadata's ``version`` + +- ``description``: Same as the primitive metadata's ``description``, + or a description of all primitives if there are multiple primitives + in the package you are making + +- ``author``: Same as the primitive metadata's ``suorce.name`` + +- ``url``: Same as main URL in the primitive metadata's + ``source.uris`` + +- ``packages``: This is an array of the python packages that this + primitive repo contains. You can use the ``find_packages`` helper: + + .. code:: python + + packages=find_packages(exclude=['pipelines']), + +- ``keywords``: A list of keywords. Important standard keyword is + ``d3m_primitive`` which makes all primitives discoverable on PyPi + +- ``install_requires``: This is an array of the python package + dependencies of the primitives contained in this repo. Our primitive + needs nothing except the d3m core package (and the + ``common-primitives`` package too for testing, but this is not a + package dependency), so write this as the value of this field: + ``['d3m']`` + +- ``entry_points``: This is how the d3m runtime maps your primitives' + d3m python paths to the your repo's local python paths. For this + exercise, it should look like this: + + .. code:: python + + entry_points={ + 'd3m.primitives': [ + 'operator.input_to_output.Quickstart = quickstart_primitives.sample_primitive1:InputToOutputPrimitive', + ], + } + +That's it for this file. Briefly review it for any possible syntax +errors. + +Primitive unit tests +~~~~~~~~~~~~~~~~~~~~ + +Let's now make a python test for this primitive, which in this case will +just assert whether the input dataframe to the primitive equals the +output dataframe. Make a new file called ``test_input_to_output.py`` +inside ``quickstart_primitives/sample_primitive1`` (the same directory as +``input_to_output.py``), and write this as its contents: + +.. 
code:: python
+
+    import unittest
+    import os
+
+    from d3m import container
+    from common_primitives import dataset_to_dataframe
+    from input_to_output import InputToOutputPrimitive
+
+
+    class InputToOutputTestCase(unittest.TestCase):
+        def test_output_equals_input(self):
+            dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'tests-data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json'))
+
+            dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
+
+            dataframe_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams()
+            dataframe_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataframe_hyperparams_class.defaults())
+            dataframe = dataframe_primitive.produce(inputs=dataset).value
+
+            i2o_hyperparams_class = InputToOutputPrimitive.metadata.get_hyperparams()
+            # Use the hyper-parameters class of our own primitive here, not the one of DatasetToDataFramePrimitive.
+            i2o_primitive = InputToOutputPrimitive(hyperparams=i2o_hyperparams_class.defaults())
+            output = i2o_primitive.produce(inputs=dataframe).value
+
+            self.assertTrue(output.equals(dataframe))
+
+
+    if __name__ == '__main__':
+        unittest.main()
+
+For the dataset that this test uses, add as a git submodule the `d3m tests-data `__
+repository at the root of the ``docs-quickstart`` repository.
+Then let's install this new primitive into the Docker image's d3m environment, and
+run this test using the command below:
+
+.. code:: shell
+
+    docker run \
+        --rm \
+        -v /home/foo/d3m:/mnt/d3m \
+        registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 \
+        /bin/bash -c \
+        "pip3 install -e /mnt/d3m/docs-quickstart && \
+        cd /mnt/d3m/docs-quickstart/quickstart_primitives/sample_primitive1 && \
+        python3 test_input_to_output.py"
+
+You should see a log statement like this, as well as the python unittest
+pass message::
+
+    Hi, InputToOutputPrimitive.produce was called!
+    .
+    ----------------------------------------------------------------------
+    Ran 1 test in 0.011s
+
+Using this primitive in a pipeline
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Having seen the primitive test pass, we can now confidently include this
+primitive in a pipeline. Let's take the same pipeline that we ran :ref:`before `
+(the sklearn logistic regression's example pipeline),
+and add a step using this primitive.
+
+In the root directory of your repository, create these directories:
+``pipelines/operator.input_to_output.Quickstart``. Then, from the d3m
+``primitives`` repo, copy the JSON pipeline description file from
+``primitives/v2020.1.9/JPL/d3m.primitives.classification.logistic_regression.SKlearn/2019.11.13/pipelines``
+into the directory we just created. Open this file, and replace the
+``id`` (generate another UUID v4 number using the inline python command
+earlier, different from the primitive ``id``), as well as the created
+timestamp using this inline python command (add ``Z`` at the end of the
+generated timestamp)::
+
+    python3 -c "import time; import datetime; \
+    print(datetime.datetime.fromtimestamp(time.time()).isoformat())"
+
+You can rename the json file too using the new pipeline ``id``.
+
+Next, change the output step number (shown below, ``"steps.4.produce"``)
+to be one more than the current number (at the time of this writing, it
+is ``4``, so in this case, change it to ``5``):
+
+..
code:: json + + "outputs": [ + { + "data": "steps.5.produce", + "name": "output predictions" + } + ], + +Then, find the step that contains the +``d3m.primitives.classification.logistic_regression.SKlearn`` primitive +(search for this string in the file), and right above it, add the +following JSON object. Remember to change ``primitive.id`` to the +primitive's id that you generated in the earlier :ref:`primitive-source-code` subsection. + +.. code:: json + + { + "type": "PRIMITIVE", + "primitive": { + "id": "30d5f2fa-4394-4e46-9857-2029ec9ed0e0", + "version": "0.1.0", + "python_path": "d3m.primitives.operator.input_to_output.Quickstart", + "name": "Passthrough primitive" + }, + "arguments": { + "inputs": { + "type": "CONTAINER", + "data": "steps.2.produce" + } + }, + "outputs": [ + { + "id": "produce" + } + ] + }, + +Make sure that the step number (``"steps.N.produce"``) in +``arguments.inputs.data`` is correct (one greater than the previous step +and one less than the next step). Do this as well for the succeeding +steps, with the following caveats: + +- For ``d3m.primitives.classification.logistic_regression.SKlearn``, + increment the step number both for ``arguments.inputs.data`` and + ``arguments.outputs.data`` (at the time of this writing, the number + should be changed to ``3``). +- For + ``d3m.primitives.data_transformation.construct_predictions.Common``, + increment the step number for ``arguments.inputs.data`` (at the time + of this writing, the number should be changed to ``4``), but do not + change the one for ``arguments.reference.data`` (the value should + stay as ``"steps.0.produce"``) + +Generally, you can also programmatically generate a pipeline, as +described in the :ref:`pipeline-description-example`. + +Now we can finally run this pipeline that uses our new primitive. In the +command below, modify the pipeline JSON filename in the ``-p`` argument +to match the filename of your pipeline file (if you changed it to the +new pipeline id that you generated). + +.. code:: shell + + docker run \ + --rm \ + -v /home/foo/d3m:/mnt/d3m \ + registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 \ + /bin/bash -c \ + "pip3 install -e /mnt/d3m/docs-quickstart && \ + python3 -m d3m \ + runtime \ + fit-score \ + --problem /mnt/d3m/datasets/training_datasets/seed_datasets_archive/185_baseball/185_baseball_problem/problemDoc.json \ + --input /mnt/d3m/datasets/training_datasets/seed_datasets_archive/185_baseball/TRAIN/dataset_TRAIN/datasetDoc.json \ + --test-input /mnt/d3m/datasets/training_datasets/seed_datasets_archive/185_baseball/TEST/dataset_TEST/datasetDoc.json \ + --score-input /mnt/d3m/datasets/training_datasets/seed_datasets_archive/185_baseball/SCORE/dataset_TEST/datasetDoc.json \ + --pipeline /mnt/d3m/docs-quickstart/pipelines/operator.input_to_output.Quickstart/0f290525-3fec-44f7-ab93-bd778747b91e.json \ + --output /mnt/d3m/pipeline-outputs/predictions_new.csv \ + --output-run /mnt/d3m/pipeline-outputs/run_new.yml" + +In the output, you should see the log statement as a warning, +before the score is shown (similar to the text below):: + + ... + WARNING:d3m.primitives.operator.input_to_output.Quickstart:Hi, InputToOutputPrimitive.produce was called! + ... 
+ metric,value,normalized,randomSeed + F1_MACRO,0.31696136214800263,0.31696136214800263,0 + +Verify that the old and new ``predictions.csv`` in ``pipeline-outputs`` +are the same (you can use ``diff``), as well as the scores in the old +and new ``run.yml`` files (search for ``scores`` in the files). + +Beyond this tutorial +-------------------- + +Congratulations! You just built your own primitive and you were able to +use it in a d3m pipeline! + +Normally, when you build your own primitives, you would proceed to +validating the primitives to be included in the d3m index of all known +primitives. See the `primitives repo README +`__ +on details on how to do this. diff --git a/d3m/docs/reference.rst b/d3m/docs/reference.rst new file mode 100644 index 0000000..2fcfdb4 --- /dev/null +++ b/d3m/docs/reference.rst @@ -0,0 +1,9 @@ +.. _api-reference: + +API reference +------------- + +.. toctree:: + :maxdepth: 2 + + d3m diff --git a/d3m/docs/repostructure.rst b/d3m/docs/repostructure.rst new file mode 100644 index 0000000..f4efc61 --- /dev/null +++ b/d3m/docs/repostructure.rst @@ -0,0 +1,17 @@ +:orphan: + +.. _repostructure: + +Repository structure +-------------------- + +``master`` branch contains latest stable release of the package. +``devel`` branch is a staging branch for the next release. + +Releases are +`tagged `__. + +Contributing +~~~~~~~~~~~~~ + +See the repo's `CODE_STYLE.md `__ document for our coding style and contribution guide. Please ensure any merge requests you open follow this guide. diff --git a/d3m/docs/tutorial.rst b/d3m/docs/tutorial.rst new file mode 100644 index 0000000..4f3993b --- /dev/null +++ b/d3m/docs/tutorial.rst @@ -0,0 +1,493 @@ +Advanced Tutorial +================= + +This tutorial assumes the reader is familiar with d3m ecosystem in general. +If not, please refer to other sections of `documentation`_ first, e.g., +:ref:`quickstart`. + +.. _documentation: https://docs.datadrivendiscovery.org + +Overview of building a primitive +-------------------------------- + +1. :ref:`Recognize the base class of a primitive `. + +2. :ref:`Identify the input and output container types `. + +3. :ref:`Define metadata for each primitive `. + +4. :ref:`Write a unit test to verify the primitive functions `. + +5. :ref:`Generate the primitive annotation for the primitive `. + +6. :ref:`Write pipeline for demonstrating primitive functionality `. + +7. :ref:`Advanced: Primitive might use static files `. + +.. _primitive-class: + +Primitive class +--------------- + +There are a variety of :py:mod:`primitive interfaces/classes ` available. As an example, +a primitive doing just attribute extraction without requiring any fitting, a :py:class:`~d3m.primitive_interfaces.transformer.TransformerPrimitiveBase` +from :py:mod:`~d3m.primitive_interfaces.transformer` module can be used. + +Each primitives can have it's own :py:mod:`hyper-parameters `. Some example hyper-parameter types one can use to describe +primitive's hyper-parameters are: :py:class:`~d3m.metadata.hyperparams.Constant`, :py:class:`~d3m.metadata.hyperparams.UniformBool`, +:py:class:`~d3m.metadata.hyperparams.UniformInt`, :py:class:`~d3m.metadata.hyperparams.Choice`, :py:class:`~d3m.metadata.hyperparams.List`. 
+ +Also, each hyper-parameter should be defined as one or more of the four :ref:`hyper-parameter semantic types `: + +* `https://metadata.datadrivendiscovery.org/types/TuningParameter `__ +* `https://metadata.datadrivendiscovery.org/types/ControlParameter `__ +* `https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter `__ +* `https://metadata.datadrivendiscovery.org/types/MetafeatureParameter `__ + +Example +~~~~~~~ + +.. code:: python + + from d3m.primitive_interfaces import base, transformer + from d3m.metadata import base as metadata_base, hyperparams + + __all__ = ('ExampleTransformPrimitive',) + + + class Hyperparams(hyperparams.Hyperparams): + learning_rate = hyperparams.Uniform(lower=0.0, upper=1.0, default=0.001, semantic_types=[ + 'https://metadata.datadrivendiscovery.org/types/TuningParameter', + ]) + clusters = hyperparams.UniformInt(lower=1, upper=100, default=10, semantic_types=[ + 'https://metadata.datadrivendiscovery.org/types/TuningParameter', + ]) + + + class ExampleTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + The docstring is very important and must to be included. It should contain + relevant information about the hyper-parameters, primitive functionality, etc. + """ + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + pass + +.. _input-output-types: + +Input/Output types +------------------ + +The acceptable inputs/outputs of a primitive must be pre-defined. D3M supports a variety of +standard input/output :ref:`container types ` such as: + +- ``pandas.DataFrame`` (as :py:class:`d3m.container.pandas.DataFrame`) + +- ``numpy.ndarray`` (as :py:class:`d3m.container.numpy.ndarray`) + +- ``list`` (as :py:class:`d3m.container.list.List`) + +.. note:: + Even thought D3M container types behave mostly as standard types, the D3M container types must be used for inputs/outputs, because D3M container types support D3M metadata. + +Example +~~~~~~~ + +.. code:: python + + from d3m import container + + Inputs = container.DataFrame + Outputs = container.DataFrame + + + class ExampleTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + ... + +.. note:: + When returning the output DataFrame, its metadata should be updated with the correct semantic and structural types. + +Example +~~~~~~~ + +.. code:: python + + # Update metadata for each DataFrame column. + for column_index in range(outputs.shape[1]): + column_metadata = {} + column_metadata['structural_type'] = type(1.0) + column_metadata['name'] = "column {i}".format(i=column_index) + column_metadata["semantic_types"] = ("http://schema.org/Float", "https://metadata.datadrivendiscovery.org/types/Attribute",) + outputs.metadata = outputs.metadata.update((metadata_base.ALL_ELEMENTS, column_index), column_metadata) + +.. _tutorial-primitive-metadata: + +Primitive Metadata +------------------ + +It is very crucial to define :ref:`primitive metadata ` for the primitive properly. +Primitive metadata can be used by TA2 systems to metalearn about primitives and in general decide which primitive to use when. + +Example +~~~~~~~ + +.. code:: python + + from d3m.primitive_interfaces import base, transformer + from d3m.metadata import base as metadata_base, hyperparams + + __all__ = ('ExampleTransformPrimitive',) + + class ExampleTransformPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + Docstring. 
+ """ + + metadata = metadata_base.PrimitiveMetadata({ + 'id': , + 'version': , + 'name': , + 'python_path': 'd3m.primitives.<>.<>.<>' # Must match path in setup.py, + 'source': { + 'name': , + 'uris': [], + 'contact': 'mailto:' + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+@{git_commit}#egg='.format( + git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'algorithm_types': [ + # Check https://metadata.datadrivendiscovery.org/devel/?definitions#definitions.algorithm_types for all available algorithm types. + # If algorithm type s not available a Merge Request should be made to add it to core package. + metadata_base.PrimitiveAlgorithmType., + ], + # Check https://metadata.datadrivendiscovery.org/devel/?definitions#definitions.primitive_family for all available primitive family types. + # If primitive family is not available a Merge Request should be made to add it to core package. + 'primitive_family': metadata_base.PrimitiveFamily. + }) + + ... + +.. _unit-tests: + +Unit tests +---------- + +Once the primitives are constructed, unit testing must be done to see if the +primitive works as intended. + +**Sample Setup** + +.. code:: python + + import os + import unittest + + from d3m.container import dataset + from d3m.metadata import base as metadata_base + from common_primitives import dataset_to_dataframe + + from example_primitive import ExampleTransformPrimitive + + + class ExampleTransformTest(unittest.TestCase): + def test_happy_path(): + # Load a dataset. + # Datasets can be obtained from: https://datasets.datadrivendiscovery.org/d3m/datasets + base_path = '../datasets/training_datasets/seed_datasets_archive/' + dataset_doc_path = os.path.join(base_path, '38_sick_dataset', 'datasetDoc.json') + dataset = dataset.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + dataframe_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() + dataframe_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataframe_hyperparams_class.defaults()) + dataframe = dataframe_primitive.produce(inputs=dataset).value + + # Call example transformer. + hyperparams_class = SampleTransform.metadata.get_hyperparams() + primitive = SampleTransform(hyperparams=hyperparams_class.defaults()) + test_out = primitive.produce(inputs=dataframe).value + + # Write assertions to make sure that the output (type, shape, metadata) is what is expected. + self.assertEqual(...) + + ... + + + if __name__ == '__main__': + unittest.main() + +It is recommended to do the testing inside the D3M Docker container: + +.. code:: shell + + docker run --rm -v /home/foo/d3m:/mnt/d3m -it \ + registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 + cd /mnt/d3m/example_primitive + python3 primitive_name_test.py + +.. _primitive-annotation: + +Primitive annotation +-------------------- + +Once primitive is constructed and unit testing is successful, the +final step in building a primitive is to generate the primitive annotation +which will be indexed and used by D3M. + +.. code:: shell + + docker run --rm -v /home/foo/d3m:/mnt/d3m -it \ + registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 + cd /mnt/d3m/example_primitive + pip3 install -e . + python3 -m d3m index describe -i 4 + +Alternatively, a `helper script `__ +can be used to generate primitive annotations as well. 
+This can be more convenient when managing multiple primitives.
+In this case, generating the primitive annotation is done as follows:
+
+.. code:: shell
+
+    docker run --rm -v /home/foo/d3m:/mnt/d3m -it \
+        registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9
+    cd /mnt/d3m/example_primitive
+    pip3 install -e .
+    python3 generate-primitive-json.py ...
+
+.. _example-pipeline:
+
+Example pipeline
+----------------
+
+After building a custom primitive, it has to be used in an example pipeline and run on one of the
+D3M seed datasets in order to be integrated with other indexed D3M primitives.
+
+The essential elements of pipelines are:
+
+``Dataset Denormalizer -> Dataset Parser -> Data Cleaner (If necessary) -> Feature Extraction -> Classifier/Regressor -> Output``
+
+Example code for building a pipeline is shown below:
+
+.. code:: python
+
+    # D3M dependencies
+    from d3m import index
+    from d3m.metadata.base import ArgumentType
+    from d3m.metadata.pipeline import Pipeline, PrimitiveStep
+
+    # Common Primitives
+    from common_primitives.column_parser import ColumnParserPrimitive
+    from common_primitives.dataset_to_dataframe import DatasetToDataFramePrimitive
+    from common_primitives.extract_columns_semantic_types import ExtractColumnsBySemanticTypesPrimitive
+
+    # Testing primitive
+    from quickstart_primitives.sample_primitive1.input_to_output import InputToOutputPrimitive
+
+    # Pipeline
+    pipeline = Pipeline()
+    pipeline.add_input(name='inputs')
+
+    # Step 0: DatasetToDataFrame (Dataset Denormalizer)
+    step_0 = PrimitiveStep(primitive_description=DatasetToDataFramePrimitive.metadata.query())
+    step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
+    step_0.add_output('produce')
+    pipeline.add_step(step_0)
+
+    # Step 1: Custom primitive
+    step_1 = PrimitiveStep(primitive=InputToOutputPrimitive)
+    step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+    step_1.add_output('produce')
+    pipeline.add_step(step_1)
+
+    # Step 2: Column Parser (Dataset Parser)
+    step_2 = PrimitiveStep(primitive_description=ColumnParserPrimitive.metadata.query())
+    step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+    step_2.add_output('produce')
+    pipeline.add_step(step_2)
+
+    # Step 3: Extract Attributes (Feature Extraction)
+    step_3 = PrimitiveStep(primitive_description=ExtractColumnsBySemanticTypesPrimitive.metadata.query())
+    step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
+    step_3.add_output('produce')
+    step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
+    pipeline.add_step(step_3)
+
+    # Step 4: Extract Targets (Feature Extraction)
+    step_4 = PrimitiveStep(primitive_description=ExtractColumnsBySemanticTypesPrimitive.metadata.query())
+    step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+    step_4.add_output('produce')
+    step_4.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
+    pipeline.add_step(step_4)
+
+    attributes = 'steps.3.produce'
+    targets = 'steps.4.produce'
+
+    # Step 5: Imputer (Data Cleaner)
+    step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_cleaning.imputer.SKlearn'))
+    step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
+    step_5.add_output('produce')
+    pipeline.add_step(step_5)
+
+    # Step 6: Classifier
+    step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.classification.decision_tree.SKlearn'))
+    step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
+    step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
+    step_6.add_output('produce')
+    pipeline.add_step(step_6)
+
+    # Final Output
+    pipeline.add_output(name='output predictions', data_reference='steps.6.produce')
+
+    # print(pipeline.to_json())
+    with open('./pipeline.json', 'w') as write_file:
+        write_file.write(pipeline.to_json(indent=4, sort_keys=False, ensure_ascii=False))
+
+Once the pipeline is constructed and its JSON file is generated, the pipeline can be run using the
+``python3 -m d3m runtime`` command.
+Successfully running the pipeline validates that the primitive is working as intended.
+
+.. code:: shell
+
+    docker run --rm -v /home/foo/d3m:/mnt/d3m -it \
+        registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 \
+        /bin/bash -c "cd /mnt/d3m; \
+            pip3 install -e .; \
+            cd pipelines; \
+            python3 -m d3m runtime fit-produce \
+                --pipeline pipeline.json \
+                --problem /datasets/seed_datasets_current/38_sick/TRAIN/problem_TRAIN/problemDoc.json \
+                --input /datasets/seed_datasets_current/38_sick/TRAIN/dataset_TRAIN/datasetDoc.json \
+                --test-input /datasets/seed_datasets_current/38_sick/TEST/dataset_TEST/datasetDoc.json \
+                --output 38_sick_results.csv \
+                --output-run pipeline_run.yml; \
+            exit"
+
+.. _static-files:
+
+Advanced: Primitive with static files
+-------------------------------------
+
+When building a primitive that uses external/static files, e.g., pre-trained weights, the primitive
+metadata must properly define such a dependency.
+The static file can be hosted anywhere you prefer, as long as the URL to the file is a direct download link. It must
+be public so that users of your primitive can access the file. Be sure to keep the URL available;
+older versions of the primitive could start failing if the URL stops resolving.
+
+.. note::
+    Full code of this section can be found in the `quickstart repository `__.
+
+Below is the metadata entry, here named ``_weights_configs``, which is required for
+each static file.
+
+.. code:: python
+
+    _weights_configs = [{
+        'type': 'FILE',
+        'key': '<weights file name>',
+        'file_uri': '<direct download URL of the weights file>',
+        'file_digest': 'sha256sum of the <weights file>',
+    }]
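+
+The ``file_digest`` is the SHA-256 checksum of the static file. One way to compute it, shown here with a
+hypothetical file name, is:
+
+.. code:: python
+
+    import hashlib
+
+    # 'model_weights.h5' is a hypothetical file name; use your actual static file.
+    with open('model_weights.h5', 'rb') as weights_file:
+        print(hashlib.sha256(weights_file.read()).hexdigest())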
+
+This ``_weights_configs`` should be added directly to the ``installation`` field of the primitive metadata.
+
+.. code:: python
+
+    import os
+
+    from d3m import utils as d3m_utils
+    from d3m.primitive_interfaces import base, transformer
+    from d3m.metadata import base as metadata_base, hyperparams
+
+    __all__ = ('ExampleTransform',)
+
+    class ExampleTransform(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+        """
+        Docstring.
+        """
+
+        _weights_configs = [{
+            'type': 'FILE',
+            'key': '<weights file name>',
+            'file_uri': '<direct download URL of the weights file>',
+            'file_digest': 'sha256sum of the <weights file>',
+        }]
+
+        metadata = ...
+            'installation': [{
+                'type': metadata_base.PrimitiveInstallationType.PIP,
+                'package_uri': 'git+<repository URI>@{git_commit}#egg=<package name>'.format(
+                    git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)),
+                ),
+            }] + _weights_configs,
+            ...
+
+        ...
+
+After the primitive metadata is defined, it is important to include code that returns the paths of these files.
+An example is given as follows:
+
+.. code:: python
+
+    def _find_weights_path(self, key_filename):
+        if key_filename in self.volumes:
+            weight_file_path = self.volumes[key_filename]
+        else:
+            weight_file_path = os.path.join('.', self._weights_configs[0]['file_digest'], key_filename)
+
+        if not os.path.isfile(weight_file_path):
+            raise ValueError(
+                "Can't get weights file from volumes by key '{key_filename}' and at path '{path}'.".format(
+                    key_filename=key_filename,
+                    path=weight_file_path,
+                ),
+            )
+
+        return weight_file_path
+
+In this example, the ``_find_weights_path`` method first tries to find the static file among the volumes
+provided by the runtime, based on the weights file key.
+If it cannot be found there (e.g., the runtime was not provided with static files), it then looks in the current directory.
+The latter fallback is useful during development. A sketch of how a primitive might call this method is
+given at the end of this section.
+
+To run a pipeline with such a primitive, you have to download the static files and provide them to the runtime:
+
+.. code:: shell
+
+    docker run --rm -v /home/foo/d3m:/mnt/d3m -it \
+        registry.gitlab.com/datadrivendiscovery/images/primitives:ubuntu-bionic-python36-v2020.1.9 \
+        /bin/bash -c "cd /mnt/d3m; \
+            pip3 install -e .; \
+            cd pipelines; \
+            mkdir /static; \
+            python3 -m d3m index download -p d3m.primitives.path.of.Primitive -o /static; \
+            python3 -m d3m runtime --volumes /static fit-produce \
+                --pipeline feature_pipeline.json \
+                --problem /datasets/seed_datasets_current/22_handgeometry/TRAIN/problem_TRAIN/problemDoc.json \
+                --input /datasets/seed_datasets_current/22_handgeometry/TRAIN/dataset_TRAIN/datasetDoc.json \
+                --test-input /datasets/seed_datasets_current/22_handgeometry/TEST/dataset_TEST/datasetDoc.json \
+                --output 22_handgeometry_results.csv \
+                --output-run feature_pipeline_run.yml; \
+            exit"
+
+The static files will be downloaded and stored locally based on the ``file_digest`` of ``_weights_configs``.
+This way, the same files used by multiple primitives are not duplicated:
+
+.. code:: shell
+
+    mkdir /static
+    python3 -m d3m index download -p d3m.primitives.path.of.Primitive -o /static
+
+The optional ``-p`` argument downloads static files only for a particular primitive, matched by its Python path.
+The optional ``-o`` argument downloads the static files into a common folder. If not provided, they are
+downloaded into the current directory.
+
+After the download, the file structure is as follows::
+
+    /static/
+        <file_digest>/
+            <file_name>
+        <file_digest>/
+            <file_name>
+        ...
+        ...
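+
+For completeness, here is a minimal sketch of how a primitive might call ``_find_weights_path`` to locate
+and read its weights inside ``produce``. The key ``'model_weights.h5'`` and the ``_load_model`` /
+``_transform_with_model`` helpers are hypothetical placeholders; a real primitive would use the key it
+declared in ``_weights_configs`` and the loading routine of its model framework.
+
+.. code:: python
+
+    class ExampleTransform(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
+        def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
+            # Resolve the static file through the runtime-provided volumes, falling back to the current directory.
+            weights_path = self._find_weights_path('model_weights.h5')
+
+            with open(weights_path, 'rb') as weights_file:
+                model = self._load_model(weights_file.read())  # Hypothetical helper.
+
+            outputs = self._transform_with_model(model, inputs)  # Hypothetical helper returning Outputs.
+            return base.CallResult(outputs)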
diff --git a/d3m/entry_points.ini b/d3m/entry_points.ini new file mode 100644 index 0000000..88e433b --- /dev/null +++ b/d3m/entry_points.ini @@ -0,0 +1,2 @@ +[d3m.primitives] +evaluation.compute_scores.Core = d3m.contrib.primitives.compute_scores:ComputeScoresPrimitive diff --git a/d3m/oldest_dependencies.py b/d3m/oldest_dependencies.py new file mode 100755 index 0000000..11acbab --- /dev/null +++ b/d3m/oldest_dependencies.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 + +import pkg_resources + +package = pkg_resources.working_set.by_key['d3m'] + +oldest_dependencies = [] + +for requirement in package.requires(): + dependency = requirement.project_name + if requirement.extras: + dependency += '[' + ','.join(requirement.extras) + ']' + for comparator, version in requirement.specs: + if comparator == '==': + if len(requirement.specs) != 1: + raise ValueError('Invalid dependency: {requirement}'.format(requirement=requirement)) + dependency += '==' + version + elif comparator == '<=': + if len(requirement.specs) != 2: + raise ValueError('Invalid dependency: {requirement}'.format(requirement=requirement)) + elif comparator == '>=': + dependency += '==' + version + + oldest_dependencies.append(dependency) + +for dependency in oldest_dependencies: + print(dependency) diff --git a/d3m/run_benchmarks.sh b/d3m/run_benchmarks.sh new file mode 100755 index 0000000..092baa4 --- /dev/null +++ b/d3m/run_benchmarks.sh @@ -0,0 +1,18 @@ +#!/bin/bash -e + +if ! git remote get-url upstream > /dev/null 2>&1 ; then + git remote add upstream https://gitlab.com/datadrivendiscovery/d3m.git +fi +git fetch upstream + +asv machine --yes --config tests/asv.conf.json + +ASV_OUTPUT=$(asv continuous upstream/devel HEAD -s -f 1.1 -e --config tests/asv.conf.json) +echo "$ASV_OUTPUT" + +if echo "$ASV_OUTPUT" | egrep -q "(SOME BENCHMARKS HAVE CHANGED SIGNIFICANTLY)|( failed$)" ; then + echo "Benchmarks have errors." + exit 1 +else + echo "Benchmarks ran without errors." +fi diff --git a/d3m/run_tests.py b/d3m/run_tests.py new file mode 100755 index 0000000..16c264a --- /dev/null +++ b/d3m/run_tests.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 + +import sys +import unittest + +runner = unittest.TextTestRunner(verbosity=1) + +tests = unittest.TestLoader().discover('tests') + +if not runner.run(tests).wasSuccessful(): + sys.exit(1) diff --git a/d3m/setup.cfg b/d3m/setup.cfg new file mode 100644 index 0000000..b6a8bc3 --- /dev/null +++ b/d3m/setup.cfg @@ -0,0 +1,25 @@ +[pycodestyle] +max-line-length = 200 + +[metadata] +description-file = README.md + +[mypy] +warn_redundant_casts = True +# TODO: Enable back once false positives are fixed. 
+# See: https://github.com/python/mypy/issues/4412 +#warn_unused_ignores = True +warn_unused_configs = True +disallow_untyped_defs = True + +# TODO: Remove once this is fixed: https://github.com/python/mypy/issues/4300 +[mypy-d3m.container.list] +ignore_errors = True + +# TODO: Remove once this is fixed: https://github.com/python/mypy/issues/4300 +[mypy-d3m.metadata.hyperparams] +ignore_errors = True + +# TODO: Remove once this is fixed: https://github.com/python/mypy/pull/4384#issuecomment-354033177 +[mypy-d3m.primitive_interfaces.distance] +ignore_errors = True diff --git a/d3m/setup.py b/d3m/setup.py new file mode 100644 index 0000000..b79da82 --- /dev/null +++ b/d3m/setup.py @@ -0,0 +1,87 @@ +import os +import os.path +import sys +from setuptools import setup, find_packages + +PACKAGE_NAME = 'd3m' +MINIMUM_PYTHON_VERSION = 3, 6 + + +def check_python_version(): + """Exit when the Python version is too low.""" + if sys.version_info < MINIMUM_PYTHON_VERSION: + sys.exit("Python {}.{}+ is required.".format(*MINIMUM_PYTHON_VERSION)) + + +def read_package_variable(key): + """Read the value of a variable from the package without importing.""" + module_path = os.path.join(PACKAGE_NAME, '__init__.py') + with open(module_path) as module: + for line in module: + parts = line.strip().split(' ') + if parts and parts[0] == key: + return parts[-1].strip("'") + raise KeyError("'{0}' not found in '{1}'".format(key, module_path)) + + +def read_readme(): + with open(os.path.join(os.path.dirname(__file__), 'README.md'), encoding='utf8') as file: + return file.read() + + +def read_entry_points(): + with open('entry_points.ini') as entry_points: + return entry_points.read() + + +check_python_version() +version = read_package_variable('__version__') +description = read_package_variable('__description__') +author = read_package_variable('__author__') + +setup( + name=PACKAGE_NAME, + version=version, + description=version, + author=author, + packages=find_packages(exclude=['contrib', 'docs', 'site', 'tests*']), + package_data={'d3m': ['metadata/schemas/*/*.json', 'contrib/pipelines/*']}, + data_files=[('./', ['./entry_points.ini'])], + install_requires=[ + 'scikit-learn[alldeps]>=0.20.3,<=0.22.2.post1', + 'pytypes==1.0b5', + 'frozendict==1.2', + 'numpy>=1.15.4,<=1.18.2', + 'jsonschema>=3.0.2,<=3.2.0', + 'requests>=2.19.1,<=2.23.0', + 'strict-rfc3339==0.7', + 'rfc3987==1.3.8', + 'webcolors>=1.8.1,<=1.11.1', + 'dateparser>=0.7.0,<=0.7.2', + 'python-dateutil==2.8.1', + 'pandas>=0.23.4,<=1.0.3', + 'typing-inspect==0.5.0', + 'GitPython==3.1.0', + 'jsonpath-ng==1.4.3', + 'custom-inherit>=2.2.0,<=2.2.2', + 'PyYAML>=5.1,<=5.3', + 'pycurl>=7.43.0.2,<=7.43.0.5', + 'pyarrow>=0.15.1,<=0.16.0', + 'gputil>=1.3.0,<=1.4.0', + 'pyrsistent>=0.14.11,<=0.15.7', + 'scipy>=1.2.1,<=1.4.1', + 'openml==0.10.1', + ], + tests_require=[ + 'asv==0.3.1', + 'docker[tls]==2.7', + ], + entry_points=read_entry_points(), + url='https://gitlab.com/datadrivendiscovery/d3m', + long_description=read_readme(), + long_description_content_type='text/markdown', + license='Apache-2.0', + classifiers=[ + 'License :: OSI Approved :: Apache Software License', + ], +) diff --git a/d3m/site/.gitignore b/d3m/site/.gitignore new file mode 100644 index 0000000..ef5c05b --- /dev/null +++ b/d3m/site/.gitignore @@ -0,0 +1,4 @@ +static/bundle.js +static/bundle.css +static/fonts +node_modules diff --git a/d3m/site/Makefile b/d3m/site/Makefile new file mode 100644 index 0000000..4de816f --- /dev/null +++ b/d3m/site/Makefile @@ -0,0 +1,14 @@ +default: static/bundle.js 
static/bundle.css static/fonts + +clean: + rm -f static/bundle.js static/bundle.css + rm -rf static/fonts + +static/bundle.js: client.js html_construction.js package-lock.json + ./node_modules/.bin/browserify -d -t [ babelify --presets [ env ] ] ./client.js > $@ + +static/bundle.css: client.less package-lock.json + ./node_modules/.bin/lessc ./client.less $@ + +static/fonts: + cp -r node_modules/font-awesome/fonts static/fonts diff --git a/d3m/site/build_site.sh b/d3m/site/build_site.sh new file mode 100755 index 0000000..e6bd7ee --- /dev/null +++ b/d3m/site/build_site.sh @@ -0,0 +1,47 @@ +#!/bin/bash -e + +# Builds sites for schemas. For each tag and `devel` branch a separate site is built. + +deploy () { + if [ ! -d site ] || [ ! -e site/package.json ] + then + return 0 + fi + + cd site + npm install + make + cd .. + + # Copying results into output directory "public". + cp -a site/static public/$1 + rm -f public/$1/schemas + cp -a d3m/metadata/schemas public/$1/schemas + + # Cleaning. + cd site + make clean + rm -fr node_modules + cd .. + + # Reverting changes, "package-lock.json" might be changed. + git checkout -- . +} + +rm -rf public +mkdir public + +git checkout devel +cp -a d3m/metadata/schemas public/schemas +deploy devel + +while read -r -a line +do + IFS='/' read -r -a parts <<< ${line[1]} + + if [[ ${parts[-1]} == v* ]] + then + git checkout ${line[0]} + deploy ${parts[-1]} + fi +done <<< $(git show-ref --tags) diff --git a/d3m/site/build_site_types.py b/d3m/site/build_site_types.py new file mode 100644 index 0000000..178fb4d --- /dev/null +++ b/d3m/site/build_site_types.py @@ -0,0 +1,284 @@ +""" +Constructs sites for ``semantic types``, site, which hierarchically displays all types, and site, which +lists all available versions of schemas. + +Sites are placed under ``types`` folder inside ``public`` folder, which should exist at the root of the repository. +""" + +import json +import os +import typing +from shutil import copyfile + +from pyquery import PyQuery +from yattag import Doc + +PREFIX = 'https://metadata.datadrivendiscovery.org/types/' + +types = {} + + +def cycle_detection(url: str, past_urls: typing.List[str]) -> None: + """ + Detects cycle in semantic types' hierarchy. + + Also checks if referenced urls in ``parents`` exist. + + Parameters + ---------- + url : str + URL of the semantic type that is to be analyzed. + past_urls : typing.List[str] + List of previously called urls. + """ + + global types + + if url not in types: + raise Exception("Cannot find referenced semantic type '{url}'".format(url=url)) + if url in past_urls: + raise Exception("Cycle in semantic types hierarchy. Cycle: '{cycle}'".format( + cycle=(' -> '.join(past_urls + [url])) + )) + + for parent in types[url]['parents']: + cycle_detection(parent, past_urls + [url]) + + +def template(tag, line): + """ + Generates HTML base for the site. + + Yields the result, so HTML end brackets (e.g. ````) are not closed. + + Usage:: + + for temp in template(tag, line): + ... + + Parameters + ---------- + tag : yattag.tag + ``tag`` from the ``yattag`` module. + line : yattag.line + ``line`` from the ``yattag`` module. + + Returns + ------- + Element of the ``yattag`` module representing container of the page. 
+ """ + + global types + + with tag('html'): + with tag('head'): + line('title', "D3M Metadata") + line('meta', '', charset='utf-8') + line('meta', '', name='viewport', content='width=device-width, initial-scale=1') + line('link', '', rel='stylesheet', href='/schema-org.css') + with tag('body'): + with tag('div', id='container'): + with tag('div', id='intro'): + with tag('div', id='pageHeader'): + with tag('div', klass='wrapper'): + with tag('div', id='sitename'): + with tag('h1'): + line('a', "metadata.datadrivendiscovery.org", href='/') + with tag('div', id='selectionbar'): + with tag('div', klass='wrapper'): + with tag('ul'): + with tag('li'): + line('a', "Types", href='/types') + with tag('li'): + line('a', "Schemas", href='/devel') + with tag('div', id='mainContent'): + yield + + +def construct_types(site, parent: str) -> None: + """ + Constructs hierarchy displayed semantic types at ``/types/`` path. + + More specifically, constructs list (HTML ``