From e519729fc13cf3ecd7b05c4a697905ec66fbb9b8 Mon Sep 17 00:00:00 2001 From: Mia_Wan Date: Mon, 28 Sep 2020 18:59:52 -0500 Subject: [PATCH] add ensemble methods to pipeline Former-commit-id: 0aac8ed31d1381e4f004cf99865319860ca87d5b [formerly 54095e5d5f783509d3f65c0aa54f152a51027f1f] [formerly 5f0e21e20057623725dd40166034d3a5be5518a1 [formerly 9702ef05e74eacade359f6c7869df9ee4b222119]] [formerly 08c89fc468ab7e8ef15d4c4cf938392f1e85c970 [formerly 62d490094cf95c97d0037dd20585e4bb26803dc4] [formerly ea2a04f2d73ac554e58272543f35ec0464938e3d [formerly e9198d0f199cd05940f00d733ec4fdd272965cda]]] [formerly b5781dc6ef889ac8522d08042ee44cfbee0317ab [formerly 2ced0513ec35c0aef314ddbedda681d8097e1231] [formerly 116f6170638d1a74e70d3b3bfbbd796a343fd742 [formerly 7b649b3cd25b04511043dd7aa92abb1e497b2584]] [formerly 0fa911701489735b3ad535bb814bf3f307edc97a [formerly 9c32f76780f10f265945600066991228151a7fa5] [formerly 23897889c507561764f6163b87392ffc9dfa02b4 [formerly bfce13eeab463edf1367cbd3f8848948455dd05a]]]] [formerly e88ebc108f6c85314ce9327c12ae7fe2e2299b0c [formerly 8e0da546a61bc6e78dc11938ba27168880608992] [formerly 5c399fc9e256134bbc1b0d35477a122e4b929e4b [formerly 79e41ea33e085160a38e9b4dc0af178b820cb352]] [formerly aa1974a08b06c80e12b514522ee7de54d951eb44 [formerly 823f9638a95d59bb0caeb105ebd4dd6a3346f0db] [formerly c617b22e673c9a37ea803222de8a6ee61dad6d7f [formerly ecb36514a046dcf42044bcde2d71818be742c4cf]]] [formerly 0010947336182d4fa355d4b25f68bd809abfc551 [formerly d119c90230e3d6cd2ffeaf60111c9150a8c785ad] [formerly 52bb0ce978c11cad71fa2d486fccf79604cab0a6 [formerly 4026124a7911a1c0c15ab3e644e3b91ba86a232c]] [formerly 35f7ba2d2e400ec85b2cd387b8d8491150b98baf [formerly 713166e02907ab9e6a0f553325f579d3b2fb8fd9] [formerly a624cee295efeb267b69ccd8d42d33f17f5ca9d3 [formerly 4929310a7fb3989bc73b30a0a8b9b3ec0c866fdb]]]]] [formerly 74a828bb119d4cec0b64bd17b51eba5c9997ccc1 [formerly e7f0c2e563fc752b17d7e6676c69bb7135a69d2e] [formerly 490afa890cf04a4439affe041fa30c22ecae7197 
[formerly 9206b73b902d23add51f96c169420b5d61aa438a]] [formerly 8ca01b1230cd445d765661d848f52fcd0c8ea4f4 [formerly 15e949f7883736b846deb45a96c6601b008cad25] [formerly baf4e0150a508e48e468ad7accc11740a251ae88 [formerly 04053bae356fcc22a5713349f4a6b815b172502c]]] [formerly 8a9428f968dd4a5d42ecf6583101b94354137a16 [formerly db62e7894dc2396687b07b6ae5fd61906ff3367c] [formerly 715b0046f38cda32cc3b5714c5a4f0aece67c7c1 [formerly 59b2aed4356373cf2381b830faff243ccef7e753]] [formerly 635186ce7b6483d68cb9887a215c876c70faaf37 [formerly cc0cf93bb6c251a24d7b03eeb728c8ffd73bc7c9] [formerly 9ca2a4e0062859b569e2d60dfd80667b43146149 [formerly c3d79d1cd0ee414afa2261ccc4df20511405f581]]]] [formerly f46290d5829626ff82494814ee44fd98b2085339 [formerly 47160128393bd60f9a1d8638f95360b98a10a20b] [formerly 4ba202f0e0c520b882ed1c594574ac05809189b8 [formerly 3856e5eb2a20cf86e2b98555f72ed04a5b4f2f53]] [formerly deb75358b752e5a5c17026d8ebd69f2f0cc871d0 [formerly 0d99e8fa0ba1d493a73854991fd379e89c9d4054] [formerly 3ce5b6f093bb359453ae5626a601cfff9e7b1b3f [formerly a61c4bb9defe0445927e6336563ef73f891b3131]]] [formerly c7039765bb515347b63e084885181eb9c8f97be4 [formerly dadcd9915b042328705d9a588d4d421cbb9387bd] [formerly 08a186707e6397400514f3f1685b8687f1084da8 [formerly d1bf05d7e97208fb21175e485a254af496725ba6]] [formerly fcb25900861e20681ddaddb2b194dc8f7f0c4aed [formerly 5dfb75e6edae71fdec674f92512982e64a9ac407] [formerly 9308f51cbc1c312b5fdb96b9d4ecf2569390166e [formerly 427e33c78cedd47b9aad0546175162b96a02ec1e]]]]]] Former-commit-id: e8ef4326afd2a2004249ef02bc668047ecd73c90 [formerly 380372c0fb4721a911f4b66dbd471d660b6f980d] [formerly 93871265c8f1699003d49dc65e2d8b05ef9fea17 [formerly 3aff99cde7fd7d0b53f5d14c6c44cecd3e20a4aa]] [formerly 6d892c0d2408e77c97d69276f6320b1d453ecbfc [formerly 1d8eb6d2cceb8a824c91a2123c645d5f7a17a85a] [formerly 16f625b708f48dab65db48b013cbfc7f0978ced7 [formerly dc1f8c55bfef69f6a6f358df762efdc326cf19bf]]] [formerly ca9d83ffa38a6a07480ea6f905f99afac5c1eda4 [formerly 
c09a6d106b4025d8511b96120336e26c5a969c99] [formerly 2e3317f357bd1c17b7bce3ef6906e2440167dbde [formerly 977d8b30299b9dd4c74561012cd053281e4c05e1]] [formerly 370448792d4562b97f5f79e53153686c2c931c5e [formerly f92f5e8e9b77f458b7d08a7a778ba083ec5e142a] [formerly f74e823892ab8f85ebeefd33e7f8ede669977889 [formerly 83cc7e2d6c0aa6c07b10e1aea3aeb848e1740449]]]] [formerly 7abc24c7a718ee8ea67421149a92b363c0d1fa42 [formerly 27d525a99e7f3d32d12cf0c949d3e2563afc475e] [formerly bc6b671a0eea1e33aa5ff4af41ba185adeeb27dd [formerly a9ea335e3327434d3e1b1ab47e1ef71841c86f3c]] [formerly b00a5949fff3bb307f54d7b38bdaf034fb206fbb [formerly 152bf4c388643ca0b87e59ee5417bfee967850b0] [formerly 135e8bf5311165f6a8014a40f2dbf62eb63efbbe [formerly 90cbe275657fcd641b6eb0794502e82321d8296b]]] [formerly 8ba2b02175ce239116f8d94dce6e3c11b9d535d9 [formerly e5117f2da480a914b0a8c85731155798b1767dc3] [formerly 32ad600dd454d8b8ce3a9d4380ba81ccd771e4a9 [formerly f92251497eb8540e663fb374b32c110db3410532]] [formerly e224e6dbc32de985189834fc8be0b9555e8bbc56 [formerly c0a682e0186e057f8ded9e118353603ba09c892b] [formerly 9308f51cbc1c312b5fdb96b9d4ecf2569390166e]]]] Former-commit-id: c15d9ba7fe9dbca1eefba7df3273116396653efc [formerly b607deb29404b8ab220a3170fc47f5a93b89fd89] [formerly 434d0e154bc445ecc80c410ab0b54fcb59adfd6b [formerly cb55cd1547cdb67f1c71c938f64e9f606079620a]] [formerly b51280175c07e174f8dca4ae4207a1c3d248a6f2 [formerly c1a7fc5a7bac8d4c823e9ba52e730f2758b1aeee] [formerly 64397fd023ae2955cf55467711adb763ca9aaef3 [formerly cc7b7afc7591e8f00a730857a911e6744c0c9ce5]]] [formerly 4549e6b1261c6fb6013040b7cd67eba156a01276 [formerly 9a5f6e86b26baf49bdf7d1519d1e8a5fdba6a4b1] [formerly 468b0a7b5974db8f21c35ef495b3fcb55fc96a48 [formerly f58d56871939f0b421a984ab29e5ca7754dd74c4]] [formerly cd891c7725ba25ac9f4c1a5b95890aaa0311e4c3 [formerly f5c3a4daf67e4573dc36ed0566110b4ded55b7fa] [formerly a5236160c8444e4a0d430fbaf9848570c1ecf355 [formerly 31e5383ba109f9c312ea2d7d25a414b48f506a5e]]]] Former-commit-id: 
dd6fde427a196cbf9944e856a9acb83d14b47b99 [formerly e6b828625d2211ca22e28893077588d3afbb721c] [formerly 20376456d0cdd17c09b508f166dd5f75d5b85ed2 [formerly 2f19f94b6c22969a3a1fdb3e23ef7d08bfb53e16]] [formerly 9b23f2e0690d7e61fc8184cecfd6c90b24ebd877 [formerly 872ac94733a4542f855eedc36ca0e5511199792a] [formerly f8325526d6554d4ba4887af1f35e799e66b7824d [formerly 18439b2511dc9fb2a891c3719b54cebf24b58be8]]]
Former-commit-id: 3364cf523237ebd228f2507012b6c148e38affdf [formerly 084c4f4f61e99e1d9258726dd47bb77b48562f6a] [formerly 1c7610b381bd42c79869266d44f8a497dc1b7fcd [formerly 0b10d322cc8ec254bab447b52c1342dc1378009f]]
Former-commit-id: 8ba5d340805fa5a10f185f80878a77b6368ce09c [formerly 69b539f6c0d8c53c62836e54a26bb5be161858a1]
Former-commit-id: 1d3c2449e209a495daabfed250c5d7bb62bb49d2
---
 examples/build_Ensemble.py | 81 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 examples/build_Ensemble.py

diff --git a/examples/build_Ensemble.py b/examples/build_Ensemble.py
new file mode 100644
index 0000000..8534676
--- /dev/null
+++ b/examples/build_Ensemble.py
@@ -0,0 +1,81 @@
+"""Build an example d3m pipeline: pyod_ae scores fed into the Ensemble primitive.
+
+Running this script writes the pipeline description to ``example_pipeline.json``
+(relative to the current working directory) and prints the same JSON to stdout.
+"""
+from d3m import index
+from d3m.metadata.base import ArgumentType
+from d3m.metadata.pipeline import Pipeline, PrimitiveStep
+
+# Data flow assembled below:
+# inputs -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> pyod_ae -> Ensemble
+#           dataset_to_dataframe -> extract_columns_by_semantic_types(targets)  (not referenced by any later step)
+
+# Creating pipeline
+pipeline_description = Pipeline()
+pipeline_description.add_input(name='inputs')
+
+# Step 0: dataset_to_dataframe
+step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
+step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
+step_0.add_output('produce')
+pipeline_description.add_step(step_0)
+
+# Step 1: column_parser
+step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
+step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+step_1.add_output('produce')
+pipeline_description.add_step(step_1)
+
+# Step 2: extract_columns_by_semantic_types(attributes)
+step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
+step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+step_2.add_output('produce')
+step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
+                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
+pipeline_description.add_step(step_2)
+
+# Step 3: extract_columns_by_semantic_types(targets)
+# Takes its input from step 0 (before column_parser), unlike step 2 which reads step 1.
+step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
+step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+step_3.add_output('produce')
+step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
+                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
+pipeline_description.add_step(step_3)
+
+attributes = 'steps.2.produce'
+targets = 'steps.3.produce'  # kept for reference; no step below consumes it
+
+# Step 4: auto encoder
+# Exposes 'produce_score', which is the output step 5 consumes.
+step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
+step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
+step_4.add_output('produce_score')
+step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=[0,1,2])
+step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
+step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
+pipeline_description.add_step(step_4)
+
+# Step 5: ensemble
+step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.Ensemble'))
+step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce_score')
+step_5.add_output('produce')
+pipeline_description.add_step(step_5)
+
+# Final Output
+pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')
+
+# Output to YAML (alternative serialization, left disabled)
+#yaml = pipeline_description.to_yaml()
+#with open('pipeline.yml', 'w') as f:
+#    f.write(yaml)
+#print(yaml)
+
+# Output to json
+data = pipeline_description.to_json()
+# Explicit encoding so the written bytes do not depend on the platform default.
+with open('example_pipeline.json', 'w', encoding='utf-8') as f:
+    f.write(data)
+# Print after the with-block so the file is already closed when echoing.
+print(data)