@@ -0,0 +1,70 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: ABOD
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_abod'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_5.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2, 4,))
step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='replace')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
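# A minimal sketch (not part of the original example) of the JSON route hinted at above,
# using the same Pipeline API already used in this file; the 'pipeline.json' filename is
# an assumption chosen for illustration.
# json_data = pipeline_description.to_json()
# with open('pipeline.json', 'w') as f:
#     f.write(json_data)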
@@ -0,0 +1,67 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> auto encoder (pyod_ae)
#                            extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: auto encoder
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
@@ -0,0 +1,71 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import numpy as np

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> standard_scaler -> AutoRegODetector

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: standard_scaler (standardization)
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1, 2, 3, 4, 5,))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: AutoRegODetector
primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.AutoRegODetector')
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_4.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10)
# step_4.add_hyperparameter(name='weights', argument_type=ArgumentType.VALUE, data=weights_ndarray)
step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False)
# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))  # There is something wrong with multi-dimensional input
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_output('produce')
step_4.add_output('produce_score')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
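# (Sketch, mirroring the commented-out score output in the HBOS example later in this
# change set: the detector also exposes 'produce_score', which could be surfaced as a
# second pipeline output if the raw anomaly scores are needed.)
# pipeline_description.add_output(name='output score', data_reference='steps.4.produce_score')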
@@ -0,0 +1,50 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> axiswise_scaler

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: axiswise_scaler
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
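# A hedged round-trip sketch (not in the original example): the saved description can be
# loaded back through the same d3m Pipeline API used above, which is a quick sanity check
# that the YAML written here is well formed.
# with open('pipeline.yml', 'r') as f:
#     loaded = Pipeline.from_yaml(f)
# assert loaded.id == pipeline_description.id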
@@ -0,0 +1,44 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: BKFilter
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.bk_filter'))
# step_2.add_hyperparameter(name='columns_using_method', argument_type=ArgumentType.VALUE, data='name')
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2, 3))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
@@ -0,0 +1,51 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> pyod_cblof

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: pyod_cblof (CBLOF detector)
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_cblof')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))  # There is something wrong with multi-dimensional input
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
@@ -0,0 +1,48 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> categorical_to_binary

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: Column Parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: Categorical to Binary
primitive_2 = index.get_primitive('d3m.primitives.tods.data_processing.categorical_to_binary')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(3,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
@@ -0,0 +1,49 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> auto_correlation -> column_filter

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: auto_correlation
primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.auto_correlation')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2, 3))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Step 3: column_filter
primitive_3 = index.get_primitive('d3m.primitives.tods.data_processing.column_filter')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
@@ -0,0 +1,43 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: ContinuityValidation
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.continuity_validation'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='continuity_option', argument_type=ArgumentType.VALUE, data='imputation')
step_2.add_hyperparameter(name='interval', argument_type=ArgumentType.VALUE, data=0.3)
# Or:
# step_2.add_hyperparameter(name='continuity_option', argument_type=ArgumentType.VALUE, data='ablation')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
@@ -0,0 +1,49 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> deeplog

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: DeepLog detector
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.deeplog')
step_2 = PrimitiveStep(primitive=primitive_2)
# step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))  # There is something wrong with multi-dimensional input
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
@@ -0,0 +1,50 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> discrete_cosine_transform

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: Column Parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: Discrete Cosine Transform
primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.discrete_cosine_transform')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2, 3, 4))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
@@ -0,0 +1,42 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: DuplicationValidation
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.duplication_validation'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='keep_option', argument_type=ArgumentType.VALUE, data='average')  # Or: 'first'
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
@@ -0,0 +1,48 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> fast_fourier_transform

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: Column Parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: Fast Fourier Transform
primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.fast_fourier_transform')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2, 3, 4))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
@@ -0,0 +1,68 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: HBOS
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_hbos'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
# step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
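# Note (stated as an assumption, based on the PyOD convention that this detector wraps):
# 'contamination' is the expected proportion of outliers in the data and is used to
# derive the decision threshold on the outlier scores.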
@@ -0,0 +1,71 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: HBOS
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_hbos'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_5.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
# step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_5.add_output('produce_score')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')
# pipeline_description.add_output(name='output score', data_reference='steps.5.produce_score')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
@@ -0,0 +1,46 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: HPFilter
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.hp_filter'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=[2, 3, 6])
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
@@ -0,0 +1,76 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> holt_smoothing
#                            extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: holt smoothing
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.holt_smoothing'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_hyperparameter(name='exclude_columns', argument_type=ArgumentType.VALUE, data=(2, 3))
step_5.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: isolation forest
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm'))
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
#step_6.add_output('produce')
#pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
@@ -0,0 +1,76 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> holt_winters_exponential_smoothing
#                            extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: holt winters exponential smoothing
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.holt_winters_exponential_smoothing'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2, 3))
step_5.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: isolation forest
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm'))
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
#step_6.add_output('produce')
#pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
@@ -0,0 +1,59 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> pyod_iforest

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: pyod_iforest (Isolation Forest)
primitive_3 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_iforest')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
# step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
# step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))  # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce_score')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce_score')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()
@@ -0,0 +1,71 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
import numpy as np | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: extract_columns_by_semantic_types(attributes) | |||
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute']) | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: Standardization | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,)) | |||
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Step 4: detection algorithm under test (KDiscordODetector)
primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.KDiscordODetector') | |||
step_4 = PrimitiveStep(primitive=primitive_4) | |||
step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) | |||
step_4.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10) | |||
# step_4.add_hyperparameter(name='weights', argument_type=ArgumentType.VALUE, data=weights_ndarray) | |||
step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False) | |||
# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # NOTE: multi-dimensional use_columns does not behave correctly yet
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True) | |||
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce') | |||
step_4.add_output('produce') | |||
step_4.add_output('produce_score') | |||
pipeline_description.add_step(step_4) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,51 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
import copy | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: detection algorithm under test (pyod_knn)
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_knn') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # NOTE: multi-dimensional use_columns does not behave correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,51 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
import copy | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: detection algorithm under test (pyod_loda)
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_loda') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # NOTE: multi-dimensional use_columns does not behave correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,51 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
import copy | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: detection algorithm under test (pyod_lof)
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_lof') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # NOTE: multi-dimensional use_columns does not behave correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,70 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
import numpy as np | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: extract_columns_by_semantic_types(attributes) | |||
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute']) | |||
pipeline_description.add_step(step_2) | |||
# Step 3: Standardization
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,)) | |||
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Step 4: detection algorithm under test (LSTMODetector)
primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.LSTMODetector') | |||
step_4 = PrimitiveStep(primitive=primitive_4) | |||
step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) | |||
step_4.add_hyperparameter(name='diff_group_method', argument_type=ArgumentType.VALUE, data='average') | |||
step_4.add_hyperparameter(name='feature_dim', argument_type=ArgumentType.VALUE, data=5) | |||
step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False) | |||
# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # NOTE: multi-dimensional use_columns does not behave correctly yet
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True) | |||
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce') | |||
step_4.add_output('produce') | |||
pipeline_description.add_step(step_4) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,49 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: detection algorithm under test (matrix_profile)
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.matrix_profile') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4)) # NOTE: multi-dimensional use_columns does not behave correctly yet
step_2.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=3)
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# # Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,77 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: column_parser | |||
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: extract_columns_by_semantic_types(attributes) | |||
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, | |||
data=['https://metadata.datadrivendiscovery.org/types/Attribute']) | |||
pipeline_description.add_step(step_2) | |||
# Step 3: extract_columns_by_semantic_types(targets) | |||
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_3.add_output('produce') | |||
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, | |||
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) | |||
pipeline_description.add_step(step_3) | |||
attributes = 'steps.2.produce' | |||
targets = 'steps.3.produce' | |||
# Step 4: imputer | |||
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing')) | |||
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) | |||
step_4.add_output('produce') | |||
pipeline_description.add_step(step_4) | |||
# Step 5: moving average transform
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.moving_average_transform')) | |||
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) | |||
step_5.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (2, 3)) | |||
step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True) | |||
step_5.add_output('produce') | |||
pipeline_description.add_step(step_5) | |||
# Step 6: isolation forest | |||
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm')) | |||
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') | |||
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets) | |||
#step_6.add_output('produce') | |||
#pipeline_description.add_step(step_6) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,50 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: Column Parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: Non Negative Matrix Factorization | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.non_negative_matrix_factorization') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_hyperparameter(name='rank', argument_type=ArgumentType.VALUE, data=5) | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,51 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
import copy | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: detection algorithm under test (pyod_ocsvm)
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ocsvm') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # NOTE: multi-dimensional use_columns does not behave correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,71 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
import numpy as np | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: extract_columns_by_semantic_types(attributes) | |||
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute']) | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: Standardization | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,)) | |||
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Step 4: detection algorithm under test (PCAODetector)
primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.PCAODetector') | |||
step_4 = PrimitiveStep(primitive=primitive_4) | |||
step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) | |||
step_4.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10) | |||
# step_4.add_hyperparameter(name='weights', argument_type=ArgumentType.VALUE, data=weights_ndarray) | |||
step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False) | |||
# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # NOTE: multi-dimensional use_columns does not behave correctly yet
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True) | |||
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce') | |||
step_4.add_output('produce') | |||
step_4.add_output('produce_score') | |||
pipeline_description.add_step(step_4) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,49 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
import copy | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: primitive under test (power_transformer)
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.power_transformer') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,51 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
import copy | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: detection algorithm under test (pyod_cof)
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_cof') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4)) # NOTE: multi-dimensional use_columns does not behave correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,49 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
import copy | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: primitive under test (quantile_transformer)
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.quantile_transformer') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,54 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: column_parser | |||
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: extract_columns_by_semantic_types(attributes) | |||
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute']) | |||
pipeline_description.add_step(step_2) | |||
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.reinforcement.rule_filter')) | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2, 4,)) | |||
step_3.add_hyperparameter(name='rule', argument_type=ArgumentType.VALUE, data='#4# % 2 == 0 and #2# <= 0.3') | |||
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,49 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: detection algorithm under test (pyod_sod)
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sod') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4)) # NOTE: multi-dimensional use_columns does not behave correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# # Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,76 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: column_parser | |||
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: extract_columns_by_semantic_types(attributes) | |||
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, | |||
data=['https://metadata.datadrivendiscovery.org/types/Attribute']) | |||
pipeline_description.add_step(step_2) | |||
# Step 3: extract_columns_by_semantic_types(targets) | |||
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_3.add_output('produce') | |||
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, | |||
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) | |||
pipeline_description.add_step(step_3) | |||
attributes = 'steps.2.produce' | |||
targets = 'steps.3.produce' | |||
# Step 4: imputer | |||
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing')) | |||
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) | |||
step_4.add_output('produce') | |||
pipeline_description.add_step(step_4) | |||
# Step 5: simple exponential smoothing | |||
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.simple_exponential_smoothing')) | |||
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) | |||
step_5.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (1,)) | |||
step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True) | |||
step_5.add_output('produce') | |||
pipeline_description.add_step(step_5) | |||
# Step 6: isolation forest | |||
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm')) | |||
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') | |||
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets) | |||
#step_6.add_output('produce') | |||
#pipeline_description.add_step(step_6) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,49 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
import copy | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: primitive under test (standard_scaler)
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,44 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: column_parser | |||
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: TRMF | |||
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.trmf')) | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
step_2.add_hyperparameter(name='lags', argument_type=ArgumentType.VALUE, data=[1, 2, 10, 100])
# step_2.add_hyperparameter(name='K', argument_type=ArgumentType.VALUE, data=3)
# step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2, 3, 4, 5, 6))
pipeline_description.add_step(step_2) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,48 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: Column Parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: Telemanom detection algorithm
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.telemanom') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,86 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest | |||
# extract_columns_by_semantic_types(targets) -> ^ | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: dataframe transformation | |||
# primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKPowerTransformer') | |||
# primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKStandardization') | |||
# primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKQuantileTransformer') | |||
# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.data_processing.time_interval_transform') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name="time_interval", argument_type=ArgumentType.VALUE, data = '5T') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# | |||
# # Step 2: column_parser | |||
# step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) | |||
# step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
# step_2.add_output('produce') | |||
# pipeline_description.add_step(step_2) | |||
# | |||
# | |||
# # Step 3: extract_columns_by_semantic_types(attributes) | |||
# step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) | |||
# step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
# step_3.add_output('produce') | |||
# step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, | |||
# data=['https://metadata.datadrivendiscovery.org/types/Attribute']) | |||
# pipeline_description.add_step(step_3) | |||
# | |||
# # Step 4: extract_columns_by_semantic_types(targets) | |||
# step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) | |||
# step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
# step_4.add_output('produce') | |||
# step_4.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, | |||
# data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) | |||
# pipeline_description.add_step(step_4) | |||
# | |||
# attributes = 'steps.3.produce' | |||
# targets = 'steps.4.produce' | |||
# | |||
# # Step 5: imputer | |||
# step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_cleaning.imputer.SKlearn')) | |||
# step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) | |||
# step_5.add_output('produce') | |||
# pipeline_description.add_step(step_5) | |||
# | |||
# # Step 6: random_forest | |||
# step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.regression.random_forest.SKlearn')) | |||
# step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') | |||
# step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets) | |||
# step_6.add_output('produce') | |||
# pipeline_description.add_step(step_6) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.1.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,44 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: column_parser | |||
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: TruncatedSVD | |||
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.truncated_svd')) | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
step_2.add_hyperparameter(name='n_components', argument_type=ArgumentType.VALUE, data=3)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2, 3, 4, 5, 6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
pipeline_description.add_step(step_2) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
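# Optional round-trip check, assuming d3m's Pipeline.from_yaml classmethod is
# available as the counterpart of to_yaml(): re-parse the serialized string and
# confirm the step count survives.
# reloaded = Pipeline.from_yaml(yaml)
# assert len(reloaded.steps) == len(pipeline_description.steps)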
@@ -0,0 +1,67 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
# Pipeline: dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types (attributes, targets) -> impute_missing -> pyod_vae
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: column_parser | |||
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: extract_columns_by_semantic_types(attributes) | |||
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, | |||
data=['https://metadata.datadrivendiscovery.org/types/Attribute']) | |||
pipeline_description.add_step(step_2) | |||
# Step 3: extract_columns_by_semantic_types(targets) | |||
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_3.add_output('produce') | |||
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, | |||
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) | |||
pipeline_description.add_step(step_3) | |||
attributes = 'steps.2.produce' | |||
targets = 'steps.3.produce' | |||
# Step 4: imputer | |||
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing')) | |||
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) | |||
step_4.add_output('produce') | |||
pipeline_description.add_step(step_4) | |||
# Step 5: variational autoencoder (pyod_vae)
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_vae')) | |||
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_output('produce') | |||
pipeline_description.add_step(step_5) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,64 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
import copy | |||
# Pipeline: dataset_to_dataframe -> column_parser -> wavelet_transform (forward) -> wavelet_transform (inverse)
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: WaveletTransform (forward)
primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.wavelet_transform') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='wavelet', argument_type=ArgumentType.VALUE, data='db8') | |||
step_2.add_hyperparameter(name='level', argument_type=ArgumentType.VALUE, data=2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Step 3: WaveletTransform (inverse)
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.wavelet_transform') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='wavelet', argument_type=ArgumentType.VALUE, data='db8') | |||
step_3.add_hyperparameter(name='level', argument_type=ArgumentType.VALUE, data=2) | |||
step_3.add_hyperparameter(name='inverse', argument_type=ArgumentType.VALUE, data=1) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False) | |||
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
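# For intuition, the forward/inverse pair exercised by steps 2-3 corresponds to a
# standard wavelet decomposition/reconstruction round trip. The sketch below uses
# PyWavelets directly as an illustration of that idea only; it is not how the
# wavelet_transform primitive is invoked internally.
import numpy as np
import pywt

demo_signal = np.sin(np.linspace(0, 8 * np.pi, 256))
demo_coeffs = pywt.wavedec(demo_signal, 'db8', level=2)        # forward transform (cf. step 2)
demo_rec = pywt.waverec(demo_coeffs, 'db8')                    # inverse transform (cf. step 3)
print(np.allclose(demo_signal, demo_rec[:len(demo_signal)]))   # expected: True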
@@ -0,0 +1,50 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
import copy | |||
# Pipeline: dataset_to_dataframe -> column_parser -> pyod_mogaal
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: test primitive | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_mogaal') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))  # NOTE: multi-column input does not work correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
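# What contamination=0.1 means for a PyOD-style detector, sketched with synthetic
# scores (illustration only, not the primitive's internal code): the decision
# threshold is set so that roughly the top 10% of anomaly scores are flagged.
import numpy as np

demo_scores = np.random.RandomState(0).rand(1000)             # stand-in anomaly scores
demo_threshold = np.percentile(demo_scores, 100 * (1 - 0.1))  # contamination = 0.1
demo_labels = (demo_scores > demo_threshold).astype(int)
print(demo_labels.mean())                                     # roughly 0.1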
@@ -0,0 +1,50 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
import copy | |||
# Pipeline: dataset_to_dataframe -> column_parser -> pyod_sogaal
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: test primitive | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sogaal') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))  # NOTE: multi-column input does not work correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Final Output | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
print(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,61 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> spectral_residual_transform
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: Standardization | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: test primitive | |||
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.spectral_residual_transform') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='avg_filter_dimension', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12))  # NOTE: multi-column input does not work correctly yet
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
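# Rough sketch of the spectral-residual idea behind this primitive (from the
# "Time-Series Anomaly Detection Service at Microsoft" paper): take the FFT,
# subtract a local average from the log-amplitude spectrum, and invert with the
# original phase to obtain a saliency map that peaks near anomalies. Illustration
# only, not the primitive's exact implementation; the averaging window below
# plays the role of the avg_filter_dimension=4 hyperparameter above.
import numpy as np

demo_series = np.sin(np.linspace(0, 20 * np.pi, 500))
demo_series[250] += 5.0                                        # inject a spike
demo_fft = np.fft.fft(demo_series)
demo_log_amp = np.log(np.abs(demo_fft) + 1e-8)
demo_avg = np.convolve(demo_log_amp, np.ones(4) / 4, mode='same')
demo_saliency = np.abs(np.fft.ifft(np.exp(demo_log_amp - demo_avg + 1j * np.angle(demo_fft))))
print(int(np.argmax(demo_saliency)))                           # the injected spike near index 250 should stand out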
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_abs_energy
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: Standardization | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: test primitive | |||
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_abs_energy') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12))  # NOTE: multi-column input does not work correctly yet
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_abs_sum
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: Standardization | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: test primitive | |||
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_abs_sum') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12))  # NOTE: multi-column input does not work correctly yet
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_g_mean
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: Standardization | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: test primitive | |||
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_g_mean') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6))  # NOTE: multi-column input does not work correctly yet
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_h_mean
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: Standardization | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: test primitive | |||
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_h_mean') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6))  # NOTE: multi-column input does not work correctly yet
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_kurtosis
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: Standardization | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: test primitive | |||
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_kurtosis') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6))  # NOTE: multi-column input does not work correctly yet
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_maximum
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: Standardization | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: test primitive | |||
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6))  # NOTE: multi-column input does not work correctly yet
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_mean
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: Standardization | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: test primitive | |||
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6))  # NOTE: multi-column input does not work correctly yet
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_mean_abs
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: Standardization | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: test primitive | |||
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean_abs') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6))  # NOTE: multi-column input does not work correctly yet
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_mean_abs_temporal_derivative
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: Standardization | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: test primitive | |||
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean_abs_temporal_derivative') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6))  # NOTE: multi-column input does not work correctly yet
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_mean_temporal_derivative
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: Standardization | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: test primitive | |||
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean_temporal_derivative') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6))  # NOTE: multi-column input does not work correctly yet
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_median
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: Standardization | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: test primitive | |||
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_median') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6))  # NOTE: multi-column input does not work correctly yet
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,63 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_median_abs_deviation
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: Standardization | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: test primitive | |||
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_median_abs_deviation') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6))  # NOTE: multi-column input does not work correctly yet
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output json | |||
# data = pipeline_description.to_json()
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_minimum
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# # Step 1: column_parser | |||
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# # Step 2: Standardization | |||
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# # Step 3: test primitive | |||
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') | |||
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_minimum') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: there appears to be an issue with multi-dimensional (multi-column) inputs
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output JSON
#data = pipeline_description.to_json()
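# Conceptual illustration (added; not the primitive's actual code): with
# return_result='append' the primitive keeps the original columns and appends its outputs
# as new columns, while 'replace' substitutes them. Roughly, in pandas terms
# (column names below are hypothetical):
import pandas as pd
original = pd.DataFrame({'value_0': [1.0, 2.0], 'value_1': [3.0, 4.0]})
new_features = pd.DataFrame({'value_0_minimum': [1.0, 1.0]})
appended = pd.concat([original, new_features], axis=1)   # 'append' keeps both sets of columns
print(appended.columns.tolist())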
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline overview: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_skew
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Step 3: statistical_skew (feature analysis)
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_skew') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: there appears to be an issue with multi-dimensional (multi-column) inputs
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output JSON
#data = pipeline_description.to_json()
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline overview: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_std
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Step 3: statistical_std (feature analysis)
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_std') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: there appears to be an issue with multi-dimensional (multi-column) inputs
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output JSON
#data = pipeline_description.to_json()
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline overview: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_var
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Step 3: statistical_var (feature analysis)
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_var') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: there appears to be an issue with multi-dimensional (multi-column) inputs
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output JSON
#data = pipeline_description.to_json()
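# Conceptual illustration (added; not the primitive's actual code): a window-based
# statistical feature such as statistical_var computes a rolling statistic over
# window_size samples per column. A rough pandas equivalent for a single column:
import pandas as pd
values = pd.Series([1.0, 2.0, 4.0, 7.0, 11.0, 16.0])
rolling_var = values.rolling(window=4, min_periods=1).var()   # window_size=4 as above
print(rolling_var)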
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline overview: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_variation
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Step 3: statistical_variation (feature analysis)
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_variation') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: there appears to be an issue with multi-dimensional (multi-column) inputs
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output JSON
#data = pipeline_description.to_json()
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline overview: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_vec_sum
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Step 3: statistical_vec_sum (feature analysis)
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_vec_sum') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: there appears to be an issue with multi-dimensional (multi-column) inputs
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output JSON
#data = pipeline_description.to_json()
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline overview: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_willison_amplitude
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Step 3: statistical_willison_amplitude (feature analysis)
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_willison_amplitude') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: there appears to be an issue with multi-dimensional (multi-column) inputs
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output JSON
#data = pipeline_description.to_json()
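# Conceptual illustration (added; not the primitive's actual code): the Willison amplitude
# is commonly defined as the number of times the absolute difference between consecutive
# samples exceeds a threshold. A rough numpy sketch (threshold value is arbitrary here):
import numpy as np
window = np.array([0.1, 0.9, 1.0, 0.2, 0.25, 1.3])
threshold = 0.5
wamp = int(np.sum(np.abs(np.diff(window)) > threshold))   # count of large consecutive jumps
print(wamp)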
@@ -0,0 +1,62 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
# Pipeline overview: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_zero_crossing
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Step 3: statistical_zero_crossing (feature analysis)
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_zero_crossing') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(9,10)) # Note: there appears to be an issue with multi-dimensional (multi-column) inputs
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output JSON
#data = pipeline_description.to_json()
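# Conceptual illustration (added; not the primitive's actual code): a zero-crossing feature
# typically counts sign changes in a (usually mean-centred) window of the signal:
import numpy as np
window = np.array([0.5, -0.2, -0.1, 0.3, 0.7, -0.4])
signs = np.sign(window)
zero_crossings = int(np.sum(signs[:-1] != signs[1:]))   # number of adjacent sign changes
print(zero_crossings)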
@@ -0,0 +1,61 @@ | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
from d3m.metadata import hyperparams | |||
import copy | |||
# Pipeline overview: dataset_to_dataframe -> column_parser -> standard_scaler -> time_series_seasonality_trend_decomposition
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# Step 0: dataset_to_dataframe | |||
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') | |||
step_0 = PrimitiveStep(primitive=primitive_0) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') | |||
step_1 = PrimitiveStep(primitive=primitive_1) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') | |||
step_2 = PrimitiveStep(primitive=primitive_2) | |||
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) | |||
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
pipeline_description.add_step(step_2) | |||
# Step 3: time_series_seasonality_trend_decomposition (seasonality/trend decomposition)
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.decomposition.time_series_seasonality_trend_decomposition') | |||
step_3 = PrimitiveStep(primitive=primitive_3) | |||
step_3.add_hyperparameter(name='period', argument_type=ArgumentType.VALUE, data=5) | |||
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) | |||
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # Note: there appears to be an issue with multi-dimensional (multi-column) inputs
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') | |||
step_3.add_output('produce') | |||
pipeline_description.add_step(step_3) | |||
# Final Output | |||
pipeline_description.add_output(name='output', data_reference='steps.3.produce') | |||
# Output to YAML | |||
yaml = pipeline_description.to_yaml() | |||
with open('pipeline.yml', 'w') as f: | |||
f.write(yaml) | |||
# Or you can output JSON
#data = pipeline_description.to_json()
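# Conceptual illustration (added; not the primitive's actual code): seasonality/trend
# decomposition splits a series into trend, seasonal and residual components. Assuming a
# statsmodels-style additive decomposition with period=5 (the TODS primitive may differ):
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose
series = pd.Series([float(i % 5) + 0.1 * i for i in range(30)])
result = seasonal_decompose(series, model='additive', period=5)
print(result.trend.dropna().head())   # result.seasonal and result.resid are also available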
@@ -1,6 +1,6 @@ | |||
#!/bin/bash | |||
test_scripts=$(ls primitive_tests) | |||
test_scripts=$(ls new_tests) | |||
#test_scripts=$(ls primitive_tests | grep -v -f tested_file.txt) | |||
for file in $test_scripts | |||
@@ -0,0 +1,398 @@ | |||
import hashlib | |||
import os | |||
import typing | |||
import numpy # type: ignore | |||
from d3m import container, utils as d3m_utils | |||
from d3m.base import utils as base_utils | |||
from d3m.metadata import base as metadata_base, hyperparams | |||
from d3m.primitive_interfaces import base, transformer | |||
import common_primitives | |||
from common_primitives import utils | |||
__all__ = ('ColumnParserPrimitive',) | |||
Inputs = container.DataFrame | |||
Outputs = container.DataFrame | |||
class Hyperparams(hyperparams.Hyperparams): | |||
parse_semantic_types = hyperparams.Set( | |||
elements=hyperparams.Enumeration( | |||
values=[ | |||
'http://schema.org/Boolean', 'https://metadata.datadrivendiscovery.org/types/CategoricalData', | |||
'http://schema.org/Integer', 'http://schema.org/Float', | |||
'https://metadata.datadrivendiscovery.org/types/FloatVector', 'http://schema.org/DateTime', | |||
], | |||
# Default is ignored. | |||
# TODO: Remove default. See: https://gitlab.com/datadrivendiscovery/d3m/issues/141 | |||
default='http://schema.org/Boolean', | |||
), | |||
default=( | |||
'http://schema.org/Boolean', 'https://metadata.datadrivendiscovery.org/types/CategoricalData', | |||
'http://schema.org/Integer', 'http://schema.org/Float', | |||
'https://metadata.datadrivendiscovery.org/types/FloatVector', 'http://schema.org/DateTime', | |||
), | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="A set of semantic types to parse. One can provide a subset of supported semantic types to limit what the primitive parses.", | |||
) | |||
use_columns = hyperparams.Set( | |||
elements=hyperparams.Hyperparameter[int](-1), | |||
default=(), | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", | |||
) | |||
exclude_columns = hyperparams.Set( | |||
elements=hyperparams.Hyperparameter[int](-1), | |||
default=(), | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", | |||
) | |||
return_result = hyperparams.Enumeration( | |||
values=['append', 'replace', 'new'], | |||
default='replace', | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned?", | |||
) | |||
add_index_columns = hyperparams.UniformBool( | |||
default=True, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", | |||
) | |||
parse_categorical_target_columns = hyperparams.UniformBool( | |||
default=False, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Should it parse also categorical target columns?", | |||
) | |||
replace_index_columns = hyperparams.UniformBool( | |||
default=True, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Replace primary index columns even if otherwise appending columns. Applicable only if \"return_result\" is set to \"append\".", | |||
) | |||
fuzzy_time_parsing = hyperparams.UniformBool( | |||
default=True, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Use fuzzy time parsing.", | |||
) | |||
class ColumnParserPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
A primitive which parses string columns into their typed values.
It goes over all columns (by default, controlled by ``use_columns``, ``exclude_columns``) | |||
and checks those with structural type ``str`` if they have a semantic type suggesting | |||
that they are a boolean value, categorical, integer, float, or time (by default, | |||
controlled by ``parse_semantic_types``). Categorical values are converted with | |||
hash encoding. | |||
What is returned is controlled by ``return_result`` and ``add_index_columns``. | |||
""" | |||
metadata = metadata_base.PrimitiveMetadata( | |||
{ | |||
'id': 'd510cb7a-1782-4f51-b44c-58f0236e47c7', | |||
'version': '0.6.0', | |||
'name': "Parses strings into their types", | |||
'python_path': 'd3m.primitives.tods.data_processing.column_parser', | |||
'source': { | |||
'name': common_primitives.__author__, | |||
'contact': 'mailto:mitar.commonprimitives@tnode.com', | |||
'uris': [ | |||
'https://gitlab.com/datadrivendiscovery/common-primitives/blob/master/common_primitives/column_parser.py', | |||
'https://gitlab.com/datadrivendiscovery/common-primitives.git', | |||
], | |||
}, | |||
'installation': [{ | |||
'type': metadata_base.PrimitiveInstallationType.PIP, | |||
'package_uri': 'git+https://gitlab.com/datadrivendiscovery/common-primitives.git@{git_commit}#egg=common_primitives'.format( | |||
git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)), | |||
), | |||
}], | |||
'algorithm_types': [ | |||
metadata_base.PrimitiveAlgorithmType.DATA_CONVERSION, | |||
], | |||
'primitive_family': metadata_base.PrimitiveFamily.DATA_TRANSFORMATION, | |||
}, | |||
) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
columns_to_use, output_columns = self._produce_columns(inputs) | |||
if self.hyperparams['replace_index_columns'] and self.hyperparams['return_result'] == 'append': | |||
assert len(columns_to_use) == len(output_columns) | |||
index_columns = inputs.metadata.get_index_columns() | |||
index_columns_to_use = [] | |||
other_columns_to_use = [] | |||
index_output_columns = [] | |||
other_output_columns = [] | |||
for column_to_use, output_column in zip(columns_to_use, output_columns): | |||
if column_to_use in index_columns: | |||
index_columns_to_use.append(column_to_use) | |||
index_output_columns.append(output_column) | |||
else: | |||
other_columns_to_use.append(column_to_use) | |||
other_output_columns.append(output_column) | |||
outputs = base_utils.combine_columns(inputs, index_columns_to_use, index_output_columns, return_result='replace', add_index_columns=self.hyperparams['add_index_columns']) | |||
outputs = base_utils.combine_columns(outputs, other_columns_to_use, other_output_columns, return_result='append', add_index_columns=self.hyperparams['add_index_columns']) | |||
else: | |||
outputs = base_utils.combine_columns(inputs, columns_to_use, output_columns, return_result=self.hyperparams['return_result'], add_index_columns=self.hyperparams['add_index_columns']) | |||
return base.CallResult(outputs) | |||
def _can_use_column(self, inputs_metadata: metadata_base.DataMetadata, column_index: int) -> bool: | |||
column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) | |||
# We produce only on columns which have not yet been parsed (are strings). | |||
if column_metadata['structural_type'] != str: | |||
return False | |||
semantic_types = column_metadata.get('semantic_types', []) | |||
for semantic_type in self.hyperparams['parse_semantic_types']: | |||
if semantic_type not in semantic_types: | |||
continue | |||
if semantic_type == 'https://metadata.datadrivendiscovery.org/types/CategoricalData': | |||
# Skip parsing if a column is categorical, but also a target column. | |||
if not self.hyperparams['parse_categorical_target_columns'] and 'https://metadata.datadrivendiscovery.org/types/Target' in semantic_types: | |||
continue | |||
return True | |||
return False | |||
def _produce_columns(self, inputs: Inputs) -> typing.Tuple[typing.List[int], typing.List[Outputs]]: | |||
# The parsing logic here is meant to mirror the value-detection logic in
# "SimpleProfilerPrimitive". One should keep the two in sync.
columns_to_use = self._get_columns(inputs.metadata) | |||
# We check against this list again, because there might be multiple matching semantic types | |||
# (which is not really valid). | |||
parse_semantic_types = self.hyperparams['parse_semantic_types'] | |||
output_columns = [] | |||
for column_index in columns_to_use: | |||
column_metadata = inputs.metadata.query((metadata_base.ALL_ELEMENTS, column_index)) | |||
semantic_types = column_metadata.get('semantic_types', []) | |||
if column_metadata['structural_type'] == str: | |||
if 'http://schema.org/Boolean' in parse_semantic_types and 'http://schema.org/Boolean' in semantic_types: | |||
output_columns.append(self._parse_boolean_data(inputs, column_index)) | |||
elif 'https://metadata.datadrivendiscovery.org/types/CategoricalData' in parse_semantic_types and \ | |||
'https://metadata.datadrivendiscovery.org/types/CategoricalData' in semantic_types and \ | |||
(self.hyperparams['parse_categorical_target_columns'] or 'https://metadata.datadrivendiscovery.org/types/Target' not in semantic_types): | |||
output_columns.append(self._parse_categorical_data(inputs, column_index)) | |||
elif 'http://schema.org/Integer' in parse_semantic_types and 'http://schema.org/Integer' in semantic_types: | |||
# For primary key we know all values have to exist so we can assume they can always be represented as integers. | |||
if 'https://metadata.datadrivendiscovery.org/types/PrimaryKey' in semantic_types: | |||
integer_required = True | |||
else: | |||
integer_required = False | |||
output_columns.append(self._parse_integer(inputs, column_index, integer_required)) | |||
elif 'http://schema.org/Float' in parse_semantic_types and 'http://schema.org/Float' in semantic_types: | |||
output_columns.append(self._parse_float_data(inputs, column_index)) | |||
elif 'https://metadata.datadrivendiscovery.org/types/FloatVector' in parse_semantic_types and 'https://metadata.datadrivendiscovery.org/types/FloatVector' in semantic_types: | |||
output_columns.append(self._parse_float_vector_data(inputs, column_index)) | |||
elif 'http://schema.org/DateTime' in parse_semantic_types and 'http://schema.org/DateTime' in semantic_types: | |||
output_columns.append(self._parse_time_data(inputs, column_index, self.hyperparams['fuzzy_time_parsing'])) | |||
else: | |||
assert False, column_index | |||
assert len(output_columns) == len(columns_to_use) | |||
return columns_to_use, output_columns | |||
def _produce_columns_metadata(self, inputs_metadata: metadata_base.DataMetadata) -> typing.Tuple[typing.List[int], typing.List[metadata_base.DataMetadata]]: | |||
columns_to_use = self._get_columns(inputs_metadata) | |||
# We check against this list again, because there might be multiple matching semantic types | |||
# (which is not really valid). | |||
parse_semantic_types = self.hyperparams['parse_semantic_types'] | |||
output_columns = [] | |||
for column_index in columns_to_use: | |||
column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) | |||
semantic_types = column_metadata.get('semantic_types', []) | |||
if column_metadata['structural_type'] == str: | |||
if 'http://schema.org/Boolean' in parse_semantic_types and 'http://schema.org/Boolean' in semantic_types: | |||
output_columns.append(self._parse_boolean_metadata(inputs_metadata, column_index)) | |||
elif 'https://metadata.datadrivendiscovery.org/types/CategoricalData' in parse_semantic_types and \ | |||
'https://metadata.datadrivendiscovery.org/types/CategoricalData' in semantic_types and \ | |||
(self.hyperparams['parse_categorical_target_columns'] or 'https://metadata.datadrivendiscovery.org/types/Target' not in semantic_types): | |||
output_columns.append(self._parse_categorical_metadata(inputs_metadata, column_index)) | |||
elif 'http://schema.org/Integer' in parse_semantic_types and 'http://schema.org/Integer' in semantic_types: | |||
output_columns.append(self._parse_integer_metadata(inputs_metadata, column_index)) | |||
elif 'http://schema.org/Float' in parse_semantic_types and 'http://schema.org/Float' in semantic_types: | |||
output_columns.append(self._parse_float_metadata(inputs_metadata, column_index)) | |||
elif 'https://metadata.datadrivendiscovery.org/types/FloatVector' in parse_semantic_types and 'https://metadata.datadrivendiscovery.org/types/FloatVector' in semantic_types: | |||
output_columns.append(self._parse_float_vector_metadata(inputs_metadata, column_index)) | |||
elif 'http://schema.org/DateTime' in parse_semantic_types and 'http://schema.org/DateTime' in semantic_types: | |||
output_columns.append(self._parse_time_metadata(inputs_metadata, column_index)) | |||
else: | |||
assert False, column_index | |||
assert len(output_columns) == len(columns_to_use) | |||
return columns_to_use, output_columns | |||
def _get_columns(self, inputs_metadata: metadata_base.DataMetadata) -> typing.List[int]: | |||
def can_use_column(column_index: int) -> bool: | |||
return self._can_use_column(inputs_metadata, column_index) | |||
columns_to_use, columns_not_to_use = base_utils.get_columns_to_use(inputs_metadata, self.hyperparams['use_columns'], self.hyperparams['exclude_columns'], can_use_column) | |||
# We are OK if no columns ended up being parsed. | |||
# "base_utils.combine_columns" will throw an error if it cannot work with this. | |||
if self.hyperparams['use_columns'] and columns_not_to_use: | |||
self.logger.warning("Not all specified columns can parsed. Skipping columns: %(columns)s", { | |||
'columns': columns_not_to_use, | |||
}) | |||
return columns_to_use | |||
@classmethod | |||
def _parse_boolean_data(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment) -> Outputs: | |||
return cls._parse_categorical_data(inputs, column_index) | |||
@classmethod | |||
def _parse_boolean_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata: | |||
return cls._parse_categorical_metadata(inputs_metadata, column_index) | |||
@classmethod | |||
def _parse_categorical_data(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment) -> Outputs: | |||
values_map: typing.Dict[str, int] = {} | |||
for value in inputs.iloc[:, column_index]: | |||
value = value.strip() | |||
if value not in values_map: | |||
value_hash = hashlib.sha256(value.encode('utf8')) | |||
values_map[value] = int.from_bytes(value_hash.digest()[0:8], byteorder='little') ^ int.from_bytes(value_hash.digest()[8:16], byteorder='little') ^ \ | |||
int.from_bytes(value_hash.digest()[16:24], byteorder='little') ^ int.from_bytes(value_hash.digest()[24:32], byteorder='little') | |||
outputs = container.DataFrame({inputs.columns[column_index]: [values_map[value.strip()] for value in inputs.iloc[:, column_index]]}, generate_metadata=False) | |||
outputs.metadata = cls._parse_categorical_metadata(inputs.metadata, column_index) | |||
return outputs | |||
@classmethod | |||
def _parse_categorical_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata: | |||
outputs_metadata = inputs_metadata.select_columns([column_index]) | |||
return outputs_metadata.update_column(0, {'structural_type': int}) | |||
@classmethod | |||
def _str_to_int(cls, value: str) -> typing.Union[float, int]: | |||
try: | |||
return int(value.strip()) | |||
except ValueError: | |||
try: | |||
# Maybe it is an int represented as a float. Let's try this. This can get rid of non-integer | |||
# part of the value, but the integer was requested through a semantic type, so this is probably OK. | |||
return int(float(value.strip())) | |||
except ValueError: | |||
# No luck, use NaN to represent a missing value. | |||
return float('nan') | |||
@classmethod | |||
def _parse_integer(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment, | |||
integer_required: bool) -> container.DataFrame: | |||
outputs = container.DataFrame({inputs.columns[column_index]: [cls._str_to_int(value) for value in inputs.iloc[:, column_index]]}, generate_metadata=False) | |||
if outputs.dtypes.iloc[0].kind == 'f': | |||
structural_type: type = float | |||
elif outputs.dtypes.iloc[0].kind in ['i', 'u']: | |||
structural_type = int | |||
else: | |||
assert False, outputs.dtypes.iloc[0] | |||
if structural_type is float and integer_required: | |||
raise ValueError("Not all values in a column can be parsed into integers, but only integers were expected.") | |||
outputs.metadata = inputs.metadata.select_columns([column_index]) | |||
outputs.metadata = outputs.metadata.update_column(0, {'structural_type': structural_type}) | |||
return outputs | |||
@classmethod | |||
def _parse_integer_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata: | |||
outputs_metadata = inputs_metadata.select_columns([column_index]) | |||
# Without data we assume we can parse everything into integers. This might not be true and | |||
# we might end up parsing into floats if we have to represent missing (or invalid) values. | |||
return outputs_metadata.update_column(0, {'structural_type': int}) | |||
@classmethod | |||
def _str_to_float(cls, value: str) -> float: | |||
try: | |||
return float(value.strip()) | |||
except ValueError: | |||
return float('nan') | |||
@classmethod | |||
def _parse_float_data(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment) -> Outputs: | |||
outputs = container.DataFrame({inputs.columns[column_index]: [cls._str_to_float(value) for value in inputs.iloc[:, column_index]]}, generate_metadata=False) | |||
outputs.metadata = cls._parse_float_metadata(inputs.metadata, column_index) | |||
return outputs | |||
@classmethod | |||
def _parse_float_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata: | |||
outputs_metadata = inputs_metadata.select_columns([column_index]) | |||
return outputs_metadata.update_column(0, {'structural_type': float}) | |||
@classmethod | |||
def _parse_float_vector_data(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment) -> Outputs: | |||
# We are pretty strict here because we are assuming this was generated programmatically. | |||
outputs = container.DataFrame( | |||
{ | |||
inputs.columns[column_index]: [ | |||
container.ndarray([cls._str_to_float(value) for value in values.split(',')]) | |||
for values in inputs.iloc[:, column_index] | |||
], | |||
}, | |||
generate_metadata=False, | |||
) | |||
outputs.metadata = cls._parse_float_metadata(inputs.metadata, column_index) | |||
# We have to automatically generate metadata to set ndarray dimension(s). | |||
outputs.metadata = outputs.metadata.generate(outputs) | |||
return outputs | |||
@classmethod | |||
def _parse_float_vector_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata: | |||
outputs_metadata = inputs_metadata.select_columns([column_index]) | |||
# We cannot know the dimension of the ndarray without data. | |||
outputs_metadata = outputs_metadata.update_column(0, {'structural_type': container.ndarray}) | |||
outputs_metadata = outputs_metadata.update((metadata_base.ALL_ELEMENTS, 0, metadata_base.ALL_ELEMENTS), {'structural_type': numpy.float64}) | |||
return outputs_metadata | |||
@classmethod | |||
def _parse_time_data(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment, fuzzy: bool) -> Outputs: | |||
outputs = container.DataFrame({inputs.columns[column_index]: [utils.parse_datetime_to_float(value, fuzzy=fuzzy) for value in inputs.iloc[:, column_index]]}, generate_metadata=False) | |||
outputs.metadata = cls._parse_time_metadata(inputs.metadata, column_index) | |||
return outputs | |||
@classmethod | |||
def _parse_time_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata: | |||
outputs_metadata = inputs_metadata.select_columns([column_index]) | |||
return outputs_metadata.update_column(0, {'structural_type': float}) |
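# Usage-level sketch (added for clarity, not part of the original file): the hash encoding
# used by _parse_categorical_data above maps each distinct (stripped) string to a
# deterministic 64-bit integer by XOR-ing the four 8-byte chunks of its SHA-256 digest.
if __name__ == '__main__':
    def _hash_encode(value: str) -> int:
        digest = hashlib.sha256(value.strip().encode('utf8')).digest()
        return (int.from_bytes(digest[0:8], byteorder='little')
                ^ int.from_bytes(digest[8:16], byteorder='little')
                ^ int.from_bytes(digest[16:24], byteorder='little')
                ^ int.from_bytes(digest[24:32], byteorder='little'))
    # The same input (after stripping) always maps to the same integer:
    print(_hash_encode('category_a'), _hash_encode(' category_a '))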
@@ -0,0 +1,261 @@ | |||
import os | |||
import typing | |||
from d3m import container, utils as d3m_utils | |||
from d3m.metadata import base as metadata_base, hyperparams | |||
from d3m.primitive_interfaces import base, transformer | |||
from d3m.contrib.primitives import compute_scores | |||
import common_primitives | |||
__all__ = ('ConstructPredictionsPrimitive',) | |||
Inputs = container.DataFrame | |||
Outputs = container.DataFrame | |||
class Hyperparams(hyperparams.Hyperparams): | |||
use_columns = hyperparams.Set( | |||
elements=hyperparams.Hyperparameter[int](-1), | |||
default=(), | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="A set of column indices to force primitive to operate on. If metadata reconstruction happens, this is used for reference columns." | |||
" If any specified column is not a primary index or a predicted target, it is skipped.", | |||
) | |||
exclude_columns = hyperparams.Set( | |||
elements=hyperparams.Hyperparameter[int](-1), | |||
default=(), | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="A set of column indices to not operate on. If metadata reconstruction happens, this is used for reference columns. Applicable only if \"use_columns\" is not provided.", | |||
) | |||
class ConstructPredictionsPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
A primitive which takes as input a DataFrame and outputs a DataFrame in Lincoln Labs predictions | |||
format: the first column is the d3mIndex column (plus any other primary index columns, e.g., for the
object detection problem), followed by the predicted targets, each in its own column, and then optional confidence column(s).
It supports both input columns annotated with semantic types (``https://metadata.datadrivendiscovery.org/types/PrimaryKey``, | |||
``https://metadata.datadrivendiscovery.org/types/PrimaryMultiKey``, ``https://metadata.datadrivendiscovery.org/types/PredictedTarget``, | |||
``https://metadata.datadrivendiscovery.org/types/Confidence``), or trying to reconstruct metadata. | |||
This is why the primitive also takes an additional input, a reference DataFrame whose
metadata helps reconstruct the missing metadata. If metadata is missing, the primitive
assumes that all ``inputs`` columns are predicted targets, without confidence column(s). | |||
""" | |||
metadata = metadata_base.PrimitiveMetadata( | |||
{ | |||
'id': '8d38b340-f83f-4877-baaa-162f8e551736', | |||
'version': '0.3.0', | |||
'name': "Construct pipeline predictions output", | |||
'python_path': 'd3m.primitives.tods.data_processing.construct_predictions', | |||
'source': { | |||
'name': common_primitives.__author__, | |||
'contact': 'mailto:mitar.commonprimitives@tnode.com', | |||
'uris': [ | |||
'https://gitlab.com/datadrivendiscovery/common-primitives/blob/master/common_primitives/construct_predictions.py', | |||
'https://gitlab.com/datadrivendiscovery/common-primitives.git', | |||
], | |||
}, | |||
'installation': [{ | |||
'type': metadata_base.PrimitiveInstallationType.PIP, | |||
'package_uri': 'git+https://gitlab.com/datadrivendiscovery/common-primitives.git@{git_commit}#egg=common_primitives'.format( | |||
git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)), | |||
), | |||
}], | |||
'algorithm_types': [ | |||
metadata_base.PrimitiveAlgorithmType.DATA_CONVERSION, | |||
], | |||
'primitive_family': metadata_base.PrimitiveFamily.DATA_TRANSFORMATION, | |||
}, | |||
) | |||
def produce(self, *, inputs: Inputs, reference: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: # type: ignore | |||
index_columns = inputs.metadata.get_index_columns() | |||
target_columns = inputs.metadata.list_columns_with_semantic_types(('https://metadata.datadrivendiscovery.org/types/PredictedTarget',)) | |||
# Target columns cannot also be index columns. This should not really happen,
# but it could happen with buggy primitives. | |||
target_columns = [target_column for target_column in target_columns if target_column not in index_columns] | |||
if index_columns and target_columns: | |||
outputs = self._produce_using_semantic_types(inputs, index_columns, target_columns) | |||
else: | |||
outputs = self._produce_reconstruct(inputs, reference, index_columns, target_columns) | |||
outputs = compute_scores.ComputeScoresPrimitive._encode_columns(outputs) | |||
# Generally we do not care about column names in DataFrame itself (but use names of columns from metadata), | |||
# but in this case setting column names makes it easier to assure that "to_csv" call produces correct output. | |||
# See: https://gitlab.com/datadrivendiscovery/d3m/issues/147 | |||
column_names = [] | |||
for column_index in range(len(outputs.columns)): | |||
column_names.append(outputs.metadata.query_column(column_index).get('name', outputs.columns[column_index])) | |||
outputs.columns = column_names | |||
return base.CallResult(outputs) | |||
def _filter_index_columns(self, inputs_metadata: metadata_base.DataMetadata, index_columns: typing.Sequence[int]) -> typing.Sequence[int]: | |||
if self.hyperparams['use_columns']: | |||
index_columns = [index_column_index for index_column_index in index_columns if index_column_index in self.hyperparams['use_columns']] | |||
if not index_columns: | |||
raise ValueError("No index columns listed in \"use_columns\" hyper-parameter, but index columns are required.") | |||
else: | |||
index_columns = [index_column_index for index_column_index in index_columns if index_column_index not in self.hyperparams['exclude_columns']] | |||
if not index_columns: | |||
raise ValueError("All index columns listed in \"exclude_columns\" hyper-parameter, but index columns are required.") | |||
names = [] | |||
for index_column in index_columns: | |||
index_metadata = inputs_metadata.query_column(index_column) | |||
# We do not care about empty strings for names either. | |||
if index_metadata.get('name', None): | |||
names.append(index_metadata['name']) | |||
if 'd3mIndex' not in names: | |||
raise ValueError("\"d3mIndex\" index column is missing.") | |||
names_set = set(names) | |||
if len(names) != len(names_set): | |||
duplicate_names = names | |||
for name in names_set: | |||
# Removes just the first occurrence. | |||
duplicate_names.remove(name) | |||
self.logger.warning("Duplicate names for index columns: %(duplicate_names)s", { | |||
'duplicate_names': list(set(duplicate_names)), | |||
}) | |||
return index_columns | |||
def _get_columns(self, inputs_metadata: metadata_base.DataMetadata, index_columns: typing.Sequence[int], target_columns: typing.Sequence[int]) -> typing.List[int]: | |||
assert index_columns | |||
assert target_columns | |||
index_columns = self._filter_index_columns(inputs_metadata, index_columns) | |||
if self.hyperparams['use_columns']: | |||
target_columns = [target_column_index for target_column_index in target_columns if target_column_index in self.hyperparams['use_columns']] | |||
if not target_columns: | |||
raise ValueError("No target columns listed in \"use_columns\" hyper-parameter, but target columns are required.") | |||
else: | |||
target_columns = [target_column_index for target_column_index in target_columns if target_column_index not in self.hyperparams['exclude_columns']] | |||
if not target_columns: | |||
raise ValueError("All target columns listed in \"exclude_columns\" hyper-parameter, but target columns are required.") | |||
assert index_columns | |||
assert target_columns | |||
return list(index_columns) + list(target_columns) | |||
def _get_confidence_columns(self, inputs_metadata: metadata_base.DataMetadata) -> typing.List[int]: | |||
confidence_columns = inputs_metadata.list_columns_with_semantic_types(('https://metadata.datadrivendiscovery.org/types/Confidence',)) | |||
if self.hyperparams['use_columns']: | |||
confidence_columns = [confidence_column_index for confidence_column_index in confidence_columns if confidence_column_index in self.hyperparams['use_columns']] | |||
else: | |||
confidence_columns = [confidence_column_index for confidence_column_index in confidence_columns if confidence_column_index not in self.hyperparams['exclude_columns']] | |||
return confidence_columns | |||
def _produce_using_semantic_types(self, inputs: Inputs, index_columns: typing.Sequence[int], | |||
target_columns: typing.Sequence[int]) -> Outputs: | |||
confidence_columns = self._get_confidence_columns(inputs.metadata) | |||
output_columns = self._get_columns(inputs.metadata, index_columns, target_columns) + confidence_columns | |||
# "get_index_columns" makes sure that "d3mIndex" is always listed first. | |||
# And "select_columns" selects columns in order listed, which then | |||
# always puts "d3mIndex" first. | |||
outputs = inputs.select_columns(output_columns) | |||
if confidence_columns: | |||
outputs.metadata = self._update_confidence_columns(outputs.metadata, confidence_columns) | |||
return outputs | |||
def _update_confidence_columns(self, inputs_metadata: metadata_base.DataMetadata, confidence_columns: typing.Sequence[int]) -> metadata_base.DataMetadata: | |||
output_columns_length = inputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||
outputs_metadata = inputs_metadata | |||
# All confidence columns have to be named "confidence". | |||
for column_index in range(output_columns_length - len(confidence_columns), output_columns_length): | |||
outputs_metadata = outputs_metadata.update((metadata_base.ALL_ELEMENTS, column_index), { | |||
'name': 'confidence', | |||
}) | |||
return outputs_metadata | |||
def _produce_reconstruct(self, inputs: Inputs, reference: Inputs, index_columns: typing.Sequence[int], target_columns: typing.Sequence[int]) -> Outputs: | |||
if not index_columns: | |||
reference_index_columns = reference.metadata.get_index_columns() | |||
if not reference_index_columns: | |||
raise ValueError("Cannot find an index column in reference data, but index column is required.") | |||
filtered_index_columns = self._filter_index_columns(reference.metadata, reference_index_columns) | |||
index = reference.select_columns(filtered_index_columns) | |||
else: | |||
filtered_index_columns = self._filter_index_columns(inputs.metadata, index_columns) | |||
index = inputs.select_columns(filtered_index_columns) | |||
if not target_columns: | |||
if index_columns: | |||
raise ValueError("No target columns in input data, but index column(s) present.") | |||
# We assume all inputs are targets. | |||
targets = inputs | |||
# We make sure at least basic metadata is generated correctly, so we regenerate metadata. | |||
targets.metadata = targets.metadata.generate(targets) | |||
# We set target column names from the reference and add target semantic types.
targets.metadata = self._update_targets_metadata(targets.metadata, self._get_target_names(reference.metadata)) | |||
else: | |||
targets = inputs.select_columns(target_columns) | |||
return index.append_columns(targets) | |||
def multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, reference: Inputs, timeout: float = None, iterations: int = None) -> base.MultiCallResult: # type: ignore | |||
return self._multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs, reference=reference) | |||
def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, reference: Inputs, timeout: float = None, iterations: int = None) -> base.MultiCallResult: # type: ignore | |||
return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs, reference=reference) | |||
def _get_target_names(self, metadata: metadata_base.DataMetadata) -> typing.List[typing.Union[str, None]]: | |||
target_names = [] | |||
for column_index in metadata.list_columns_with_semantic_types(('https://metadata.datadrivendiscovery.org/types/TrueTarget',)): | |||
column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) | |||
target_names.append(column_metadata.get('name', None)) | |||
return target_names | |||
def _update_targets_metadata(self, metadata: metadata_base.DataMetadata, target_names: typing.Sequence[typing.Union[str, None]]) -> metadata_base.DataMetadata: | |||
targets_length = metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||
if targets_length != len(target_names): | |||
raise ValueError("Not an expected number of target columns to apply names for. Expected {target_names}, provided {targets_length}.".format( | |||
target_names=len(target_names), | |||
targets_length=targets_length, | |||
)) | |||
for column_index, target_name in enumerate(target_names): | |||
metadata = metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, column_index), 'https://metadata.datadrivendiscovery.org/types/Target') | |||
metadata = metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, column_index), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget') | |||
# We do not have it, let's skip it and hope for the best. | |||
if target_name is None: | |||
continue | |||
metadata = metadata.update_column(column_index, { | |||
'name': target_name, | |||
}) | |||
return metadata |
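The methods above take both an "inputs" dataframe (the predicted targets) and a "reference" dataframe, which is used to recover the d3mIndex and the target column names. Below is a minimal, illustrative sketch of how such a step might be wired into a pipeline, assuming, as the entry points further below suggest, that this code belongs to the construct_predictions primitive; the step indices and data references are placeholders only.

from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import PrimitiveStep

# Hypothetical wiring: "inputs" carries predicted targets from an earlier step,
# "reference" is the parsed dataframe that still holds d3mIndex and target metadata.
step = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step.add_output('produce')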
@@ -0,0 +1,141 @@ | |||
import os | |||
import typing | |||
from d3m import container, exceptions, utils as d3m_utils | |||
from d3m.base import utils as base_utils | |||
from d3m.metadata import base as metadata_base, hyperparams | |||
from d3m.primitive_interfaces import base, transformer | |||
import common_primitives | |||
__all__ = ('ExtractColumnsBySemanticTypesPrimitive',) | |||
Inputs = container.DataFrame | |||
Outputs = container.DataFrame | |||
class Hyperparams(hyperparams.Hyperparams): | |||
semantic_types = hyperparams.Set( | |||
elements=hyperparams.Hyperparameter[str](''), | |||
default=('https://metadata.datadrivendiscovery.org/types/Attribute',), | |||
min_size=1, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Semantic types to use to extract columns. If any of them matches, by default.", | |||
) | |||
match_logic = hyperparams.Enumeration( | |||
values=['all', 'any', 'equal'], | |||
default='any', | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Should a column have all of semantic types in \"semantic_types\" to be extracted, or any of them?", | |||
) | |||
negate = hyperparams.UniformBool( | |||
default=False, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Should columns which do not match semantic types in \"semantic_types\" be extracted?", | |||
) | |||
use_columns = hyperparams.Set( | |||
elements=hyperparams.Hyperparameter[int](-1), | |||
default=(), | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="A set of column indices to force primitive to operate on. If any specified column does not match any semantic type, it is skipped.", | |||
) | |||
exclude_columns = hyperparams.Set( | |||
elements=hyperparams.Hyperparameter[int](-1), | |||
default=(), | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", | |||
) | |||
add_index_columns = hyperparams.UniformBool( | |||
default=False, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Also include primary index columns if input data has them.", | |||
) | |||
class ExtractColumnsBySemanticTypesPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
A primitive which extracts columns from input data based on semantic types provided. | |||
Columns which match any of the listed semantic types are extracted. | |||
If you want to extract only attributes, you can use ``https://metadata.datadrivendiscovery.org/types/Attribute`` | |||
semantic type (also default). | |||
For real targets (not suggested targets) use ``https://metadata.datadrivendiscovery.org/types/Target``. | |||
For this to work, columns have to be marked as targets by the TA2 in a dataset before passing the dataset
through a pipeline, or something else has to mark them as targets at some point in the pipeline.
It uses ``use_columns`` and ``exclude_columns`` to control which columns it considers. A minimal usage sketch follows the class definition below.
""" | |||
metadata = metadata_base.PrimitiveMetadata( | |||
{ | |||
'id': '4503a4c6-42f7-45a1-a1d4-ed69699cf5e1', | |||
'version': '0.4.0', | |||
'name': "Extracts columns by semantic type", | |||
'python_path': 'd3m.primitives.tods.data_processing.extract_columns_by_semantic_types', | |||
'source': { | |||
'name': common_primitives.__author__, | |||
'contact': 'mailto:mitar.commonprimitives@tnode.com', | |||
'uris': [ | |||
'https://gitlab.com/datadrivendiscovery/common-primitives/blob/master/common_primitives/extract_columns_semantic_types.py', | |||
'https://gitlab.com/datadrivendiscovery/common-primitives.git', | |||
], | |||
}, | |||
'installation': [{ | |||
'type': metadata_base.PrimitiveInstallationType.PIP, | |||
'package_uri': 'git+https://gitlab.com/datadrivendiscovery/common-primitives.git@{git_commit}#egg=common_primitives'.format( | |||
git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)), | |||
), | |||
}], | |||
'algorithm_types': [ | |||
metadata_base.PrimitiveAlgorithmType.ARRAY_SLICING, | |||
], | |||
'primitive_family': metadata_base.PrimitiveFamily.DATA_TRANSFORMATION, | |||
}, | |||
) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: | |||
columns_to_use = self._get_columns(inputs.metadata) | |||
output_columns = inputs.select_columns(columns_to_use) | |||
outputs = base_utils.combine_columns(inputs, columns_to_use, [output_columns], return_result='new', add_index_columns=self.hyperparams['add_index_columns']) | |||
return base.CallResult(outputs) | |||
def _can_use_column(self, inputs_metadata: metadata_base.DataMetadata, column_index: int) -> bool: | |||
column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) | |||
semantic_types = column_metadata.get('semantic_types', []) | |||
if self.hyperparams['match_logic'] == 'all': | |||
match = all(semantic_type in semantic_types for semantic_type in self.hyperparams['semantic_types']) | |||
elif self.hyperparams['match_logic'] == 'any': | |||
match = any(semantic_type in semantic_types for semantic_type in self.hyperparams['semantic_types']) | |||
elif self.hyperparams["match_logic"] == "equal": | |||
match = set(semantic_types) == set(self.hyperparams["semantic_types"]) | |||
else: | |||
raise exceptions.UnexpectedValueError("Unknown value of hyper-parameter \"match_logic\": {value}".format(value=self.hyperparams['match_logic'])) | |||
if self.hyperparams['negate']: | |||
return not match | |||
else: | |||
return match | |||
def _get_columns(self, inputs_metadata: metadata_base.DataMetadata) -> typing.Sequence[int]: | |||
def can_use_column(column_index: int) -> bool: | |||
return self._can_use_column(inputs_metadata, column_index) | |||
columns_to_use, columns_not_to_use = base_utils.get_columns_to_use(inputs_metadata, self.hyperparams['use_columns'], self.hyperparams['exclude_columns'], can_use_column) | |||
if not columns_to_use: | |||
raise ValueError("Input data has no columns matching semantic types: {semantic_types}".format( | |||
semantic_types=self.hyperparams['semantic_types'], | |||
)) | |||
if self.hyperparams['use_columns'] and columns_not_to_use: | |||
self.logger.warning("Not all specified columns match semantic types. Skipping columns: %(columns)s", { | |||
'columns': columns_not_to_use, | |||
}) | |||
return columns_to_use |
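A minimal usage sketch for the primitive above. It is illustrative only, relies on the imports at the top of this file, and uses a made-up two-column dataframe in which only the first column is tagged as an Attribute.

# Illustrative sketch: build a tiny dataframe, tag one column as an Attribute,
# and extract it with the primitive defined above (default hyper-parameters).
df = container.DataFrame({'a': [1, 2], 'b': [3, 4]}, generate_metadata=True)
df.metadata = df.metadata.add_semantic_type(
    (metadata_base.ALL_ELEMENTS, 0),
    'https://metadata.datadrivendiscovery.org/types/Attribute',
)
primitive = ExtractColumnsBySemanticTypesPrimitive(hyperparams=Hyperparams.defaults())
extracted = primitive.produce(inputs=df).value  # contains only column "a"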
@@ -7,7 +7,9 @@ tods.data_processing.timestamp_validation = tods.data_processing.TimeStampValida | |||
tods.data_processing.duplication_validation = tods.data_processing.DuplicationValidation:DuplicationValidationPrimitive | |||
tods.data_processing.continuity_validation = tods.data_processing.ContinuityValidation:ContinuityValidationPrimitive | |||
tods.data_processing.impute_missing = tods.data_processing.SKImputer:SKImputerPrimitive | |||
tods.data_processing.column_parser = tods.data_processing.ColumnParser:ColumnParserPrimitive | |||
tods.data_processing.extract_columns_by_semantic_types = tods.data_processing.ExtractColumnsBySemanticTypes:ExtractColumnsBySemanticTypesPrimitive | |||
tods.data_processing.construct_predictions = tods.data_processing.ConstructPredictions:ConstructPredictionsPrimitive | |||
tods.timeseries_processing.transformation.axiswise_scaler = tods.timeseries_processing.SKAxiswiseScaler:SKAxiswiseScalerPrimitive | |||
tods.timeseries_processing.transformation.standard_scaler = tods.timeseries_processing.SKStandardScaler:SKStandardScalerPrimitive | |||
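These entry points are what make the primitives resolvable through the d3m index once the package is installed. A minimal sketch, assuming a package providing the entry points above is installed in the current environment:

from d3m import index

# Entry-point names are exposed under the "d3m.primitives." namespace.
primitive_class = index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')
print(primitive_class.metadata.query()['name'])  # "Extracts columns by semantic type"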