@@ -10,19 +10,19 @@ pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')
# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)
# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
@@ -30,7 +30,7 @@ step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALU
pipeline_description.add_step(step_2)
# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
@@ -53,7 +53,7 @@ step_5.add_output('produce')
pipeline_description.add_step(step_5)
# Step 6: Predictions
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_6.add_output('produce')
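The hunks above replace the generic Common data-processing primitives with their tods.data_processing counterparts. As a quick sanity check (a minimal sketch, assuming the TODS primitives are installed and registered as d3m entry points in your environment), the renamed paths resolve through the same index lookup these scripts already use:

from d3m import index

# Raises if the TODS primitive is not registered in the current environment.
primitive = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
print(primitive)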
@@ -1,70 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: processing
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: algorithm
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.auto_correlation'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: Predictions
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_6.add_output('produce')
pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')

# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)
@@ -1,70 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: processing
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: algorithm
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.deeplog'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: Predictions
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_6.add_output('produce')
pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')

# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)
@@ -1,72 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: auto encoder
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce_score')
step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=[0, 1, 2])
step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
pipeline_description.add_step(step_4)

# Step 5: ensemble
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.Ensemble'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce_score')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
#yaml = pipeline_description.to_yaml()
#with open('pipeline.yml', 'w') as f:
#    f.write(yaml)
#print(yaml)

# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)
@@ -1,103 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: Power transformation
primitive_4 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.power_transformer')
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: Axiswise scaling
primitive_5 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')
step_5 = PrimitiveStep(primitive=primitive_5)
step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: Standardization
primitive_6 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_6 = PrimitiveStep(primitive=primitive_6)
step_6.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_output('produce')
pipeline_description.add_step(step_6)

# Step 7: Quantile transformation
primitive_7 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.quantile_transformer')
step_7 = PrimitiveStep(primitive=primitive_7)
step_7.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
step_7.add_output('produce')
pipeline_description.add_step(step_7)

# Step 8: Isolation Forest
primitive_8 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_iforest')
step_8 = PrimitiveStep(primitive=primitive_8)
step_8.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_8.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.7.produce')
# step_8.add_output('produce_score')
step_8.add_output('produce')
pipeline_description.add_step(step_8)

# Step 9: Predictions
step_9 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_9.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.8.produce')
step_9.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_9.add_output('produce')
pipeline_description.add_step(step_9)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.9.produce')

# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)

## Output to YAML
#yaml = pipeline_description.to_yaml()
#with open('pipeline.yml', 'w') as f:
#    f.write(yaml)
#print(yaml)
@@ -1,72 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: test primitive
primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_loda')
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: Predictions
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)
@@ -1,70 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: processing
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: algorithm
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.matrix_profile'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: Predictions
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_6.add_output('produce')
pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')

# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)
@@ -1,70 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: processing
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: algorithm
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sod'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: Predictions
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_6.add_output('produce')
pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')

# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)
@@ -1,74 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: auto encoder
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce_score')
#step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=[2])
#step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
pipeline_description.add_step(step_4)

# Step 5: ensemble
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.system_wise_detection'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce_score')
step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
#yaml = pipeline_description.to_yaml()
#with open('pipeline.yml', 'w') as f:
#    f.write(yaml)
#print(yaml)

# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)
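Each of the deleted scripts above ends by serializing the pipeline description to example_pipeline.json. For context, a minimal sketch of consuming such a file follows, using the generate_dataset, load_pipeline and evaluate_pipeline helpers that the runner script below relies on (the exact import path of these helpers is an assumption; adjust to your TODS installation):

import pandas as pd
from tods import generate_dataset, load_pipeline, evaluate_pipeline  # assumed import path

df = pd.read_csv('../../datasets/yahoo_sub_5.csv')  # dataset used by the example runner below
dataset = generate_dataset(df, 6)                   # column 6 holds the ground-truth labels
pipeline = load_pipeline('example_pipeline.json')   # JSON produced by one of the scripts above
pipeline_result = evaluate_pipeline(dataset, pipeline, 'F1_MACRO')
print(pipeline_result)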
@@ -9,13 +9,13 @@ this_path = os.path.dirname(os.path.abspath(__file__))
#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset
parser = argparse.ArgumentParser(description='Arguments for running predefined pipeline.')
parser.add_argument('--table_path', type=str, default=os.path.join(this_path, '../datasets/yahoo_sub_5.csv'),
parser.add_argument('--table_path', type=str, default=os.path.join(this_path, '../../datasets/yahoo_sub_5.csv'),
                    help='Input the path of the input data table')
parser.add_argument('--target_index', type=int, default=6,
                    help='Index of the ground truth (for evaluation)')
parser.add_argument('--metric', type=str, default='F1_MACRO',
                    help='Evaluation Metric (F1, F1_MACRO)')
parser.add_argument('--pipeline_path', default=os.path.join(this_path, './example_pipeline.json'),
parser.add_argument('--pipeline_path', default=os.path.join(this_path, './example_pipelines/abod_pipeline.json'),
                    help='Input the path of the pre-built pipeline description')
# parser.add_argument('--pipeline_path', default=os.path.join(this_path, '../tods/resources/default_pipeline.json'),
#                     help='Input the path of the pre-built pipeline description')
@@ -33,6 +33,9 @@ dataset = generate_dataset(df, target_index)
# Load the default pipeline
pipeline = load_pipeline(pipeline_path)
print(dir(pipeline))
print(pipeline.steps)
print(dir(dataset))
# Run the pipeline
pipeline_result = evaluate_pipeline(dataset, pipeline, metric)
@@ -0,0 +1,148 @@
import os
import pickle
import unittest

from d3m import container
from d3m.metadata import base as metadata_base

from tods.common import FixedSplit


class FixedSplitDatasetSplitPrimitiveTestCase(unittest.TestCase):
    def test_produce_train_values(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))

        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = FixedSplit.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams()

        hyperparams = hyperparams_class.defaults().replace({
            'primary_index_values': ['9', '11', '13'],
        })

        # We want to make sure "primary_index_values" is encoded just as a list and not
        # a pickle because runtime populates this primitive as a list from a split file.
        self.assertEqual(hyperparams.values_to_json_structure(), {'primary_index_values': ['9', '11', '13'], 'row_indices': [], 'delete_recursive': False})

        primitive = FixedSplit.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams)

        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        # To test that pickling works.
        pickle.dumps(primitive)

        results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value

        self.assertEqual(len(results), 1)

        for dataset in results:
            self.assertEqual(len(dataset), 1)

        self.assertEqual(results[0]['learningData'].shape[0], 147)
        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i not in [9, 11, 13]])

    def test_produce_score_values(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))

        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = FixedSplit.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams()

        hyperparams = hyperparams_class.defaults().replace({
            'primary_index_values': ['9', '11', '13'],
        })

        # We want to make sure "primary_index_values" is encoded just as a list and not
        # a pickle because runtime populates this primitive as a list from a split file.
        self.assertEqual(hyperparams.values_to_json_structure(), {'primary_index_values': ['9', '11', '13'], 'row_indices': [], 'delete_recursive': False})

        primitive = FixedSplit.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams)

        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value

        self.assertEqual(len(results), 1)

        for dataset in results:
            self.assertEqual(len(dataset), 1)

        self.assertEqual(results[0]['learningData'].shape[0], 3)
        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i in [9, 11, 13]])

    def test_produce_train_indices(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))

        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = FixedSplit.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams()

        primitive = FixedSplit.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
            'row_indices': [9, 11, 13],
        }))

        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        # To test that pickling works.
        pickle.dumps(primitive)

        results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value

        self.assertEqual(len(results), 1)

        for dataset in results:
            self.assertEqual(len(dataset), 1)

        self.assertEqual(results[0]['learningData'].shape[0], 147)
        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i not in [9, 11, 13]])

    def test_produce_score_indices(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))

        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = FixedSplit.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams()

        primitive = FixedSplit.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
            'row_indices': [9, 11, 13],
        }))

        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value

        self.assertEqual(len(results), 1)

        for dataset in results:
            self.assertEqual(len(dataset), 1)

        self.assertEqual(results[0]['learningData'].shape[0], 3)
        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i in [9, 11, 13]])


if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,100 @@
import os
import pickle
import unittest

from d3m import container
from d3m.metadata import base as metadata_base

from common_primitives import kfold_split


class KFoldDatasetSplitPrimitiveTestCase(unittest.TestCase):
    def test_produce_train(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json'))

        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = kfold_split.KFoldDatasetSplitPrimitive.metadata.get_hyperparams()

        primitive = kfold_split.KFoldDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
            'number_of_folds': 10,
            'shuffle': True,
            'delete_recursive': True,
        }))

        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        # To test that pickling works.
        pickle.dumps(primitive)

        results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value

        self.assertEqual(len(results), 2)

        for dataset in results:
            self.assertEqual(len(dataset), 4)

        self.assertEqual(results[0]['codes'].shape[0], 3)
        self.assertEqual(results[1]['codes'].shape[0], 3)

        self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'})
        self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 40)
        self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'})
        self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ccc', 'ddd', 'eee'})
        self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990', '2000', '2010'})

        self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'})
        self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 40)
        self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'})
        self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ccc', 'ddd', 'eee'})
        self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'1990', '2000', '2010'})

    def test_produce_score(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json'))

        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = kfold_split.KFoldDatasetSplitPrimitive.metadata.get_hyperparams()

        primitive = kfold_split.KFoldDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
            'number_of_folds': 10,
            'shuffle': True,
            'delete_recursive': True,
        }))

        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        results = primitive.produce_score_data(inputs=container.List([0, 1], generate_metadata=True)).value

        self.assertEqual(len(results), 2)

        for dataset in results:
            self.assertEqual(len(dataset), 4)

        self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB'})
        self.assertEqual(set(results[0]['learningData'].iloc[:, 0]), {'5', '11', '28', '31', '38'})
        self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB'})
        self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ddd', 'eee'})
        self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990', '2000'})

        self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'BBB', 'CCC'})
        self.assertEqual(set(results[1]['learningData'].iloc[:, 0]), {'12', '26', '29', '32', '39'})
        self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'BBB', 'CCC'})
        self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'bbb', 'ccc', 'ddd', 'eee'})
        self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'1990', '2000', '2010'})


if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,223 @@ | |||
import os | |||
import pickle | |||
import unittest | |||
from d3m import container | |||
from d3m.metadata import base as metadata_base | |||
from common_primitives import kfold_split_timeseries | |||
class KFoldTimeSeriesSplitPrimitiveTestCase(unittest.TestCase): | |||
def test_produce_train_timeseries_1(self): | |||
dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json')) | |||
dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) | |||
# We set semantic types like runtime would. | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') | |||
dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') | |||
hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() | |||
folds = 5 | |||
primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ | |||
'number_of_folds': folds, | |||
'number_of_window_folds': 1, | |||
})) | |||
primitive.set_training_data(dataset=dataset) | |||
primitive.fit() | |||
# To test that pickling works. | |||
pickle.dumps(primitive) | |||
results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value | |||
self.assertEqual(len(results), 2) | |||
for dataset in results: | |||
self.assertEqual(len(dataset), 1) | |||
self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 8) | |||
self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'2013-11-05', '2013-11-06', '2013-11-07', '2013-11-08', '2013-11-11', | |||
'2013-11-12', '2013-11-13', '2013-11-14'}) | |||
self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 8) | |||
self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2013-11-13', '2013-11-14', '2013-11-15', '2013-11-18', '2013-11-19', | |||
'2013-11-20', '2013-11-21', '2013-11-22'}) | |||
def test_produce_score_timeseries_1(self): | |||
dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json')) | |||
dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) | |||
# We set semantic types like runtime would. | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') | |||
dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') | |||
hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() | |||
folds = 5 | |||
primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ | |||
'number_of_folds': folds, | |||
'number_of_window_folds': 1, | |||
})) | |||
primitive.set_training_data(dataset=dataset) | |||
primitive.fit() | |||
results = primitive.produce_score_data(inputs=container.List([0, 1], generate_metadata=True)).value | |||
self.assertEqual(len(results), 2) | |||
for dataset in results: | |||
self.assertEqual(len(dataset), 1) | |||
self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 6) | |||
self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'2013-11-15', '2013-11-18', '2013-11-19', | |||
'2013-11-20', '2013-11-21', '2013-11-22'}) | |||
self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 6) | |||
self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2013-11-25', '2013-11-26', '2013-11-27', | |||
'2013-11-29', '2013-12-02', '2013-12-03'}) | |||
def test_produce_train(self): | |||
dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) | |||
dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) | |||
# We set semantic types like runtime would. | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') | |||
dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') | |||
# We fake that the dataset is time-series. | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 3), 'https://metadata.datadrivendiscovery.org/types/Time') | |||
hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() | |||
folds = 5 | |||
primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ | |||
'number_of_folds': folds, | |||
'number_of_window_folds': 1, | |||
})) | |||
primitive.set_training_data(dataset=dataset) | |||
primitive.fit() | |||
# To test that pickling works. | |||
pickle.dumps(primitive) | |||
results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value | |||
self.assertEqual(len(results), 2) | |||
for dataset in results: | |||
self.assertEqual(len(dataset), 4) | |||
self.assertEqual(results[0]['codes'].shape[0], 3) | |||
self.assertEqual(results[1]['codes'].shape[0], 3) | |||
self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) | |||
self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 9) | |||
self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) | |||
self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'bbb', 'ccc', 'ddd'}) | |||
self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990'}) | |||
self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) | |||
self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 9) | |||
self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) | |||
self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ddd', 'eee'}) | |||
self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'1990', '2000'}) | |||
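# Score counterpart of the previous test: the held-out rows come after the training rows in the faked time column. | |||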
def test_produce_score(self): | |||
dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) | |||
dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) | |||
# We set semantic types like runtime would. | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') | |||
dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') | |||
# We fake that the dataset is time-series. | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 3), 'https://metadata.datadrivendiscovery.org/types/Time') | |||
hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() | |||
folds = 5 | |||
primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ | |||
'number_of_folds': folds, | |||
'number_of_window_folds': 1, | |||
})) | |||
primitive.set_training_data(dataset=dataset) | |||
primitive.fit() | |||
results = primitive.produce_score_data(inputs=container.List([0, 1], generate_metadata=True)).value | |||
self.assertEqual(len(results), 2) | |||
for dataset in results: | |||
self.assertEqual(len(dataset), 4) | |||
self.assertEqual(results[0]['codes'].shape[0], 3) | |||
self.assertEqual(results[1]['codes'].shape[0], 3) | |||
self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) | |||
self.assertEqual(set(results[0]['learningData'].iloc[:, 0]), {'2', '3', '32', '33', '37', '38', '39'}) | |||
self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) | |||
self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'aaa', 'ddd', 'eee'}) | |||
self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990', '2000'}) | |||
self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) | |||
self.assertEqual(set(results[1]['learningData'].iloc[:, 0]), {'22', '23', '24', '31', '40', '41', '42'}) | |||
self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) | |||
self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'ccc', 'ddd', 'eee'}) | |||
self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2000'}) | |||
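# timeseries_dataset_4 stores its timestamps out of order; the expected folds match the sorted dataset above, so the primitive is expected to sort by time before splitting. | |||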
def test_unsorted_datetimes_timeseries_4(self): | |||
dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'timeseries_dataset_4', 'datasetDoc.json')) | |||
dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) | |||
# We set semantic types like runtime would. | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') | |||
dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') | |||
hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() | |||
folds = 5 | |||
primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ | |||
'number_of_folds': folds, | |||
'number_of_window_folds': 1, | |||
})) | |||
primitive.set_training_data(dataset=dataset) | |||
primitive.fit() | |||
# To test that pickling works. | |||
pickle.dumps(primitive) | |||
results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value | |||
self.assertEqual(len(results), 2) | |||
for dataset in results: | |||
self.assertEqual(len(dataset), 1) | |||
self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 8) | |||
self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'2013-11-05', '2013-11-06', '2013-11-07', '2013-11-08', '2013-11-11', | |||
'2013-11-12', '2013-11-13', '2013-11-14'}) | |||
self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 8) | |||
self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2013-11-13', '2013-11-14', '2013-11-15', '2013-11-18', '2013-11-19', | |||
'2013-11-20', '2013-11-21', '2013-11-22'}) | |||
if __name__ == '__main__': | |||
unittest.main() |
@@ -0,0 +1,71 @@ | |||
import os | |||
import pickle | |||
import unittest | |||
from d3m import container | |||
from d3m.metadata import base as metadata_base | |||
from common_primitives import no_split | |||
class NoSplitDatasetSplitPrimitiveTestCase(unittest.TestCase): | |||
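# NoSplitDatasetSplitPrimitive returns the full dataset unchanged, so both the train and score splits contain all 150 Iris rows. | |||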
def test_produce_train(self): | |||
dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) | |||
dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) | |||
# We set semantic types like runtime would. | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') | |||
dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') | |||
hyperparams_class = no_split.NoSplitDatasetSplitPrimitive.metadata.get_hyperparams() | |||
primitive = no_split.NoSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults()) | |||
primitive.set_training_data(dataset=dataset) | |||
primitive.fit() | |||
# To test that pickling works. | |||
pickle.dumps(primitive) | |||
results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value | |||
self.assertEqual(len(results), 1) | |||
for dataset in results: | |||
self.assertEqual(len(dataset), 1) | |||
self.assertEqual(results[0]['learningData'].shape[0], 150) | |||
self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150)]) | |||
def test_produce_score(self): | |||
dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) | |||
dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) | |||
# We set semantic types like runtime would. | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') | |||
dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') | |||
hyperparams_class = no_split.NoSplitDatasetSplitPrimitive.metadata.get_hyperparams() | |||
primitive = no_split.NoSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults()) | |||
primitive.set_training_data(dataset=dataset) | |||
primitive.fit() | |||
results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value | |||
self.assertEqual(len(results), 1) | |||
for dataset in results: | |||
self.assertEqual(len(dataset), 1) | |||
self.assertEqual(results[0]['learningData'].shape[0], 150) | |||
self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150)]) | |||
if __name__ == '__main__': | |||
unittest.main() |
@@ -0,0 +1,173 @@ | |||
import os | |||
import unittest | |||
from d3m import container, utils | |||
from d3m.metadata import base as metadata_base | |||
from common_primitives import redact_columns | |||
class RedactColumnsPrimitiveTestCase(unittest.TestCase): | |||
def _get_datasets(self): | |||
dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) | |||
dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) | |||
# We set semantic types like runtime would. | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') | |||
dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') | |||
datasets = container.List([dataset], { | |||
'schema': metadata_base.CONTAINER_SCHEMA_VERSION, | |||
'structural_type': container.List, | |||
'dimension': { | |||
'length': 1, | |||
}, | |||
}, generate_metadata=False) | |||
# We update metadata based on metadata of each dataset. | |||
# TODO: In the future this might be done automatically by generate_metadata. | |||
# See: https://gitlab.com/datadrivendiscovery/d3m/issues/119 | |||
for index, dataset in enumerate(datasets): | |||
datasets.metadata = dataset.metadata.copy_to(datasets.metadata, (), (index,)) | |||
return dataset_doc_path, datasets | |||
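# Redacting by the TrueTarget semantic type should blank all 'species' values and attach the configured RedactedTarget/MissingData types. | |||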
def test_basic(self): | |||
dataset_doc_path, datasets = self._get_datasets() | |||
hyperparams_class = redact_columns.RedactColumnsPrimitive.metadata.get_hyperparams() | |||
primitive = redact_columns.RedactColumnsPrimitive(hyperparams=hyperparams_class.defaults().replace({ | |||
'semantic_types': ('https://metadata.datadrivendiscovery.org/types/TrueTarget',), | |||
'add_semantic_types': ('https://metadata.datadrivendiscovery.org/types/RedactedTarget', 'https://metadata.datadrivendiscovery.org/types/MissingData'), | |||
})) | |||
redacted_datasets = primitive.produce(inputs=datasets).value | |||
self.assertEqual(len(redacted_datasets), 1) | |||
redacted_dataset = redacted_datasets[0] | |||
self.assertIsInstance(redacted_dataset, container.Dataset) | |||
self.assertEqual(redacted_dataset['learningData']['species'].values.tolist(), [''] * 150) | |||
self._test_metadata(redacted_datasets.metadata, dataset_doc_path, True) | |||
self._test_metadata(redacted_dataset.metadata, dataset_doc_path, False) | |||
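# Helper: checks that only the 'species' column metadata gained the redaction semantic types; when is_list is True the same metadata is expected one level down inside the container List. | |||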
def _test_metadata(self, metadata, dataset_doc_path, is_list): | |||
top_metadata = { | |||
'structural_type': 'd3m.container.dataset.Dataset', | |||
'id': 'iris_dataset_1', | |||
'version': '4.0.0', | |||
'name': 'Iris Dataset', | |||
'location_uris': [ | |||
'file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path), | |||
], | |||
'dimension': { | |||
'name': 'resources', | |||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'], | |||
'length': 1, | |||
}, | |||
'digest': '49404bf166238fbdac2b6d6baa899a0d1bf8ed5976525fa7353fd732ac218a85', | |||
'source': { | |||
'license': 'CC', | |||
'redacted': False, | |||
'human_subjects_research': False, | |||
}, | |||
} | |||
if is_list: | |||
prefix = [0] | |||
list_metadata = [{ | |||
'selector': [], | |||
'metadata': { | |||
'dimension': { | |||
'length': 1, | |||
}, | |||
'schema': metadata_base.CONTAINER_SCHEMA_VERSION, | |||
'structural_type': 'd3m.container.list.List', | |||
}, | |||
}] | |||
else: | |||
prefix = [] | |||
list_metadata = [] | |||
top_metadata['schema'] = metadata_base.CONTAINER_SCHEMA_VERSION | |||
self.assertEqual(utils.to_json_structure(metadata.to_internal_simple_structure()), list_metadata + [{ | |||
'selector': prefix + [], | |||
'metadata': top_metadata, | |||
}, { | |||
'selector': prefix + ['learningData'], | |||
'metadata': { | |||
'structural_type': 'd3m.container.pandas.DataFrame', | |||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table', 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint'], | |||
'dimension': { | |||
'name': 'rows', | |||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], | |||
'length': 150, | |||
}, | |||
}, | |||
}, { | |||
'selector': prefix + ['learningData', '__ALL_ELEMENTS__'], | |||
'metadata': { | |||
'dimension': { | |||
'name': 'columns', | |||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], | |||
'length': 6, | |||
}, | |||
}, | |||
}, { | |||
'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 0], | |||
'metadata': { | |||
'name': 'd3mIndex', | |||
'structural_type': 'str', | |||
'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], | |||
}, | |||
}, { | |||
'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 1], | |||
'metadata': { | |||
'name': 'sepalLength', | |||
'structural_type': 'str', | |||
'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], | |||
}, | |||
}, { | |||
'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 2], | |||
'metadata': { | |||
'name': 'sepalWidth', | |||
'structural_type': 'str', | |||
'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], | |||
}, | |||
}, { | |||
'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 3], | |||
'metadata': { | |||
'name': 'petalLength', | |||
'structural_type': 'str', | |||
'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], | |||
}, | |||
}, { | |||
'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 4], | |||
'metadata': { | |||
'name': 'petalWidth', | |||
'structural_type': 'str', | |||
'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], | |||
}, | |||
}, { | |||
'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 5], | |||
'metadata': { | |||
'name': 'species', | |||
'structural_type': 'str', | |||
'semantic_types': [ | |||
'https://metadata.datadrivendiscovery.org/types/CategoricalData', | |||
'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', | |||
'https://metadata.datadrivendiscovery.org/types/Target', | |||
'https://metadata.datadrivendiscovery.org/types/TrueTarget', | |||
'https://metadata.datadrivendiscovery.org/types/RedactedTarget', | |||
'https://metadata.datadrivendiscovery.org/types/MissingData', | |||
], | |||
}, | |||
}]) | |||
if __name__ == '__main__': | |||
unittest.main() |
@@ -0,0 +1,130 @@ | |||
import os | |||
import pickle | |||
import unittest | |||
from d3m import container | |||
from d3m.metadata import base as metadata_base | |||
from common_primitives import train_score_split | |||
class TrainScoreDatasetSplitPrimitiveTestCase(unittest.TestCase): | |||
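# With shuffling enabled, the train split keeps 112 of the 150 Iris rows and preserves the original column metadata. | |||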
def test_produce_train(self): | |||
dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) | |||
dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) | |||
# We set semantic types like runtime would. | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') | |||
dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') | |||
hyperparams_class = train_score_split.TrainScoreDatasetSplitPrimitive.metadata.get_hyperparams() | |||
primitive = train_score_split.TrainScoreDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ | |||
'shuffle': True, | |||
})) | |||
primitive.set_training_data(dataset=dataset) | |||
primitive.fit() | |||
# To test that pickling works. | |||
pickle.dumps(primitive) | |||
results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value | |||
self.assertEqual(len(results), 1) | |||
for dataset in results: | |||
self.assertEqual(len(dataset), 1) | |||
self.assertEqual(results[0]['learningData'].shape[0], 112) | |||
self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [ | |||
'0', '1', '2', '3', '4', '5', '6', '9', '10', '11', '12', '13', '14', '15', '17', '19', '20', | |||
'21', '23', '25', '28', '29', '30', '31', '32', '34', '35', '36', '38', '39', '41', '42', '43', | |||
'46', '47', '48', '49', '50', '52', '53', '55', '56', '57', '58', '60', '61', '64', '65', '67', | |||
'68', '69', '70', '72', '74', '75', '77', '79', '80', '81', '82', '85', '87', '88', '89', '91', | |||
'92', '94', '95', '96', '98', '99', '101', '102', '103', '104', '105', '106', '108', '109', '110', | |||
'111', '112', '113', '115', '116', '117', '118', '119', '120', '122', '123', '124', '125', '128', | |||
'129', '130', '131', '133', '135', '136', '138', '139', '140', '141', '142', '143', '144', '145', | |||
'146', '147', '148', '149', | |||
]) | |||
self.assertEqual(results.metadata.query((0, 'learningData'))['dimension']['length'], 112) | |||
column_names = ['d3mIndex', 'sepalLength', 'sepalWidth', 'petalLength', 'petalWidth', 'species'] | |||
for i in range(6): | |||
self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['name'], column_names[i]) | |||
self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 0))['semantic_types'], ( | |||
"http://schema.org/Integer", "https://metadata.datadrivendiscovery.org/types/PrimaryKey" | |||
)) | |||
for i in range(1, 5): | |||
self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['semantic_types'], ( | |||
'http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute' | |||
)) | |||
self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 5))['semantic_types'], ( | |||
'https://metadata.datadrivendiscovery.org/types/CategoricalData', | |||
'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', | |||
'https://metadata.datadrivendiscovery.org/types/Target', | |||
'https://metadata.datadrivendiscovery.org/types/TrueTarget' | |||
)) | |||
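# The score split holds the remaining 38 rows; column names and semantic types must match the train split. | |||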
def test_produce_score(self): | |||
dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) | |||
dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) | |||
# We set semantic types like runtime would. | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') | |||
dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') | |||
dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') | |||
hyperparams_class = train_score_split.TrainScoreDatasetSplitPrimitive.metadata.get_hyperparams() | |||
primitive = train_score_split.TrainScoreDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ | |||
'shuffle': True, | |||
})) | |||
primitive.set_training_data(dataset=dataset) | |||
primitive.fit() | |||
results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value | |||
self.assertEqual(len(results), 1) | |||
for dataset in results: | |||
self.assertEqual(len(dataset), 1) | |||
self.assertEqual(results[0]['learningData'].shape[0], 38) | |||
self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [ | |||
'7', '8', '16', '18', '22', '24', '26', '27', '33', '37', '40', '44', '45', '51', '54', | |||
'59', '62', '63', '66', '71', '73', '76', '78', '83', '84', '86', '90', '93', '97', '100', | |||
'107', '114', '121', '126', '127', '132', '134', '137', | |||
]) | |||
self.assertEqual(results.metadata.query((0, 'learningData'))['dimension']['length'], 38) | |||
column_names = ['d3mIndex', 'sepalLength', 'sepalWidth', 'petalLength', 'petalWidth', 'species'] | |||
for i in range(6): | |||
self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['name'], column_names[i]) | |||
self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 0))['semantic_types'], ( | |||
"http://schema.org/Integer", "https://metadata.datadrivendiscovery.org/types/PrimaryKey" | |||
)) | |||
for i in range(1, 5): | |||
self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['semantic_types'], ( | |||
'http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute' | |||
)) | |||
self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 5))['semantic_types'], ( | |||
'https://metadata.datadrivendiscovery.org/types/CategoricalData', | |||
'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', | |||
'https://metadata.datadrivendiscovery.org/types/Target', | |||
'https://metadata.datadrivendiscovery.org/types/TrueTarget' | |||
)) | |||
if __name__ == '__main__': | |||
unittest.main() |