|
- from d3m import index
- from d3m.metadata.base import ArgumentType
- from d3m.metadata.pipeline import Pipeline, PrimitiveStep
-
- # -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
- # extract_columns_by_semantic_types(targets) -> ^
-
- # Creating pipeline
- pipeline_description = Pipeline()
- pipeline_description.add_input(name='inputs')
-
- # Step 0: dataset_to_dataframe
- primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
- step_0 = PrimitiveStep(primitive=primitive_0)
- step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
- step_0.add_output('produce')
- pipeline_description.add_step(step_0)
-
- # # Step 1: dataframe transformation
- # primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKPowerTransformer')
- # primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKStandardization')
- # primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKQuantileTransformer')
-
- #Step 1: column_parser
- step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
- step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
- step_1.add_output('produce')
- pipeline_description.add_step(step_1)
-
- primitive_2 = index.get_primitive('d3m.primitives.tods.data_processing.time_interval_transform')
- step_2 = PrimitiveStep(primitive=primitive_2)
- step_2.add_hyperparameter(name="time_interval", argument_type=ArgumentType.VALUE, data = '5T')
- step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
- step_2.add_output('produce')
- pipeline_description.add_step(step_2)
- #
- # # Step 2: column_parser
- # step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
- # step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
- # step_2.add_output('produce')
- # pipeline_description.add_step(step_2)
- #
- #
- # # Step 3: extract_columns_by_semantic_types(attributes)
- # step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
- # step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
- # step_3.add_output('produce')
- # step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
- # data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
- # pipeline_description.add_step(step_3)
- #
- # # Step 4: extract_columns_by_semantic_types(targets)
- # step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
- # step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
- # step_4.add_output('produce')
- # step_4.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
- # data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
- # pipeline_description.add_step(step_4)
- #
- # attributes = 'steps.3.produce'
- # targets = 'steps.4.produce'
- #
- # # Step 5: imputer
- # step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_cleaning.imputer.SKlearn'))
- # step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
- # step_5.add_output('produce')
- # pipeline_description.add_step(step_5)
- #
- # # Step 6: random_forest
- # step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.regression.random_forest.SKlearn'))
- # step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
- # step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
- # step_6.add_output('produce')
- # pipeline_description.add_step(step_6)
-
- # Final Output
- pipeline_description.add_output(name='output predictions', data_reference='steps.1.produce')
-
- # Output to YAML
- yaml = pipeline_description.to_yaml()
- with open('pipeline.yml', 'w') as f:
- f.write(yaml)
- print(yaml)
-
- # Or you can output json
- #data = pipline_description.to_json()
|