
initial commit

Branch: master
Author: Devesh Kumar, 4 years ago
Parent commit: 77d72c7b86
71 changed files with 4667 additions and 2 deletions

  1. new_tests/build_ABOD_pipline.py (+70, -0)
  2. new_tests/build_AutoEncoder.py (+67, -0)
  3. new_tests/build_AutoRegODetect_pipeline.py (+71, -0)
  4. new_tests/build_AxiswiseScale_pipline.py (+50, -0)
  5. new_tests/build_BKFilter_pipline.py (+44, -0)
  6. new_tests/build_CBLOF_pipline.py (+51, -0)
  7. new_tests/build_CategoricalToBinary.py (+48, -0)
  8. new_tests/build_ColumnFilter_pipeline.py (+49, -0)
  9. new_tests/build_ContinuityValidation_pipline.py (+43, -0)
  10. new_tests/build_DeepLog_pipeline.py (+49, -0)
  11. new_tests/build_DiscreteCosineTransform.py (+50, -0)
  12. new_tests/build_DuplicationValidation_pipline.py (+42, -0)
  13. new_tests/build_FastFourierTransform.py (+48, -0)
  14. new_tests/build_HBOS_pipline.py (+68, -0)
  15. new_tests/build_HBOS_score_pipline.py (+71, -0)
  16. new_tests/build_HPFilter_pipline.py (+46, -0)
  17. new_tests/build_HoltSmoothing_pipline.py (+76, -0)
  18. new_tests/build_HoltWintersExponentialSmoothing_pipline.py (+76, -0)
  19. new_tests/build_IsolationForest_pipline.py (+59, -0)
  20. new_tests/build_KDiscord_pipeline.py (+71, -0)
  21. new_tests/build_KNN_pipline.py (+51, -0)
  22. new_tests/build_LODA_pipline.py (+51, -0)
  23. new_tests/build_LOF_pipline.py (+51, -0)
  24. new_tests/build_LSTMOD_pipline.py (+70, -0)
  25. new_tests/build_MatrixProfile_pipeline.py (+49, -0)
  26. new_tests/build_MeanAverageTransform_pipline.py (+77, -0)
  27. new_tests/build_NonNegativeMatrixFactorization.py (+50, -0)
  28. new_tests/build_OCSVM_pipline.py (+51, -0)
  29. new_tests/build_PCAODetect_pipeline.py (+71, -0)
  30. new_tests/build_PowerTransform_pipline.py (+49, -0)
  31. new_tests/build_PyodCOF.py (+51, -0)
  32. new_tests/build_QuantileTransform_pipline.py (+49, -0)
  33. new_tests/build_RuleBasedFilter_pipline.py (+54, -0)
  34. new_tests/build_SOD_pipeline.py (+49, -0)
  35. new_tests/build_SimpleExponentialSmoothing_pipline.py (+76, -0)
  36. new_tests/build_Standardize_pipline.py (+49, -0)
  37. new_tests/build_TRMF_pipline.py (+44, -0)
  38. new_tests/build_Telemanom.py (+48, -0)
  39. new_tests/build_TimeIntervalTransform_pipeline.py (+86, -0)
  40. new_tests/build_TruncatedSVD_pipline.py (+44, -0)
  41. new_tests/build_VariationalAutoEncoder.py (+67, -0)
  42. new_tests/build_WaveletTransform_pipline.py (+64, -0)
  43. new_tests/build_test_detection_algorithm_PyodMoGaal.py (+50, -0)
  44. new_tests/build_test_detection_algorithm_PyodSoGaal.py (+50, -0)
  45. new_tests/build_test_feature_analysis_spectral_residual_transform_pipeline.py (+61, -0)
  46. new_tests/build_test_feature_analysis_statistical_abs_energy.py (+62, -0)
  47. new_tests/build_test_feature_analysis_statistical_abs_sum.py (+62, -0)
  48. new_tests/build_test_feature_analysis_statistical_gmean.py (+62, -0)
  49. new_tests/build_test_feature_analysis_statistical_hmean.py (+62, -0)
  50. new_tests/build_test_feature_analysis_statistical_kurtosis.py (+62, -0)
  51. new_tests/build_test_feature_analysis_statistical_maximum.py (+62, -0)
  52. new_tests/build_test_feature_analysis_statistical_mean.py (+62, -0)
  53. new_tests/build_test_feature_analysis_statistical_mean_abs.py (+62, -0)
  54. new_tests/build_test_feature_analysis_statistical_mean_abs_temporal_derivative.py (+62, -0)
  55. new_tests/build_test_feature_analysis_statistical_mean_temporal_derivative.py (+62, -0)
  56. new_tests/build_test_feature_analysis_statistical_median.py (+62, -0)
  57. new_tests/build_test_feature_analysis_statistical_median_absolute_deviation.py (+63, -0)
  58. new_tests/build_test_feature_analysis_statistical_minimum.py (+62, -0)
  59. new_tests/build_test_feature_analysis_statistical_skew.py (+62, -0)
  60. new_tests/build_test_feature_analysis_statistical_std.py (+62, -0)
  61. new_tests/build_test_feature_analysis_statistical_var.py (+62, -0)
  62. new_tests/build_test_feature_analysis_statistical_variation.py (+62, -0)
  63. new_tests/build_test_feature_analysis_statistical_vec_sum.py (+62, -0)
  64. new_tests/build_test_feature_analysis_statistical_willison_amplitude.py (+62, -0)
  65. new_tests/build_test_feature_analysis_statistical_zero_crossing.py (+62, -0)
  66. new_tests/build_test_time_series_seasonality_trend_decomposition.py (+61, -0)
  67. test.sh (+1, -1)
  68. tods/data_processing/ColumnParser.py (+398, -0)
  69. tods/data_processing/ContructPredictions.py (+261, -0)
  70. tods/data_processing/ExtractColumnsBySemanticTypes.py (+141, -0)
  71. tods/resources/.entry_points.ini (+3, -1)

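All of the new build_* scripts below follow the same skeleton: create a Pipeline, add an input, chain PrimitiveStep objects by wiring each step's 'produce' output into the next step's 'inputs' argument, expose a final output, and serialize the description to YAML. A minimal sketch of that shared pattern, assembled only from calls that appear in the scripts in this commit (the choice of pyod_hbos as the step under test is purely illustrative):

from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Shared skeleton of the build_* test scripts (sketch only).
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: convert the input Dataset into a DataFrame.
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: parse column types.
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: the primitive under test (pyod_hbos here, as an example).
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_hbos'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Expose the last step's output and serialize the pipeline description.
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')
with open('pipeline.yml', 'w') as f:
    f.write(pipeline_description.to_yaml())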
new_tests/build_ABOD_pipline.py (+70, -0)

@@ -0,0 +1,70 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: ABOD
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_abod'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')

step_5.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_5.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2, 4,))
step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='replace')

step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


new_tests/build_AutoEncoder.py (+67, -0)

@@ -0,0 +1,67 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: auto encoder
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_output('produce')
pipeline_description.add_step(step_5)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


new_tests/build_AutoRegODetect_pipeline.py (+71, -0)

@@ -0,0 +1,71 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import numpy as np

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# # Step 3: Standardization
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# # Step 4: test primitive
primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.AutoRegODetector')
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_4.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10)
# step_4.add_hyperparameter(name='weights', argument_type=ArgumentType.VALUE, data=weights_ndarray)
step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False)
# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # There is sth wrong with multi-dimensional
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_output('produce')
step_4.add_output('produce_score')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


new_tests/build_AxiswiseScale_pipline.py (+50, -0)

@@ -0,0 +1,50 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


new_tests/build_BKFilter_pipline.py (+44, -0)

@@ -0,0 +1,44 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)


# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: BKFilter
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.bk_filter'))
# step_2.add_hyperparameter(name = 'columns_using_method', argument_type=ArgumentType.VALUE, data = 'name')
step_2.add_hyperparameter(name = 'use_semantic_types', argument_type=ArgumentType.VALUE, data = True)
step_2.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = (2,3))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


new_tests/build_CBLOF_pipline.py (+51, -0)

@@ -0,0 +1,51 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_cblof')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


new_tests/build_CategoricalToBinary.py (+48, -0)

@@ -0,0 +1,48 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')


# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: Column Parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: Categorical to Binary
primitive_2 = index.get_primitive('d3m.primitives.tods.data_processing.categorical_to_binary')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(3,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

new_tests/build_ColumnFilter_pipeline.py (+49, -0)

@@ -0,0 +1,49 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

#Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.auto_correlation')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True)
step_2.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (2, 3))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

primitive_3 = index.get_primitive('d3m.primitives.tods.data_processing.column_filter')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

new_tests/build_ContinuityValidation_pipline.py (+43, -0)

@@ -0,0 +1,43 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: ContinuityValidation
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.continuity_validation'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name = 'continuity_option', argument_type=ArgumentType.VALUE, data = 'imputation')
step_2.add_hyperparameter(name = 'interval', argument_type=ArgumentType.VALUE, data = 0.3)
# Or:
# step_2.add_hyperparameter(name = 'continuity_option', argument_type=ArgumentType.VALUE, data = 'ablation')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


new_tests/build_DeepLog_pipeline.py (+49, -0)

@@ -0,0 +1,49 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.deeplog')

step_2 = PrimitiveStep(primitive=primitive_2)
#step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# # Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

new_tests/build_DiscreteCosineTransform.py (+50, -0)

@@ -0,0 +1,50 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')


# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: Column Parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: Discrete Cosine Transform
primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.discrete_cosine_transform')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


new_tests/build_DuplicationValidation_pipline.py (+42, -0)

@@ -0,0 +1,42 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)


# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: DuplicationValidation
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.duplication_validation'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name = 'keep_option', argument_type=ArgumentType.VALUE, data = 'average') # Or: 'first'
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


new_tests/build_FastFourierTransform.py (+48, -0)

@@ -0,0 +1,48 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')


# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: Column Parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: Fast Fourier Transform
primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.fast_fourier_transform')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

new_tests/build_HBOS_pipline.py (+68, -0)

@@ -0,0 +1,68 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: HBOS
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_hbos'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')

step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
# step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')

step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


new_tests/build_HBOS_score_pipline.py (+71, -0)

@@ -0,0 +1,71 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: HBOS
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_hbos'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')

step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_5.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
# step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')

step_5.add_output('produce_score')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')
# pipeline_description.add_output(name='output score', data_reference='steps.5.produce_score')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


new_tests/build_HPFilter_pipline.py (+46, -0)

@@ -0,0 +1,46 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)


# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: HPFilter
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.hp_filter'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')

step_2.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = [2,3,6])

step_2.add_hyperparameter(name = 'use_semantic_types', argument_type=ArgumentType.VALUE, data = True)
step_2.add_hyperparameter(name = 'return_result', argument_type=ArgumentType.VALUE, data = 'append')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


new_tests/build_HoltSmoothing_pipline.py (+76, -0)

@@ -0,0 +1,76 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: holt smoothing
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.holt_smoothing'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_hyperparameter(name="exclude_columns", argument_type=ArgumentType.VALUE, data = (2, 3))
step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: isolation forest
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm'))
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
#step_6.add_output('produce')
#pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


new_tests/build_HoltWintersExponentialSmoothing_pipline.py (+76, -0)

@@ -0,0 +1,76 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: holt winters exponential smoothing
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.holt_winters_exponential_smoothing'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (2, 3))
step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: isolation forest
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm'))
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
#step_6.add_output('produce')
#pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


new_tests/build_IsolationForest_pipline.py (+59, -0)

@@ -0,0 +1,59 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# # Step 3: test primitive
primitive_3 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_iforest')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
# step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
# step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce_score')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce_score')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()
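
# A minimal sketch of the JSON route hinted at above, assuming Pipeline.to_json()
# and Pipeline.from_yaml() behave like their YAML counterparts in
# d3m.metadata.pipeline (illustrative only, not the canonical TODS workflow):
#
#   with open('pipeline.json', 'w') as f:
#       f.write(pipeline_description.to_json())
#   reloaded = Pipeline.from_yaml(yaml)   # round-trip check on the YAML string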


+ 71
- 0
new_tests/build_KDiscord_pipeline.py

@@ -0,0 +1,71 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import numpy as np

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# # Step 3: Standardization
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
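# return_result='new' keeps only the scaled columns; the detector below uses
# 'append', which adds its output next to the existing columns instead.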
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# # Step 4: test primitive
primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.KDiscordODetector')
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_4.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10)
# step_4.add_hyperparameter(name='weights', argument_type=ArgumentType.VALUE, data=weights_ndarray)
step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False)
# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # There is sth wrong with multi-dimensional
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
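# window_size=10 scores sliding subsequences of length 10 rather than single
# points; return_subseq_inds=True presumably reports the subsequence indices
# alongside the scores.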
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_output('produce')
step_4.add_output('produce_score')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 51
- 0
new_tests/build_KNN_pipline.py

@@ -0,0 +1,51 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_knn')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 51
- 0
new_tests/build_LODA_pipline.py

@@ -0,0 +1,51 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_loda')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 51
- 0
new_tests/build_LOF_pipline.py

@@ -0,0 +1,51 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_lof')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 70
- 0
new_tests/build_LSTMOD_pipline.py

@@ -0,0 +1,70 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import numpy as np

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# # Step 3: Standardization
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# # Step 4: test primitive
primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.LSTMODetector')
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_4.add_hyperparameter(name='diff_group_method', argument_type=ArgumentType.VALUE, data='average')
step_4.add_hyperparameter(name='feature_dim', argument_type=ArgumentType.VALUE, data=5)
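# feature_dim=5 presumably has to match the number of input columns handed to
# the LSTM, i.e. the five columns scaled in the previous step.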
step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False)
# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # There is sth wrong with multi-dimensional
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 49
- 0
new_tests/build_MatrixProfile_pipeline.py

@@ -0,0 +1,49 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.matrix_profile')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4)) # There is sth wrong with multi-dimensional
step_2.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=3) # subsequence length for the matrix profile
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# # Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 77
- 0
new_tests/build_MeanAverageTransform_pipline.py

@@ -0,0 +1,77 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: moving average transform
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.moving_average_transform'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (2, 3))
step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: isolation forest
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm'))
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
#step_6.add_output('produce')
#pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 50
- 0
new_tests/build_NonNegativeMatrixFactorization.py

@@ -0,0 +1,50 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')


# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: Column Parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: Non Negative Matrix Factorization
primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.non_negative_matrix_factorization')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_hyperparameter(name='rank', argument_type=ArgumentType.VALUE, data=5)
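# rank=5 presumably sets the number of latent components in the W*H factorization.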
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 51
- 0
new_tests/build_OCSVM_pipline.py

@@ -0,0 +1,51 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ocsvm')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 71
- 0
new_tests/build_PCAODetect_pipeline.py

@@ -0,0 +1,71 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import numpy as np

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# # Step 3: Standardization
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# # Step 4: test primitive
primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.PCAODetector')
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_4.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10)
# step_4.add_hyperparameter(name='weights', argument_type=ArgumentType.VALUE, data=weights_ndarray)
step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False)
# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # There is sth wrong with multi-dimensional
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_output('produce')
step_4.add_output('produce_score')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 49
- 0
new_tests/build_PowerTransform_pipline.py

@@ -0,0 +1,49 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.power_transformer')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 51
- 0
new_tests/build_PyodCOF.py

@@ -0,0 +1,51 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_cof')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4)) # There is sth wrong with multi-dimensional
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 49
- 0
new_tests/build_QuantileTransform_pipline.py

@@ -0,0 +1,49 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.quantile_transformer')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 54
- 0
new_tests/build_RuleBasedFilter_pipline.py

@@ -0,0 +1,54 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)


# Step 3: rule-based filter
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.reinforcement.rule_filter'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')

step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2, 4,))
step_3.add_hyperparameter(name='rule', argument_type=ArgumentType.VALUE, data='#4# % 2 == 0 and #2# <= 0.3')
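# The '#k#' tokens appear to stand for column k, so this rule flags rows where
# column 4 is even and column 2 is at most 0.3 (an assumption about the
# rule_filter syntax; it is not documented in this file).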

step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
pipeline_description.add_step(step_3)




# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 49
- 0
new_tests/build_SOD_pipeline.py

@@ -0,0 +1,49 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sod')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4)) # There is sth wrong with multi-dimensional
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# # Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 76
- 0
new_tests/build_SimpleExponentialSmoothing_pipline.py

@@ -0,0 +1,76 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: simple exponential smoothing
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.simple_exponential_smoothing'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (1,))
step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: isolation forest
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm'))
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
#step_6.add_output('produce')
#pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 49
- 0
new_tests/build_Standardize_pipline.py

@@ -0,0 +1,49 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 44
- 0
new_tests/build_TRMF_pipline.py

@@ -0,0 +1,44 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: TRMF
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.trmf'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')

step_2.add_hyperparameter(name = 'lags', argument_type=ArgumentType.VALUE, data = [1,2,10,100])
# step_2.add_hyperparameter(name = 'K', argument_type=ArgumentType.VALUE, data = 3)
# step_2.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = (2, 3, 4, 5, 6))
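# 'lags' above lists the autoregressive lag offsets used by TRMF's temporal
# regularizer; the commented-out 'K' would presumably set the latent rank of
# the factorization.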

pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 48
- 0
new_tests/build_Telemanom.py

@@ -0,0 +1,48 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')


# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: Column Parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: Telemanom
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.telemanom')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 86
- 0
new_tests/build_TimeIntervalTransform_pipeline.py

@@ -0,0 +1,86 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: dataframe transformation
# primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKPowerTransformer')
# primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKStandardization')
# primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKQuantileTransformer')

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

primitive_2 = index.get_primitive('d3m.primitives.tods.data_processing.time_interval_transform')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name="time_interval", argument_type=ArgumentType.VALUE, data = '5T')
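# '5T' is the pandas offset alias for 5 minutes, so the series is presumably
# re-binned onto a 5-minute grid by this transform.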
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)
#
# # Step 2: column_parser
# step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
# step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
# step_2.add_output('produce')
# pipeline_description.add_step(step_2)
#
#
# # Step 3: extract_columns_by_semantic_types(attributes)
# step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
# step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
# step_3.add_output('produce')
# step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
# data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
# pipeline_description.add_step(step_3)
#
# # Step 4: extract_columns_by_semantic_types(targets)
# step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
# step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
# step_4.add_output('produce')
# step_4.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
# data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
# pipeline_description.add_step(step_4)
#
# attributes = 'steps.3.produce'
# targets = 'steps.4.produce'
#
# # Step 5: imputer
# step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_cleaning.imputer.SKlearn'))
# step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
# step_5.add_output('produce')
# pipeline_description.add_step(step_5)
#
# # Step 6: random_forest
# step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.regression.random_forest.SKlearn'))
# step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
# step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
# step_6.add_output('produce')
# pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 44
- 0
new_tests/build_TruncatedSVD_pipline.py

@@ -0,0 +1,44 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: TruncatedSVD
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.truncated_svd'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
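# Project the five selected columns (indices 2-6) onto 3 latent components and
# append them to the original frame.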
step_2.add_hyperparameter(name = 'n_components', argument_type=ArgumentType.VALUE, data = 3)
step_2.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = (2, 3, 4, 5, 6))
step_2.add_hyperparameter(name = 'return_result', argument_type=ArgumentType.VALUE, data = 'append')
step_2.add_hyperparameter(name = 'use_semantic_types', argument_type=ArgumentType.VALUE, data = True)
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 67
- 0
new_tests/build_VariationalAutoEncoder.py

@@ -0,0 +1,67 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'
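# Note: only `attributes` is consumed by the steps below; `targets` is extracted here but
# is not wired into any later step of this test pipeline.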

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: variational autoencoder (PyOD VAE)
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_vae'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_output('produce')
pipeline_description.add_step(step_5)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()


+ 64
- 0
new_tests/build_WaveletTransform_pipline.py

@@ -0,0 +1,64 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test WaveletTransform
primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.wavelet_transform')
step_2 = PrimitiveStep(primitive=primitive_2)
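# 'db8' selects the Daubechies-8 wavelet and level=2 requests a two-level decomposition;
# return_result='new' is assumed to return only the transformed columns rather than appending them.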
step_2.add_hyperparameter(name='wavelet', argument_type=ArgumentType.VALUE, data='db8')
step_2.add_hyperparameter(name='level', argument_type=ArgumentType.VALUE, data=2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test inverse WaveletTransform
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.wavelet_transform')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='wavelet', argument_type=ArgumentType.VALUE, data='db8')
step_3.add_hyperparameter(name='level', argument_type=ArgumentType.VALUE, data=2)
step_3.add_hyperparameter(name='inverse', argument_type=ArgumentType.VALUE, data=1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False)
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)
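# Note: the final output below references step 2 (the forward transform); step 3 only
# exercises the inverse transform and its result is not reused.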


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()


+ 50
- 0
new_tests/build_test_detection_algorithm_PyodMoGaal.py

@@ -0,0 +1,50 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_mogaal')

step_2 = PrimitiveStep(primitive=primitive_2)
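# contamination is the assumed fraction of outliers in the training data (standard PyOD convention).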
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # something seems wrong with multi-dimensional input
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()

+ 50
- 0
new_tests/build_test_detection_algorithm_PyodSoGaal.py

@@ -0,0 +1,50 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sogaal')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # something seems wrong with multi-dimensional input
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()

+ 61
- 0
new_tests/build_test_feature_analysis_spectral_residual_transform_pipeline.py

@@ -0,0 +1,61 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)
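# Step 2 z-scores columns 2-6 and appends the scaled copies, so the indices 8-12 used in
# step 3 below presumably refer to those appended, standardized columns.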

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.spectral_residual_transform')
step_3 = PrimitiveStep(primitive=primitive_3)
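# avg_filter_dimension presumably sets the size of the averaging filter applied to the
# log-amplitude spectrum when building the spectral-residual saliency map.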
step_3.add_hyperparameter(name='avg_filter_dimension', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # something seems wrong with multi-dimensional input
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_abs_energy.py

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_abs_energy')
step_3 = PrimitiveStep(primitive=primitive_3)
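# window_size=4 computes the statistic over a sliding window of 4 consecutive time steps;
# the same pattern is reused by the other statistical feature tests below.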
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # something seems wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_abs_sum.py

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_abs_sum')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # something seems wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_gmean.py

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_g_mean')
step_3 = PrimitiveStep(primitive=primitive_3)
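# The geometric mean is only defined for positive values, which is presumably why only
# columns 5 and 6 are selected here.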
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # something seems wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_hmean.py

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_h_mean')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # something seems wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_kurtosis.py

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_kurtosis')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # something seems wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_maximum.py

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # something seems wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_mean.py

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # something seems wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_mean_abs.py

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean_abs')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # something seems wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_mean_abs_temporal_derivative.py

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean_abs_temporal_derivative')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # something seems wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_mean_temporal_derivative.py

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean_temporal_derivative')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # something seems wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_median.py

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_median')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # something seems wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 63
- 0
new_tests/build_test_feature_analysis_statistical_median_absolute_deviation.py

@@ -0,0 +1,63 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)


# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_median_abs_deviation')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # something seems wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_minimum.py

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_minimum')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # something seems wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_skew.py

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_skew')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # something seems wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_std.py

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_std')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # something seems wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_var.py View File

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline under test:
# dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_var

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_var')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: selecting multiple columns (multi-dimensional input) currently misbehaves here
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_variation.py View File

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline under test:
# dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_variation

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_variation')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: selecting multiple columns (multi-dimensional input) currently misbehaves here
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_vec_sum.py View File

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline under test:
# dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_vec_sum

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_vec_sum')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: selecting multiple columns (multi-dimensional input) currently misbehaves here
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_willison_amplitude.py View File

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline under test:
# dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_willison_amplitude

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_willison_amplitude')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: selecting multiple columns (multi-dimensional input) currently misbehaves here
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 62
- 0
new_tests/build_test_feature_analysis_statistical_zero_crossing.py View File

@@ -0,0 +1,62 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline under test:
# dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_zero_crossing

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)


# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_zero_crossing')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(9,10)) # Note: selecting multiple columns (multi-dimensional input) currently misbehaves here
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON
# data = pipeline_description.to_json()


+ 61
- 0
new_tests/build_test_time_series_seasonality_trend_decomposition.py View File

@@ -0,0 +1,61 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# Pipeline under test:
# dataset_to_dataframe -> column_parser -> standard_scaler -> time_series_seasonality_trend_decomposition

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.decomposition.time_series_seasonality_trend_decomposition')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='period', argument_type=ArgumentType.VALUE, data=5)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # Note: selecting multiple columns (multi-dimensional input) currently misbehaves here
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)

# Or you can output JSON
# data = pipeline_description.to_json()


+ 1
- 1
test.sh View File

@@ -1,6 +1,6 @@
#!/bin/bash

test_scripts=$(ls primitive_tests)
test_scripts=$(ls new_tests)
#test_scripts=$(ls primitive_tests | grep -v -f tested_file.txt)

for file in $test_scripts


+ 398
- 0
tods/data_processing/ColumnParser.py View File

@@ -0,0 +1,398 @@
import hashlib
import os
import typing

import numpy # type: ignore

from d3m import container, utils as d3m_utils
from d3m.base import utils as base_utils
from d3m.metadata import base as metadata_base, hyperparams
from d3m.primitive_interfaces import base, transformer

import common_primitives
from common_primitives import utils

__all__ = ('ColumnParserPrimitive',)

Inputs = container.DataFrame
Outputs = container.DataFrame


class Hyperparams(hyperparams.Hyperparams):
parse_semantic_types = hyperparams.Set(
elements=hyperparams.Enumeration(
values=[
'http://schema.org/Boolean', 'https://metadata.datadrivendiscovery.org/types/CategoricalData',
'http://schema.org/Integer', 'http://schema.org/Float',
'https://metadata.datadrivendiscovery.org/types/FloatVector', 'http://schema.org/DateTime',
],
# Default is ignored.
# TODO: Remove default. See: https://gitlab.com/datadrivendiscovery/d3m/issues/141
default='http://schema.org/Boolean',
),
default=(
'http://schema.org/Boolean', 'https://metadata.datadrivendiscovery.org/types/CategoricalData',
'http://schema.org/Integer', 'http://schema.org/Float',
'https://metadata.datadrivendiscovery.org/types/FloatVector', 'http://schema.org/DateTime',
),
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="A set of semantic types to parse. One can provide a subset of supported semantic types to limit what the primitive parses.",
)
use_columns = hyperparams.Set(
elements=hyperparams.Hyperparameter[int](-1),
default=(),
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
)
exclude_columns = hyperparams.Set(
elements=hyperparams.Hyperparameter[int](-1),
default=(),
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
)
return_result = hyperparams.Enumeration(
values=['append', 'replace', 'new'],
default='replace',
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned?",
)
add_index_columns = hyperparams.UniformBool(
default=True,
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
)
parse_categorical_target_columns = hyperparams.UniformBool(
default=False,
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="Should it parse also categorical target columns?",
)
replace_index_columns = hyperparams.UniformBool(
default=True,
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="Replace primary index columns even if otherwise appending columns. Applicable only if \"return_result\" is set to \"append\".",
)
fuzzy_time_parsing = hyperparams.UniformBool(
default=True,
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="Use fuzzy time parsing.",
)


class ColumnParserPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
A primitive which parses string columns into their typed values.

It goes over all columns (by default, controlled by ``use_columns``, ``exclude_columns``)
and checks those with structural type ``str`` if they have a semantic type suggesting
that they are a boolean value, categorical, integer, float, or time (by default,
controlled by ``parse_semantic_types``). Categorical values are converted with
hash encoding.

What is returned is controlled by ``return_result`` and ``add_index_columns``.
"""

metadata = metadata_base.PrimitiveMetadata(
{
'id': 'd510cb7a-1782-4f51-b44c-58f0236e47c7',
'version': '0.6.0',
'name': "Parses strings into their types",
'python_path': 'd3m.primitives.tods.data_processing.column_parser',
'source': {
'name': common_primitives.__author__,
'contact': 'mailto:mitar.commonprimitives@tnode.com',
'uris': [
'https://gitlab.com/datadrivendiscovery/common-primitives/blob/master/common_primitives/column_parser.py',
'https://gitlab.com/datadrivendiscovery/common-primitives.git',
],
},
'installation': [{
'type': metadata_base.PrimitiveInstallationType.PIP,
'package_uri': 'git+https://gitlab.com/datadrivendiscovery/common-primitives.git@{git_commit}#egg=common_primitives'.format(
git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)),
),
}],
'algorithm_types': [
metadata_base.PrimitiveAlgorithmType.DATA_CONVERSION,
],
'primitive_family': metadata_base.PrimitiveFamily.DATA_TRANSFORMATION,
},
)

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
columns_to_use, output_columns = self._produce_columns(inputs)

if self.hyperparams['replace_index_columns'] and self.hyperparams['return_result'] == 'append':
assert len(columns_to_use) == len(output_columns)

index_columns = inputs.metadata.get_index_columns()

index_columns_to_use = []
other_columns_to_use = []
index_output_columns = []
other_output_columns = []
for column_to_use, output_column in zip(columns_to_use, output_columns):
if column_to_use in index_columns:
index_columns_to_use.append(column_to_use)
index_output_columns.append(output_column)
else:
other_columns_to_use.append(column_to_use)
other_output_columns.append(output_column)

outputs = base_utils.combine_columns(inputs, index_columns_to_use, index_output_columns, return_result='replace', add_index_columns=self.hyperparams['add_index_columns'])
outputs = base_utils.combine_columns(outputs, other_columns_to_use, other_output_columns, return_result='append', add_index_columns=self.hyperparams['add_index_columns'])
else:
outputs = base_utils.combine_columns(inputs, columns_to_use, output_columns, return_result=self.hyperparams['return_result'], add_index_columns=self.hyperparams['add_index_columns'])

return base.CallResult(outputs)

def _can_use_column(self, inputs_metadata: metadata_base.DataMetadata, column_index: int) -> bool:
column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index))

# We produce only on columns which have not yet been parsed (are strings).
if column_metadata['structural_type'] != str:
return False

semantic_types = column_metadata.get('semantic_types', [])

for semantic_type in self.hyperparams['parse_semantic_types']:
if semantic_type not in semantic_types:
continue

if semantic_type == 'https://metadata.datadrivendiscovery.org/types/CategoricalData':
# Skip parsing if a column is categorical, but also a target column.
if not self.hyperparams['parse_categorical_target_columns'] and 'https://metadata.datadrivendiscovery.org/types/Target' in semantic_types:
continue

return True

return False

def _produce_columns(self, inputs: Inputs) -> typing.Tuple[typing.List[int], typing.List[Outputs]]:
# The logic of parsing values tries to mirror also the logic of detecting
# values in "SimpleProfilerPrimitive". One should keep them in sync.

columns_to_use = self._get_columns(inputs.metadata)

# We check against this list again, because there might be multiple matching semantic types
# (which is not really valid).
parse_semantic_types = self.hyperparams['parse_semantic_types']

output_columns = []

for column_index in columns_to_use:
column_metadata = inputs.metadata.query((metadata_base.ALL_ELEMENTS, column_index))
semantic_types = column_metadata.get('semantic_types', [])
if column_metadata['structural_type'] == str:
if 'http://schema.org/Boolean' in parse_semantic_types and 'http://schema.org/Boolean' in semantic_types:
output_columns.append(self._parse_boolean_data(inputs, column_index))

elif 'https://metadata.datadrivendiscovery.org/types/CategoricalData' in parse_semantic_types and \
'https://metadata.datadrivendiscovery.org/types/CategoricalData' in semantic_types and \
(self.hyperparams['parse_categorical_target_columns'] or 'https://metadata.datadrivendiscovery.org/types/Target' not in semantic_types):
output_columns.append(self._parse_categorical_data(inputs, column_index))

elif 'http://schema.org/Integer' in parse_semantic_types and 'http://schema.org/Integer' in semantic_types:
# For primary key we know all values have to exist so we can assume they can always be represented as integers.
if 'https://metadata.datadrivendiscovery.org/types/PrimaryKey' in semantic_types:
integer_required = True
else:
integer_required = False

output_columns.append(self._parse_integer(inputs, column_index, integer_required))

elif 'http://schema.org/Float' in parse_semantic_types and 'http://schema.org/Float' in semantic_types:
output_columns.append(self._parse_float_data(inputs, column_index))

elif 'https://metadata.datadrivendiscovery.org/types/FloatVector' in parse_semantic_types and 'https://metadata.datadrivendiscovery.org/types/FloatVector' in semantic_types:
output_columns.append(self._parse_float_vector_data(inputs, column_index))

elif 'http://schema.org/DateTime' in parse_semantic_types and 'http://schema.org/DateTime' in semantic_types:
output_columns.append(self._parse_time_data(inputs, column_index, self.hyperparams['fuzzy_time_parsing']))

else:
assert False, column_index

assert len(output_columns) == len(columns_to_use)

return columns_to_use, output_columns

def _produce_columns_metadata(self, inputs_metadata: metadata_base.DataMetadata) -> typing.Tuple[typing.List[int], typing.List[metadata_base.DataMetadata]]:
columns_to_use = self._get_columns(inputs_metadata)

# We check against this list again, because there might be multiple matching semantic types
# (which is not really valid).
parse_semantic_types = self.hyperparams['parse_semantic_types']

output_columns = []

for column_index in columns_to_use:
column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index))
semantic_types = column_metadata.get('semantic_types', [])
if column_metadata['structural_type'] == str:
if 'http://schema.org/Boolean' in parse_semantic_types and 'http://schema.org/Boolean' in semantic_types:
output_columns.append(self._parse_boolean_metadata(inputs_metadata, column_index))

elif 'https://metadata.datadrivendiscovery.org/types/CategoricalData' in parse_semantic_types and \
'https://metadata.datadrivendiscovery.org/types/CategoricalData' in semantic_types and \
(self.hyperparams['parse_categorical_target_columns'] or 'https://metadata.datadrivendiscovery.org/types/Target' not in semantic_types):
output_columns.append(self._parse_categorical_metadata(inputs_metadata, column_index))

elif 'http://schema.org/Integer' in parse_semantic_types and 'http://schema.org/Integer' in semantic_types:
output_columns.append(self._parse_integer_metadata(inputs_metadata, column_index))

elif 'http://schema.org/Float' in parse_semantic_types and 'http://schema.org/Float' in semantic_types:
output_columns.append(self._parse_float_metadata(inputs_metadata, column_index))

elif 'https://metadata.datadrivendiscovery.org/types/FloatVector' in parse_semantic_types and 'https://metadata.datadrivendiscovery.org/types/FloatVector' in semantic_types:
output_columns.append(self._parse_float_vector_metadata(inputs_metadata, column_index))

elif 'http://schema.org/DateTime' in parse_semantic_types and 'http://schema.org/DateTime' in semantic_types:
output_columns.append(self._parse_time_metadata(inputs_metadata, column_index))

else:
assert False, column_index

assert len(output_columns) == len(columns_to_use)

return columns_to_use, output_columns

def _get_columns(self, inputs_metadata: metadata_base.DataMetadata) -> typing.List[int]:
def can_use_column(column_index: int) -> bool:
return self._can_use_column(inputs_metadata, column_index)

columns_to_use, columns_not_to_use = base_utils.get_columns_to_use(inputs_metadata, self.hyperparams['use_columns'], self.hyperparams['exclude_columns'], can_use_column)

# We are OK if no columns ended up being parsed.
# "base_utils.combine_columns" will throw an error if it cannot work with this.

if self.hyperparams['use_columns'] and columns_not_to_use:
self.logger.warning("Not all specified columns can parsed. Skipping columns: %(columns)s", {
'columns': columns_not_to_use,
})

return columns_to_use

@classmethod
def _parse_boolean_data(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment) -> Outputs:
return cls._parse_categorical_data(inputs, column_index)

@classmethod
def _parse_boolean_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata:
return cls._parse_categorical_metadata(inputs_metadata, column_index)

@classmethod
def _parse_categorical_data(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment) -> Outputs:
values_map: typing.Dict[str, int] = {}
for value in inputs.iloc[:, column_index]:
value = value.strip()
if value not in values_map:
value_hash = hashlib.sha256(value.encode('utf8'))
values_map[value] = int.from_bytes(value_hash.digest()[0:8], byteorder='little') ^ int.from_bytes(value_hash.digest()[8:16], byteorder='little') ^ \
int.from_bytes(value_hash.digest()[16:24], byteorder='little') ^ int.from_bytes(value_hash.digest()[24:32], byteorder='little')

outputs = container.DataFrame({inputs.columns[column_index]: [values_map[value.strip()] for value in inputs.iloc[:, column_index]]}, generate_metadata=False)
outputs.metadata = cls._parse_categorical_metadata(inputs.metadata, column_index)

return outputs

@classmethod
def _parse_categorical_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata:
outputs_metadata = inputs_metadata.select_columns([column_index])
return outputs_metadata.update_column(0, {'structural_type': int})

@classmethod
def _str_to_int(cls, value: str) -> typing.Union[float, int]:
try:
return int(value.strip())
except ValueError:
try:
# Maybe it is an int represented as a float. Let's try this. This can get rid of non-integer
# part of the value, but the integer was requested through a semantic type, so this is probably OK.
return int(float(value.strip()))
except ValueError:
# No luck, use NaN to represent a missing value.
return float('nan')

@classmethod
def _parse_integer(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment,
integer_required: bool) -> container.DataFrame:
outputs = container.DataFrame({inputs.columns[column_index]: [cls._str_to_int(value) for value in inputs.iloc[:, column_index]]}, generate_metadata=False)

if outputs.dtypes.iloc[0].kind == 'f':
structural_type: type = float
elif outputs.dtypes.iloc[0].kind in ['i', 'u']:
structural_type = int
else:
assert False, outputs.dtypes.iloc[0]

if structural_type is float and integer_required:
raise ValueError("Not all values in a column can be parsed into integers, but only integers were expected.")

outputs.metadata = inputs.metadata.select_columns([column_index])
outputs.metadata = outputs.metadata.update_column(0, {'structural_type': structural_type})

return outputs

@classmethod
def _parse_integer_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata:
outputs_metadata = inputs_metadata.select_columns([column_index])
# Without data we assume we can parse everything into integers. This might not be true and
# we might end up parsing into floats if we have to represent missing (or invalid) values.
return outputs_metadata.update_column(0, {'structural_type': int})

@classmethod
def _str_to_float(cls, value: str) -> float:
try:
return float(value.strip())
except ValueError:
return float('nan')

@classmethod
def _parse_float_data(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment) -> Outputs:
outputs = container.DataFrame({inputs.columns[column_index]: [cls._str_to_float(value) for value in inputs.iloc[:, column_index]]}, generate_metadata=False)
outputs.metadata = cls._parse_float_metadata(inputs.metadata, column_index)

return outputs

@classmethod
def _parse_float_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata:
outputs_metadata = inputs_metadata.select_columns([column_index])
return outputs_metadata.update_column(0, {'structural_type': float})

@classmethod
def _parse_float_vector_data(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment) -> Outputs:
# We are pretty strict here because we are assuming this was generated programmatically.
outputs = container.DataFrame(
{
inputs.columns[column_index]: [
container.ndarray([cls._str_to_float(value) for value in values.split(',')])
for values in inputs.iloc[:, column_index]
],
},
generate_metadata=False,
)
outputs.metadata = cls._parse_float_metadata(inputs.metadata, column_index)
# We have to automatically generate metadata to set ndarray dimension(s).
outputs.metadata = outputs.metadata.generate(outputs)

return outputs

@classmethod
def _parse_float_vector_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata:
outputs_metadata = inputs_metadata.select_columns([column_index])
# We cannot know the dimension of the ndarray without data.
outputs_metadata = outputs_metadata.update_column(0, {'structural_type': container.ndarray})
outputs_metadata = outputs_metadata.update((metadata_base.ALL_ELEMENTS, 0, metadata_base.ALL_ELEMENTS), {'structural_type': numpy.float64})
return outputs_metadata

@classmethod
def _parse_time_data(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment, fuzzy: bool) -> Outputs:
outputs = container.DataFrame({inputs.columns[column_index]: [utils.parse_datetime_to_float(value, fuzzy=fuzzy) for value in inputs.iloc[:, column_index]]}, generate_metadata=False)
outputs.metadata = cls._parse_time_metadata(inputs.metadata, column_index)

return outputs

@classmethod
def _parse_time_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata:
outputs_metadata = inputs_metadata.select_columns([column_index])
return outputs_metadata.update_column(0, {'structural_type': float})
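To make the categorical handling concrete: _parse_categorical_data above hash-encodes each distinct string by folding a SHA-256 digest into a 64-bit integer with XOR. A standalone illustrative sketch of that encoding:

import hashlib

def categorical_hash(value: str) -> int:
    # Fold the 32-byte SHA-256 digest into 64 bits by XOR-ing its four 8-byte words,
    # mirroring ColumnParserPrimitive._parse_categorical_data.
    digest = hashlib.sha256(value.strip().encode('utf8')).digest()
    return (int.from_bytes(digest[0:8], byteorder='little')
            ^ int.from_bytes(digest[8:16], byteorder='little')
            ^ int.from_bytes(digest[16:24], byteorder='little')
            ^ int.from_bytes(digest[24:32], byteorder='little'))

print(categorical_hash('cat') == categorical_hash(' cat '))  # True: values are stripped before hashing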

+ 261
- 0
tods/data_processing/ContructPredictions.py View File

@@ -0,0 +1,261 @@
import os
import typing

from d3m import container, utils as d3m_utils
from d3m.metadata import base as metadata_base, hyperparams
from d3m.primitive_interfaces import base, transformer
from d3m.contrib.primitives import compute_scores

import common_primitives

__all__ = ('ConstructPredictionsPrimitive',)

Inputs = container.DataFrame
Outputs = container.DataFrame


class Hyperparams(hyperparams.Hyperparams):
use_columns = hyperparams.Set(
elements=hyperparams.Hyperparameter[int](-1),
default=(),
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="A set of column indices to force primitive to operate on. If metadata reconstruction happens, this is used for reference columns."
" If any specified column is not a primary index or a predicted target, it is skipped.",
)
exclude_columns = hyperparams.Set(
elements=hyperparams.Hyperparameter[int](-1),
default=(),
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="A set of column indices to not operate on. If metadata reconstruction happens, this is used for reference columns. Applicable only if \"use_columns\" is not provided.",
)


class ConstructPredictionsPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
A primitive which takes as input a DataFrame and outputs a DataFrame in Lincoln Labs predictions
format: first column is a d3mIndex column (and other primary index columns, e.g., for object detection
problem), and then predicted targets, each in its column, followed by optional confidence column(s).

It supports both input columns annotated with semantic types (``https://metadata.datadrivendiscovery.org/types/PrimaryKey``,
``https://metadata.datadrivendiscovery.org/types/PrimaryMultiKey``, ``https://metadata.datadrivendiscovery.org/types/PredictedTarget``,
``https://metadata.datadrivendiscovery.org/types/Confidence``), or trying to reconstruct metadata.
This is why the primitive takes also additional input of a reference DataFrame which should
have metadata to help reconstruct missing metadata. If metadata is missing, the primitive
assumes that all ``inputs`` columns are predicted targets, without confidence column(s).
"""

metadata = metadata_base.PrimitiveMetadata(
{
'id': '8d38b340-f83f-4877-baaa-162f8e551736',
'version': '0.3.0',
'name': "Construct pipeline predictions output",
'python_path': 'd3m.primitives.tods.data_processing.construct_predictions',
'source': {
'name': common_primitives.__author__,
'contact': 'mailto:mitar.commonprimitives@tnode.com',
'uris': [
'https://gitlab.com/datadrivendiscovery/common-primitives/blob/master/common_primitives/construct_predictions.py',
'https://gitlab.com/datadrivendiscovery/common-primitives.git',
],
},
'installation': [{
'type': metadata_base.PrimitiveInstallationType.PIP,
'package_uri': 'git+https://gitlab.com/datadrivendiscovery/common-primitives.git@{git_commit}#egg=common_primitives'.format(
git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)),
),
}],
'algorithm_types': [
metadata_base.PrimitiveAlgorithmType.DATA_CONVERSION,
],
'primitive_family': metadata_base.PrimitiveFamily.DATA_TRANSFORMATION,
},
)

def produce(self, *, inputs: Inputs, reference: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: # type: ignore
index_columns = inputs.metadata.get_index_columns()
target_columns = inputs.metadata.list_columns_with_semantic_types(('https://metadata.datadrivendiscovery.org/types/PredictedTarget',))

# Target columns cannot be also index columns. This should not really happen,
# but it could happen with buggy primitives.
target_columns = [target_column for target_column in target_columns if target_column not in index_columns]

if index_columns and target_columns:
outputs = self._produce_using_semantic_types(inputs, index_columns, target_columns)
else:
outputs = self._produce_reconstruct(inputs, reference, index_columns, target_columns)

outputs = compute_scores.ComputeScoresPrimitive._encode_columns(outputs)

# Generally we do not care about column names in DataFrame itself (but use names of columns from metadata),
# but in this case setting column names makes it easier to assure that "to_csv" call produces correct output.
# See: https://gitlab.com/datadrivendiscovery/d3m/issues/147
column_names = []
for column_index in range(len(outputs.columns)):
column_names.append(outputs.metadata.query_column(column_index).get('name', outputs.columns[column_index]))
outputs.columns = column_names

return base.CallResult(outputs)

def _filter_index_columns(self, inputs_metadata: metadata_base.DataMetadata, index_columns: typing.Sequence[int]) -> typing.Sequence[int]:
if self.hyperparams['use_columns']:
index_columns = [index_column_index for index_column_index in index_columns if index_column_index in self.hyperparams['use_columns']]
if not index_columns:
raise ValueError("No index columns listed in \"use_columns\" hyper-parameter, but index columns are required.")

else:
index_columns = [index_column_index for index_column_index in index_columns if index_column_index not in self.hyperparams['exclude_columns']]
if not index_columns:
raise ValueError("All index columns listed in \"exclude_columns\" hyper-parameter, but index columns are required.")

names = []
for index_column in index_columns:
index_metadata = inputs_metadata.query_column(index_column)
# We do not care about empty strings for names either.
if index_metadata.get('name', None):
names.append(index_metadata['name'])

if 'd3mIndex' not in names:
raise ValueError("\"d3mIndex\" index column is missing.")

names_set = set(names)
if len(names) != len(names_set):
duplicate_names = names
for name in names_set:
# Removes just the first occurrence.
duplicate_names.remove(name)

self.logger.warning("Duplicate names for index columns: %(duplicate_names)s", {
'duplicate_names': list(set(duplicate_names)),
})

return index_columns

def _get_columns(self, inputs_metadata: metadata_base.DataMetadata, index_columns: typing.Sequence[int], target_columns: typing.Sequence[int]) -> typing.List[int]:
assert index_columns
assert target_columns

index_columns = self._filter_index_columns(inputs_metadata, index_columns)

if self.hyperparams['use_columns']:
target_columns = [target_column_index for target_column_index in target_columns if target_column_index in self.hyperparams['use_columns']]
if not target_columns:
raise ValueError("No target columns listed in \"use_columns\" hyper-parameter, but target columns are required.")

else:
target_columns = [target_column_index for target_column_index in target_columns if target_column_index not in self.hyperparams['exclude_columns']]
if not target_columns:
raise ValueError("All target columns listed in \"exclude_columns\" hyper-parameter, but target columns are required.")

assert index_columns
assert target_columns

return list(index_columns) + list(target_columns)

def _get_confidence_columns(self, inputs_metadata: metadata_base.DataMetadata) -> typing.List[int]:
confidence_columns = inputs_metadata.list_columns_with_semantic_types(('https://metadata.datadrivendiscovery.org/types/Confidence',))

if self.hyperparams['use_columns']:
confidence_columns = [confidence_column_index for confidence_column_index in confidence_columns if confidence_column_index in self.hyperparams['use_columns']]
else:
confidence_columns = [confidence_column_index for confidence_column_index in confidence_columns if confidence_column_index not in self.hyperparams['exclude_columns']]

return confidence_columns

def _produce_using_semantic_types(self, inputs: Inputs, index_columns: typing.Sequence[int],
target_columns: typing.Sequence[int]) -> Outputs:
confidence_columns = self._get_confidence_columns(inputs.metadata)

output_columns = self._get_columns(inputs.metadata, index_columns, target_columns) + confidence_columns

# "get_index_columns" makes sure that "d3mIndex" is always listed first.
# And "select_columns" selects columns in order listed, which then
# always puts "d3mIndex" first.
outputs = inputs.select_columns(output_columns)

if confidence_columns:
outputs.metadata = self._update_confidence_columns(outputs.metadata, confidence_columns)

return outputs

def _update_confidence_columns(self, inputs_metadata: metadata_base.DataMetadata, confidence_columns: typing.Sequence[int]) -> metadata_base.DataMetadata:
output_columns_length = inputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']

outputs_metadata = inputs_metadata

# All confidence columns have to be named "confidence".
for column_index in range(output_columns_length - len(confidence_columns), output_columns_length):
outputs_metadata = outputs_metadata.update((metadata_base.ALL_ELEMENTS, column_index), {
'name': 'confidence',
})

return outputs_metadata

def _produce_reconstruct(self, inputs: Inputs, reference: Inputs, index_columns: typing.Sequence[int], target_columns: typing.Sequence[int]) -> Outputs:
if not index_columns:
reference_index_columns = reference.metadata.get_index_columns()

if not reference_index_columns:
raise ValueError("Cannot find an index column in reference data, but index column is required.")

filtered_index_columns = self._filter_index_columns(reference.metadata, reference_index_columns)
index = reference.select_columns(filtered_index_columns)
else:
filtered_index_columns = self._filter_index_columns(inputs.metadata, index_columns)
index = inputs.select_columns(filtered_index_columns)

if not target_columns:
if index_columns:
raise ValueError("No target columns in input data, but index column(s) present.")

# We assume all inputs are targets.
targets = inputs

# We make sure at least basic metadata is generated correctly, so we regenerate metadata.
targets.metadata = targets.metadata.generate(targets)

# We set target column names from the reference. We set semantic types.
targets.metadata = self._update_targets_metadata(targets.metadata, self._get_target_names(reference.metadata))

else:
targets = inputs.select_columns(target_columns)

return index.append_columns(targets)

def multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, reference: Inputs, timeout: float = None, iterations: int = None) -> base.MultiCallResult: # type: ignore
return self._multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs, reference=reference)

def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, reference: Inputs, timeout: float = None, iterations: int = None) -> base.MultiCallResult: # type: ignore
return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs, reference=reference)

def _get_target_names(self, metadata: metadata_base.DataMetadata) -> typing.List[typing.Union[str, None]]:
target_names = []

for column_index in metadata.list_columns_with_semantic_types(('https://metadata.datadrivendiscovery.org/types/TrueTarget',)):
column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index))

target_names.append(column_metadata.get('name', None))

return target_names

def _update_targets_metadata(self, metadata: metadata_base.DataMetadata, target_names: typing.Sequence[typing.Union[str, None]]) -> metadata_base.DataMetadata:
targets_length = metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']

if targets_length != len(target_names):
raise ValueError("Not an expected number of target columns to apply names for. Expected {target_names}, provided {targets_length}.".format(
target_names=len(target_names),
targets_length=targets_length,
))

for column_index, target_name in enumerate(target_names):
metadata = metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, column_index), 'https://metadata.datadrivendiscovery.org/types/Target')
metadata = metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, column_index), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')

# We do not have it, let's skip it and hope for the best.
if target_name is None:
continue

metadata = metadata.update_column(column_index, {
'name': target_name,
})

return metadata
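None of the new_tests pipelines above wire this primitive in yet. As a rough illustration only (not taken from the commit), a final step like the following could be appended to one of those scripts; it assumes their pipeline_description, with column_parser at steps.1 and the detector at steps.3:

from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import PrimitiveStep

primitive_4 = index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions')
step_4 = PrimitiveStep(primitive=primitive_4)
# 'inputs' is the detector output; 'reference' is the parsed dataframe, so the
# primitive can recover the d3mIndex column when reconstructing metadata.
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)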

+ 141
- 0
tods/data_processing/ExtractColumnsBySemanticTypes.py View File

@@ -0,0 +1,141 @@
import os
import typing

from d3m import container, exceptions, utils as d3m_utils
from d3m.base import utils as base_utils
from d3m.metadata import base as metadata_base, hyperparams
from d3m.primitive_interfaces import base, transformer

import common_primitives

__all__ = ('ExtractColumnsBySemanticTypesPrimitive',)

Inputs = container.DataFrame
Outputs = container.DataFrame


class Hyperparams(hyperparams.Hyperparams):
semantic_types = hyperparams.Set(
elements=hyperparams.Hyperparameter[str](''),
default=('https://metadata.datadrivendiscovery.org/types/Attribute',),
min_size=1,
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="Semantic types to use to extract columns. If any of them matches, by default.",
)
match_logic = hyperparams.Enumeration(
values=['all', 'any', 'equal'],
default='any',
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="Should a column have all of semantic types in \"semantic_types\" to be extracted, or any of them?",
)
negate = hyperparams.UniformBool(
default=False,
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="Should columns which do not match semantic types in \"semantic_types\" be extracted?",
)
use_columns = hyperparams.Set(
elements=hyperparams.Hyperparameter[int](-1),
default=(),
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="A set of column indices to force primitive to operate on. If any specified column does not match any semantic type, it is skipped.",
)
exclude_columns = hyperparams.Set(
elements=hyperparams.Hyperparameter[int](-1),
default=(),
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
)
add_index_columns = hyperparams.UniformBool(
default=False,
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="Also include primary index columns if input data has them.",
)


class ExtractColumnsBySemanticTypesPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
"""
A primitive which extracts columns from input data based on semantic types provided.
Columns which match any of the listed semantic types are extracted.

If you want to extract only attributes, you can use ``https://metadata.datadrivendiscovery.org/types/Attribute``
semantic type (also default).

For real targets (not suggested targets) use ``https://metadata.datadrivendiscovery.org/types/Target``.
For this to work, columns have to be marked as targets by the TA2 in a dataset before passing the dataset
through a pipeline. Or something else has to mark them at some point in a pipeline.

It uses ``use_columns`` and ``exclude_columns`` to control which columns it considers.
"""

metadata = metadata_base.PrimitiveMetadata(
{
'id': '4503a4c6-42f7-45a1-a1d4-ed69699cf5e1',
'version': '0.4.0',
'name': "Extracts columns by semantic type",
'python_path': 'd3m.primitives.tods.data_processing.extract_columns_by_semantic_types',
'source': {
'name': common_primitives.__author__,
'contact': 'mailto:mitar.commonprimitives@tnode.com',
'uris': [
'https://gitlab.com/datadrivendiscovery/common-primitives/blob/master/common_primitives/extract_columns_semantic_types.py',
'https://gitlab.com/datadrivendiscovery/common-primitives.git',
],
},
'installation': [{
'type': metadata_base.PrimitiveInstallationType.PIP,
'package_uri': 'git+https://gitlab.com/datadrivendiscovery/common-primitives.git@{git_commit}#egg=common_primitives'.format(
git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)),
),
}],
'algorithm_types': [
metadata_base.PrimitiveAlgorithmType.ARRAY_SLICING,
],
'primitive_family': metadata_base.PrimitiveFamily.DATA_TRANSFORMATION,
},
)

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
columns_to_use = self._get_columns(inputs.metadata)

output_columns = inputs.select_columns(columns_to_use)

outputs = base_utils.combine_columns(inputs, columns_to_use, [output_columns], return_result='new', add_index_columns=self.hyperparams['add_index_columns'])

return base.CallResult(outputs)

def _can_use_column(self, inputs_metadata: metadata_base.DataMetadata, column_index: int) -> bool:
column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index))

semantic_types = column_metadata.get('semantic_types', [])

if self.hyperparams['match_logic'] == 'all':
match = all(semantic_type in semantic_types for semantic_type in self.hyperparams['semantic_types'])
elif self.hyperparams['match_logic'] == 'any':
match = any(semantic_type in semantic_types for semantic_type in self.hyperparams['semantic_types'])
elif self.hyperparams["match_logic"] == "equal":
match = set(semantic_types) == set(self.hyperparams["semantic_types"])
else:
raise exceptions.UnexpectedValueError("Unknown value of hyper-parameter \"match_logic\": {value}".format(value=self.hyperparams['match_logic']))

if self.hyperparams['negate']:
return not match
else:
return match

def _get_columns(self, inputs_metadata: metadata_base.DataMetadata) -> typing.Sequence[int]:
def can_use_column(column_index: int) -> bool:
return self._can_use_column(inputs_metadata, column_index)

columns_to_use, columns_not_to_use = base_utils.get_columns_to_use(inputs_metadata, self.hyperparams['use_columns'], self.hyperparams['exclude_columns'], can_use_column)

if not columns_to_use:
raise ValueError("Input data has no columns matching semantic types: {semantic_types}".format(
semantic_types=self.hyperparams['semantic_types'],
))

if self.hyperparams['use_columns'] and columns_not_to_use:
self.logger.warning("Not all specified columns match semantic types. Skipping columns: %(columns)s", {
'columns': columns_not_to_use,
})

return columns_to_use
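For reference, a hypothetical step using this primitive could look as follows (again assuming a pipeline_description with column_parser at steps.1, as in the scripts above; this step is not part of the commit):

from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import PrimitiveStep

# Keep only the columns tagged as attributes in the parsed dataframe.
primitive_attrs = index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')
step_attrs = PrimitiveStep(primitive=primitive_attrs)
step_attrs.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_attrs.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                              data=('https://metadata.datadrivendiscovery.org/types/Attribute',))
step_attrs.add_output('produce')
pipeline_description.add_step(step_attrs)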

+ 3
- 1
tods/resources/.entry_points.ini View File

@@ -7,7 +7,9 @@ tods.data_processing.timestamp_validation = tods.data_processing.TimeStampValida
tods.data_processing.duplication_validation = tods.data_processing.DuplicationValidation:DuplicationValidationPrimitive
tods.data_processing.continuity_validation = tods.data_processing.ContinuityValidation:ContinuityValidationPrimitive
tods.data_processing.impute_missing = tods.data_processing.SKImputer:SKImputerPrimitive

tods.data_processing.column_parser = tods.data_processing.ColumnParser:ColumnParserPrimitive
tods.data_processing.extract_columns_by_semantic_types = tods.data_processing.ExtractColumnsBySemanticTypes:ExtractColumnsBySemanticTypesPrimitive
tods.data_processing.construct_predictions = tods.data_processing.ConstructPredictions:ConstructPredictionsPrimitive

tods.timeseries_processing.transformation.axiswise_scaler = tods.timeseries_processing.SKAxiswiseScaler:SKAxiswiseScalerPrimitive
tods.timeseries_processing.transformation.standard_scaler = tods.timeseries_processing.SKStandardScaler:SKStandardScalerPrimitive

