
merge primitive_tests from Junjie's branch; modify metadata for the data_processing and timeseries_processing modules

master
lhenry15 4 years ago
commit 4598bfad1f
100 changed files with 9192 additions and 40775 deletions
  1. +1 -1 datasets/anomaly/transform_yahoo.py
  2. +27 -3 datasets/anomaly/yahoo_sub_5/SCORE/dataset_TEST/datasetDoc.json
  3. +141 -0 datasets/anomaly/yahoo_sub_5/SCORE/dataset_TEST/tables/learningData.csv
  4. +1261 -0 datasets/anomaly/yahoo_sub_5/SCORE/problem_TEST/dataSplits.csv
  5. +10 -10 datasets/anomaly/yahoo_sub_5/SCORE/problem_TEST/problemDoc.json
  6. +0 -0 datasets/anomaly/yahoo_sub_5/SCORE/targets.csv
  7. +27 -3 datasets/anomaly/yahoo_sub_5/TEST/dataset_TEST/datasetDoc.json
  8. +141 -0 datasets/anomaly/yahoo_sub_5/TEST/dataset_TEST/tables/learningData.csv
  9. +1261 -0 datasets/anomaly/yahoo_sub_5/TEST/problem_TEST/dataSplits.csv
  10. +10 -10 datasets/anomaly/yahoo_sub_5/TEST/problem_TEST/problemDoc.json
  11. +28 -4 datasets/anomaly/yahoo_sub_5/TRAIN/dataset_TRAIN/datasetDoc.json
  12. +1261 -0 datasets/anomaly/yahoo_sub_5/TRAIN/dataset_TRAIN/tables/learningData.csv
  13. +1261 -0 datasets/anomaly/yahoo_sub_5/TRAIN/problem_TRAIN/dataSplits.csv
  14. +10 -10 datasets/anomaly/yahoo_sub_5/TRAIN/problem_TRAIN/problemDoc.json
  15. +29 -5 datasets/anomaly/yahoo_sub_5/yahoo_sub_5_dataset/datasetDoc.json
  16. +1401 -0 datasets/anomaly/yahoo_sub_5/yahoo_sub_5_dataset/tables/learningData.csv
  17. +1261 -0 datasets/anomaly/yahoo_sub_5/yahoo_sub_5_problem/dataSplits.csv
  18. +10 -10 datasets/anomaly/yahoo_sub_5/yahoo_sub_5_problem/problemDoc.json
  19. +0 -1401 datasets/anomaly/yahoo_system_sub_5/SCORE/dataset_TEST/tables/learningData.csv
  20. +0 -5601 datasets/anomaly/yahoo_system_sub_5/SCORE/problem_TEST/dataSplits.csv
  21. +0 -1401 datasets/anomaly/yahoo_system_sub_5/TEST/dataset_TEST/tables/learningData.csv
  22. +0 -5601 datasets/anomaly/yahoo_system_sub_5/TEST/problem_TEST/dataSplits.csv
  23. +0 -5601 datasets/anomaly/yahoo_system_sub_5/TRAIN/dataset_TRAIN/tables/learningData.csv
  24. +0 -5601 datasets/anomaly/yahoo_system_sub_5/TRAIN/problem_TRAIN/dataSplits.csv
  25. +0 -7001 datasets/anomaly/yahoo_system_sub_5/yahoo_system_sub_5_dataset/tables/learningData.csv
  26. +0 -5601 datasets/anomaly/yahoo_system_sub_5/yahoo_system_sub_5_problem/dataSplits.csv
  27. +0 -70 primitive_tests/build_ABOD_pipline.py
  28. +0 -51 primitive_tests/build_CBLOF_pipline.py
  29. +0 -49 primitive_tests/build_DeepLog_pipeline.py
  30. +0 -76 primitive_tests/build_HoltSmoothing_pipline.py
  31. +0 -76 primitive_tests/build_HoltWintersExponentialSmoothing_pipline.py
  32. +0 -71 primitive_tests/build_KDiscord_pipeline.py
  33. +0 -51 primitive_tests/build_KNN_pipline.py
  34. +0 -51 primitive_tests/build_LODA_pipline.py
  35. +0 -51 primitive_tests/build_LOF_pipline.py
  36. +0 -49 primitive_tests/build_MatrixProfile_pipeline.py
  37. +0 -77 primitive_tests/build_MeanAverageTransform_pipline.py
  38. +0 -51 primitive_tests/build_OCSVM_pipline.py
  39. +0 -51 primitive_tests/build_PyodCOF.py
  40. +0 -49 primitive_tests/build_QuantileTransform_pipline.py
  41. +0 -49 primitive_tests/build_SOD_pipeline.py
  42. +0 -76 primitive_tests/build_SimpleExponentialSmoothing_pipline.py
  43. +0 -49 primitive_tests/build_Standardize_pipline.py
  44. +0 -80 primitive_tests/build_SubsequenceClustering_pipline.py
  45. +0 -48 primitive_tests/build_Telemanom.py
  46. +0 -86 primitive_tests/build_TimeIntervalTransform_pipeline.py
  47. +0 -64 primitive_tests/build_WaveletTransform_pipline.py
  48. +0 -50 primitive_tests/build_test_detection_algorithm_PyodMoGaal.py
  49. +0 -50 primitive_tests/build_test_detection_algorithm_PyodSoGaal.py
  50. +0 -61 primitive_tests/build_test_feature_analysis_spectral_residual_transform_pipeline.py
  51. +0 -62 primitive_tests/build_test_feature_analysis_statistical_abs_energy.py
  52. +0 -62 primitive_tests/build_test_feature_analysis_statistical_abs_sum.py
  53. +0 -62 primitive_tests/build_test_feature_analysis_statistical_gmean.py
  54. +0 -62 primitive_tests/build_test_feature_analysis_statistical_hmean.py
  55. +0 -62 primitive_tests/build_test_feature_analysis_statistical_kurtosis.py
  56. +0 -62 primitive_tests/build_test_feature_analysis_statistical_maximum.py
  57. +0 -62 primitive_tests/build_test_feature_analysis_statistical_mean.py
  58. +0 -62 primitive_tests/build_test_feature_analysis_statistical_mean_abs.py
  59. +0 -62 primitive_tests/build_test_feature_analysis_statistical_mean_abs_temporal_derivative.py
  60. +0 -62 primitive_tests/build_test_feature_analysis_statistical_mean_temporal_derivative.py
  61. +0 -62 primitive_tests/build_test_feature_analysis_statistical_median.py
  62. +0 -63 primitive_tests/build_test_feature_analysis_statistical_median_absolute_deviation.py
  63. +0 -62 primitive_tests/build_test_feature_analysis_statistical_minimum.py
  64. +0 -62 primitive_tests/build_test_feature_analysis_statistical_skew.py
  65. +0 -62 primitive_tests/build_test_feature_analysis_statistical_variation.py
  66. +0 -62 primitive_tests/build_test_feature_analysis_statistical_vec_sum.py
  67. +0 -62 primitive_tests/build_test_feature_analysis_statistical_willison_amplitude.py
  68. +0 -61 primitive_tests/build_test_time_series_seasonality_trend_decomposition.py
  69. +18 -18 primitive_tests/data_processing/CategoricalToBinary_pipeline.py
  70. +11 -14 primitive_tests/data_processing/ColumnFilter_pipeline.py
  71. +6 -10 primitive_tests/data_processing/ContinuityValidation_pipline.py
  72. +5 -10 primitive_tests/data_processing/DuplicationValidation_pipeline.py
  73. +10 -17 primitive_tests/data_processing/TimeIntervalTransform_pipeline.py
  74. +53 -0 primitive_tests/detection_algorithm/ABOD_pipeline.py
  75. +13 -29 primitive_tests/detection_algorithm/AutoEncoder_pipeline.py
  76. +54 -0 primitive_tests/detection_algorithm/AutoRegODetect_pipeline.py
  77. +57 -0 primitive_tests/detection_algorithm/CBLOF_pipline.py
  78. +54 -0 primitive_tests/detection_algorithm/DeepLog_pipeline.py
  79. +14 -30 primitive_tests/detection_algorithm/HBOS_pipline.py
  80. +14 -37 primitive_tests/detection_algorithm/HBOS_score_pipeline.py
  81. +13 -18 primitive_tests/detection_algorithm/IsolationForest_pipline.py
  82. +54 -0 primitive_tests/detection_algorithm/KDiscord_pipeline.py
  83. +55 -0 primitive_tests/detection_algorithm/KNN_pipeline.py
  84. +55 -0 primitive_tests/detection_algorithm/LODA_pipeline.py
  85. +55 -0 primitive_tests/detection_algorithm/LOF_pipeline.py
  86. +55 -0 primitive_tests/detection_algorithm/LSTMOD_pipeline.py
  87. +14 -29 primitive_tests/detection_algorithm/MatrixProfile_pipeline.py
  88. +55 -0 primitive_tests/detection_algorithm/OCSVM_pipline.py
  89. +53 -0 primitive_tests/detection_algorithm/PCAODetect_pipeline.py
  90. +55 -0 primitive_tests/detection_algorithm/PyodCOF.py
  91. +54 -0 primitive_tests/detection_algorithm/PyodMoGaal_pipeline.py
  92. +54 -0 primitive_tests/detection_algorithm/PyodSoGaal_pipeline.py
  93. +12 -28 primitive_tests/detection_algorithm/SOD_pipeline.py
  94. +54 -0 primitive_tests/detection_algorithm/Telemanom_pipeline.py
  95. +13 -29 primitive_tests/detection_algorithm/VariationalAutoEncoder_pipeline.py
  96. +18 -16 primitive_tests/feature_analysis/BKFilter_pipeline.py
  97. +18 -17 primitive_tests/feature_analysis/DiscreteCosineTransform_pipeline.py
  98. +18 -19 primitive_tests/feature_analysis/FastFourierTransform_pipeline.py
  99. +19 -19 primitive_tests/feature_analysis/HPFilter_pipeline.py
  100. +19 -19 primitive_tests/feature_analysis/NonNegativeMatrixFactorization_pipeline.py

+ 1
- 1
datasets/anomaly/transform_yahoo.py

@@ -10,7 +10,7 @@ import json
 # Designed for time series data
 name = 'yahoo_sub_5'
 src_path = './raw_data/yahoo_sub_5.csv'
-label_name = 'is_anomaly'
+label_name = 'anomaly'
 timestamp_name = 'timestamp'
 value_names = ['value_{}'.format(i) for i in range(5)]
 ratio = 0.9 # Ratio of training data, the rest is for testing
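
The renamed label column is the only change in this hunk; the 0.9 split ratio is consistent with the regenerated tables in this commit (1,400 data rows in yahoo_sub_5_dataset, 1,260 in TRAIN, 140 in TEST/SCORE). Below is a minimal sketch of how such a split could be derived from the raw CSV, assuming label_name names the anomaly column there; this is an illustration, not the actual transform_yahoo.py logic.

import pandas as pd

label_name = 'anomaly'   # renamed from 'is_anomaly' in this commit
ratio = 0.9              # fraction of rows used for TRAIN

df = pd.read_csv('./raw_data/yahoo_sub_5.csv')
split = int(len(df) * ratio)                    # 1400 * 0.9 = 1260
train, test = df.iloc[:split], df.iloc[split:]  # TRAIN vs TEST/SCORE rows
print(len(train), len(test), int(train[label_name].sum()))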


datasets/anomaly/yahoo_system_sub_5/SCORE/dataset_TEST/datasetDoc.json → datasets/anomaly/yahoo_sub_5/SCORE/dataset_TEST/datasetDoc.json

@@ -1,6 +1,6 @@
 {
   "about": {
-    "datasetID": "yahoo_system_sub_5_dataset_TEST",
+    "datasetID": "yahoo_sub_5_dataset_TEST",
     "datasetName": "NULL",
     "description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
     "citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
@@ -50,7 +50,7 @@
         },
         {
           "colIndex": 3,
-          "colName": "system_id",
+          "colName": "value_1",
           "colType": "real",
           "role": [
             "attribute"
@@ -58,6 +58,30 @@
         },
         {
           "colIndex": 4,
+          "colName": "value_2",
+          "colType": "real",
+          "role": [
+            "attribute"
+          ]
+        },
+        {
+          "colIndex": 5,
+          "colName": "value_3",
+          "colType": "real",
+          "role": [
+            "attribute"
+          ]
+        },
+        {
+          "colIndex": 6,
+          "colName": "value_4",
+          "colType": "real",
+          "role": [
+            "attribute"
+          ]
+        },
+        {
+          "colIndex": 7,
           "colName": "ground_truth",
           "colType": "integer",
           "role": [
@@ -65,7 +89,7 @@
           ]
         }
       ],
-      "columnsCount": 5
+      "columnsCount": 8
     }
   ]
 }
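
The schema now lists eight columns (d3mIndex, timestamp, value_0 through value_4, ground_truth) instead of five, so columnsCount moves from 5 to 8 and ground_truth shifts to colIndex 7. A minimal consistency check is sketched below, assuming the learningData resource is the first entry in dataResources; the helper is hypothetical and not part of the repository.

import json

with open('datasets/anomaly/yahoo_sub_5/SCORE/dataset_TEST/datasetDoc.json') as f:
    doc = json.load(f)

resource = doc['dataResources'][0]           # assumes learningData comes first
columns = resource['columns']
assert resource['columnsCount'] == len(columns)
assert [c['colIndex'] for c in columns] == list(range(len(columns)))
print([c['colName'] for c in columns])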

+ 141
- 0
datasets/anomaly/yahoo_sub_5/SCORE/dataset_TEST/tables/learningData.csv

@@ -0,0 +1,141 @@
d3mIndex,timestamp,value_0,value_1,value_2,value_3,value_4,ground_truth
1260,1261,7782,0.03428038631974298,2.5072222222222003,104,3119,0
1261,1262,7829,0.039360296791109,2.5927777777778,82,3590,0
1262,1263,7902,0.0,2.6894444444444,208,3893,0
1263,1264,8039,0.03894406599435602,2.6291666666667,92,3264,0
1264,1265,8350,0.18176011684739002,2.6469444444444,53,3963,0
1265,1266,8142,0.18521047165852,2.7461111111111003,65,2757,0
1266,1267,7886,0.13079770999921,2.9363888888889,62,2306,0
1267,1268,7743,0.13310058077443,3.2797222222222002,73,2549,0
1268,1269,7707,0.054750658073534006,3.5194444444444,84,2212,0
1269,1270,7726,0.030588852697706,3.8130555555556,90,2286,0
1270,1271,7717,0.12998124134227002,3.7941666666667,80,2979,0
1271,1272,10331,0.09100057249197198,3.6086111111111,90,3158,0
1272,1273,10515,0.19464543002904008,3.3858333333333,84,2645,0
1273,1274,10415,0.22178651521516,3.3336111111111,34,3161,0
1274,1275,10387,0.22983578430825,3.3116666666667003,67,4460,0
1275,1276,10471,0.298229429356,3.2616666666667005,74,2630,0
1276,1277,10385,0.12923377484588,3.0044444444444003,44,2593,0
1277,1278,10439,0.19609416059774,2.6741666666667,64,2625,0
1278,1279,10516,0.04051853381938501,2.3191666666667,70,4834,0
1279,1280,10587,0.07099894663641,2.0597222222222,96,4056,0
1280,1281,10586,0.07584150637714701,2.0547222222222,110,5713,0
1281,1282,10684,0.08180100127782801,2.1511111111111,68,3940,0
1282,1283,10880,0.0,2.2602777777778,90,4414,0
1283,1284,10830,0.0,2.2883333333333,90,5044,0
1284,1285,10794,0.09140162014739303,2.3736111111111002,69,3894,0
1285,1286,10843,0.0,2.5869444444444,46,3993,0
1286,1287,10805,0.0,2.6480555555556,74,4404,0
1287,1288,10996,0.0,2.6077777777777995,68,4072,0
1288,1289,11327,0.05363316840061,2.6069444444444,67,4182,0
1289,1290,11090,0.26818151064716,2.6908333333332997,51,3351,0
1290,1291,10578,0.21887772653901,2.9019444444444003,39,4183,0
1291,1292,10528,0.32371296573811,3.2711111111111,26,4068,0
1292,1293,10475,0.12565805017257,3.5872222222222,25,8139,0
1293,1294,10664,0.092277247744574,3.6913888888888997,32,11000,0
1294,1295,10513,0.077016875742983,3.6313888888888997,17,2975,0
1295,1296,9072,0.3714480797312501,3.5605555555556,19,2692,0
1296,1297,9069,0.19332372237792,3.4402777777778,16,2502,0
1297,1298,9089,0.06345811641554701,3.35,28,2510,0
1298,1299,9027,0.22671215594729996,3.3469444444444,24,2663,0
1299,1300,8969,0.053072279964629,3.2708333333332997,35,3575,0
1300,1301,9073,0.13336345197744,3.2519444444444,49,2586,0
1301,1302,8957,0.1252855094715,2.7311111111111,106,2908,0
1302,1303,9126,0.096211952864224,2.3875,80,3530,0
1303,1304,9122,0.09652446751775501,2.0847222222222,90,2776,0
1304,1305,9231,0.08924770147957402,2.0975,169,2962,0
1305,1306,9368,0.11889606284161999,2.1763888888889,98,3441,0
1306,1307,9458,0.031429841710104,2.2327777777777995,92,4376,0
1307,1308,9463,0.0,2.2725,91,3857,0
1308,1309,9356,0.036512411627867995,2.3202777777778,99,4685,0
1309,1310,9340,0.0,2.5425,90,4585,0
1310,1311,9340,0.0,2.5986111111111,126,3542,0
1311,1312,9276,0.0,2.6319444444444,102,3370,0
1312,1313,9611,0.10106696361212,2.5836111111111,132,3515,0
1313,1314,9532,0.14854949043035,2.675,88,3793,0
1314,1315,9156,0.08612162048398897,2.8522222222222,135,2954,0
1315,1316,9222,0.16494200410492002,3.1302777777778,114,2627,0
1316,1317,9282,0.28637713141253,3.4805555555556,35,2550,0
1317,1318,9573,0.13206535647488,3.5994444444444,24,2480,0
1318,1319,9333,0.27364025607799,3.5847222222222,44,2521,0
1319,1320,9987,0.38382339961227,3.4963888888889,26,2860,0
1320,1321,10133,0.08426242877623301,3.3825,37,3675,0
1321,1322,10010,0.3290413568025901,3.2694444444444,45,2704,0
1322,1323,10028,0.22632868808707998,3.2322222222222,42,3121,0
1323,1324,9984,0.17914189971361,3.1936111111111005,47,2603,0
1324,1325,10041,0.30046815361859003,3.0536111111111004,34,3984,0
1325,1326,10072,0.22650915594248,2.7819444444444,56,2537,0
1326,1327,10025,0.0,2.4152777777777996,87,3349,0
1327,1328,10116,0.1223093269317,2.1569444444443997,74,3958,0
1328,1329,10232,0.1696074188221,2.1125,90,4243,0
1329,1330,10516,0.0,2.1833333333333003,79,4159,0
1330,1331,10449,0.028193633007367002,2.205,97,5637,0
1331,1332,10598,0.0,2.1697222222222,90,8142,0
1332,1333,10337,0.0,2.3075,77,5713,0
1333,1334,10469,0.097305232437507,2.4575,101,3668,0
1334,1335,10426,0.11905908868378999,2.6077777777777995,74,4307,0
1335,1336,10531,0.11660374103282001,2.6275,439,4354,0
1336,1337,10875,0.060474297756584014,2.6144444444443997,79,4262,0
1337,1338,10494,0.22568442027805,2.6477777777777995,165,3446,0
1338,1339,10195,0.14077736537045002,2.8594444444444003,139,2677,0
1339,1340,9918,0.1924574892026,3.2675,56,4450,0
1340,1341,9889,0.18922597300629002,3.5136111111111004,102,3044,0
1341,1342,9947,0.041593949118095004,3.5725,101,3428,0
1342,1343,9977,0.2502095174271,3.6863888888889,41,2845,0
1343,1344,10835,0.18663972932643,3.5636111111111,94,2781,0
1344,1345,10765,0.07351854082400297,3.4127777777778,116,2743,0
1345,1346,10656,0.081949111399618,3.295,94,4470,0
1346,1347,10485,0.20148511394008997,3.2666666666667004,89,2596,0
1347,1348,10681,0.11515101921294,3.1933333333332996,141,3249,0
1348,1349,10852,0.07797276382811,3.0688888888888997,167,2529,0
1349,1350,10728,0.07244862879413201,2.8102777777778,148,2452,0
1350,1351,10874,0.07310929970435699,2.42,105,2934,0
1351,1352,10964,0.066868365737218,2.1358333333333,210,3159,0
1352,1353,10984,0.05788512501593701,1.9916666666667,145,3974,0
1353,1354,11055,0.09727414207464803,2.0947222222222,136,4305,0
1354,1355,11233,0.033270317741557996,2.1591666666667,126,5012,0
1355,1356,11161,0.0,2.2377777777778,157,4455,0
1356,1357,10966,0.038270957919533,2.2511111111111,105,4108,0
1357,1358,11193,0.08728058888363299,2.4208333333332996,114,4339,0
1358,1359,11167,0.10536774813238,2.5241666666667,104,5056,0
1359,1360,11367,0.1233991317089,2.5794444444443996,69,5573,0
1360,1361,51251,0.042565915766552,2.5936111111111,75,3366,1
1361,1362,17953,0.23147422367229,2.6830555555556,73,2559,1
1362,1363,170029,0.08983405162538903,2.8188888888888997,74,1999,1
1363,1364,10955,0.07464756469365201,2.9513888888888995,126,1993,0
1364,1365,10984,0.09924410491893401,3.2830555555556,67,1913,0
1365,1366,10964,0.11535172009194,3.4819444444444,32,1760,0
1366,1367,10980,0.21774881707851998,3.5886111111111005,38,1890,0
1367,1368,10852,0.1305066423559,3.4836111111111,34,2469,0
1368,1369,10786,0.10054853030204,3.3955555555556,36,2133,0
1369,1370,10841,0.02468393737575,3.2847222222222,26,3359,0
1370,1371,10762,0.10018007414459,3.2383333333332995,74,3783,0
1371,1372,10419,0.12522619841308,3.2188888888888996,85,1809,0
1372,1373,10467,0.11781887197077001,2.9483333333333,67,2143,0
1373,1374,10502,0.13417256350298,2.5855555555556,84,2567,0
1374,1375,10519,0.07474686582090599,2.3005555555556003,1630,2176,0
1375,1376,10579,0.13570963056519,2.0855555555556,1435,1929,0
1376,1377,10502,0.076431907457478,1.9027777777777999,857,2244,0
1377,1378,10661,0.0,1.9411111111111,31,1810,0
1378,1379,10818,0.1936428046839,2.0444444444444,500,2088,0
1379,1380,10918,0.05282677388968402,2.1363888888889,53,2371,0
1380,1381,10871,0.0,2.22,61,1843,0
1381,1382,10796,0.054466597481213,2.3530555555556,158,2668,0
1382,1383,10774,0.057459020289436,2.545,184,2309,0
1383,1384,10898,0.28750562005936,2.6202777777777997,91,1998,0
1384,1385,11442,0.075538554674309,2.6847222222222,60,2480,0
1385,1386,11113,0.08112608570492501,2.6591666666667004,107,2147,0
1386,1387,10888,0.21563803296368,2.7863888888888995,5157,1802,0
1387,1388,10894,0.09572500230568501,3.0269444444444003,28,1789,0
1388,1389,10888,0.17516056892320994,3.3227777777778,24,1999,0
1389,1390,10896,0.32902836018585996,3.6097222222222,21,2142,0
1390,1391,10800,0.10216065221678,3.6805555555555998,12,1904,0
1391,1392,11000,0.19741931250852,3.6075,24,1876,0
1392,1393,10985,0.10149107903671001,3.4091666666667004,17,2434,0
1393,1394,11017,0.17479255893624,3.3666666666667004,48,2472,0
1394,1395,10863,0.034385029573777,3.3158333333332997,41,1744,0
1395,1396,10875,0.21988771218053,3.1622222222222,1088,2404,0
1396,1397,10987,0.10149107903671001,3.1086111111111,68,1971,0
1397,1398,10778,0.10269981175444999,2.6552777777778,2575,1713,0
1398,1399,10957,0.11258759940039,2.2730555555556,4688,1765,0
1399,1400,10832,0.13022351806001,2.0591666666667,477,3156,0
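
The new TEST/SCORE table holds 140 rows (plus header) with columns d3mIndex, timestamp, value_0 through value_4, and ground_truth; only d3mIndex 1360-1362 are labeled anomalous. A quick way to inspect it is sketched below, assuming pandas is available in the environment.

import pandas as pd

df = pd.read_csv(
    'datasets/anomaly/yahoo_sub_5/SCORE/dataset_TEST/tables/learningData.csv')
print(df.shape)                        # (140, 8)
print(df[df['ground_truth'] == 1])     # the three anomalous rows, d3mIndex 1360-1362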

+ 1261
- 0
datasets/anomaly/yahoo_sub_5/SCORE/problem_TEST/dataSplits.csv
File diff suppressed because it is too large


datasets/anomaly/yahoo_system_sub_5/TRAIN/problem_TRAIN/problemDoc.json → datasets/anomaly/yahoo_sub_5/SCORE/problem_TEST/problemDoc.json

@@ -1,7 +1,7 @@
 {
   "about": {
-    "problemID": "yahoo_system_sub_5_problem",
-    "problemName": "yahoo_system_sub_5_problem",
+    "problemID": "yahoo_sub_5_problem",
+    "problemName": "yahoo_sub_5_problem",
     "problemDescription": "Anomaly detection",
     "problemVersion": "4.0.0",
     "problemSchemaVersion": "4.0.0",
@@ -14,12 +14,12 @@
   "inputs": {
     "data": [
       {
-        "datasetID": "yahoo_system_sub_5_dataset",
+        "datasetID": "yahoo_sub_5_dataset",
         "targets": [
           {
             "targetIndex": 0,
             "resID": "learningData",
-            "colIndex": 4,
+            "colIndex": 7,
             "colName": "ground_truth"
           }
         ]
@@ -35,20 +35,20 @@
   "datasetViewMaps": {
     "train": [
       {
-        "from": "yahoo_system_sub_5_dataset",
-        "to": "yahoo_system_sub_5_dataset_TRAIN"
+        "from": "yahoo_sub_5_dataset",
+        "to": "yahoo_sub_5_dataset_TRAIN"
       }
     ],
     "test": [
       {
-        "from": "yahoo_system_sub_5_dataset",
-        "to": "yahoo_system_sub_5_dataset_TEST"
+        "from": "yahoo_sub_5_dataset",
+        "to": "yahoo_sub_5_dataset_TEST"
      }
     ],
     "score": [
       {
-        "from": "yahoo_system_sub_5_dataset",
-        "to": "yahoo_system_sub_5_dataset_SCORE"
+        "from": "yahoo_sub_5_dataset",
+        "to": "yahoo_sub_5_dataset_SCORE"
       }
     ]
   }
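
Because ground_truth now sits at colIndex 7 in the widened table, the target reference in problemDoc.json moves from 4 to 7 along with it. The sketch below cross-checks the two documents; it is a hypothetical helper, assuming pandas and the paths shown above.

import json
import pandas as pd

base = 'datasets/anomaly/yahoo_sub_5/SCORE'
with open(base + '/problem_TEST/problemDoc.json') as f:
    problem = json.load(f)

target = problem['inputs']['data'][0]['targets'][0]
df = pd.read_csv(base + '/dataset_TEST/tables/learningData.csv')
# colIndex 7 should resolve to the 'ground_truth' column of the table
assert df.columns[target['colIndex']] == target['colName'] == 'ground_truth'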

datasets/anomaly/yahoo_system_sub_5/SCORE/targets.csv → datasets/anomaly/yahoo_sub_5/SCORE/targets.csv


datasets/anomaly/yahoo_system_sub_5/TEST/dataset_TEST/datasetDoc.json → datasets/anomaly/yahoo_sub_5/TEST/dataset_TEST/datasetDoc.json

@@ -1,6 +1,6 @@
 {
   "about": {
-    "datasetID": "yahoo_system_sub_5_dataset_TEST",
+    "datasetID": "yahoo_sub_5_dataset_TEST",
     "datasetName": "NULL",
     "description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
     "citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
@@ -50,7 +50,7 @@
         },
         {
           "colIndex": 3,
-          "colName": "system_id",
+          "colName": "value_1",
           "colType": "real",
           "role": [
             "attribute"
@@ -58,6 +58,30 @@
         },
         {
           "colIndex": 4,
+          "colName": "value_2",
+          "colType": "real",
+          "role": [
+            "attribute"
+          ]
+        },
+        {
+          "colIndex": 5,
+          "colName": "value_3",
+          "colType": "real",
+          "role": [
+            "attribute"
+          ]
+        },
+        {
+          "colIndex": 6,
+          "colName": "value_4",
+          "colType": "real",
+          "role": [
+            "attribute"
+          ]
+        },
+        {
+          "colIndex": 7,
           "colName": "ground_truth",
           "colType": "integer",
           "role": [
@@ -65,7 +89,7 @@
           ]
         }
       ],
-      "columnsCount": 5
+      "columnsCount": 8
     }
   ]
 }

+ 141
- 0
datasets/anomaly/yahoo_sub_5/TEST/dataset_TEST/tables/learningData.csv

@@ -0,0 +1,141 @@
d3mIndex,timestamp,value_0,value_1,value_2,value_3,value_4,ground_truth
1260,1261,7782,0.03428038631974298,2.5072222222222003,104,3119,0
1261,1262,7829,0.039360296791109,2.5927777777778,82,3590,0
1262,1263,7902,0.0,2.6894444444444,208,3893,0
1263,1264,8039,0.03894406599435602,2.6291666666667,92,3264,0
1264,1265,8350,0.18176011684739002,2.6469444444444,53,3963,0
1265,1266,8142,0.18521047165852,2.7461111111111003,65,2757,0
1266,1267,7886,0.13079770999921,2.9363888888889,62,2306,0
1267,1268,7743,0.13310058077443,3.2797222222222002,73,2549,0
1268,1269,7707,0.054750658073534006,3.5194444444444,84,2212,0
1269,1270,7726,0.030588852697706,3.8130555555556,90,2286,0
1270,1271,7717,0.12998124134227002,3.7941666666667,80,2979,0
1271,1272,10331,0.09100057249197198,3.6086111111111,90,3158,0
1272,1273,10515,0.19464543002904008,3.3858333333333,84,2645,0
1273,1274,10415,0.22178651521516,3.3336111111111,34,3161,0
1274,1275,10387,0.22983578430825,3.3116666666667003,67,4460,0
1275,1276,10471,0.298229429356,3.2616666666667005,74,2630,0
1276,1277,10385,0.12923377484588,3.0044444444444003,44,2593,0
1277,1278,10439,0.19609416059774,2.6741666666667,64,2625,0
1278,1279,10516,0.04051853381938501,2.3191666666667,70,4834,0
1279,1280,10587,0.07099894663641,2.0597222222222,96,4056,0
1280,1281,10586,0.07584150637714701,2.0547222222222,110,5713,0
1281,1282,10684,0.08180100127782801,2.1511111111111,68,3940,0
1282,1283,10880,0.0,2.2602777777778,90,4414,0
1283,1284,10830,0.0,2.2883333333333,90,5044,0
1284,1285,10794,0.09140162014739303,2.3736111111111002,69,3894,0
1285,1286,10843,0.0,2.5869444444444,46,3993,0
1286,1287,10805,0.0,2.6480555555556,74,4404,0
1287,1288,10996,0.0,2.6077777777777995,68,4072,0
1288,1289,11327,0.05363316840061,2.6069444444444,67,4182,0
1289,1290,11090,0.26818151064716,2.6908333333332997,51,3351,0
1290,1291,10578,0.21887772653901,2.9019444444444003,39,4183,0
1291,1292,10528,0.32371296573811,3.2711111111111,26,4068,0
1292,1293,10475,0.12565805017257,3.5872222222222,25,8139,0
1293,1294,10664,0.092277247744574,3.6913888888888997,32,11000,0
1294,1295,10513,0.077016875742983,3.6313888888888997,17,2975,0
1295,1296,9072,0.3714480797312501,3.5605555555556,19,2692,0
1296,1297,9069,0.19332372237792,3.4402777777778,16,2502,0
1297,1298,9089,0.06345811641554701,3.35,28,2510,0
1298,1299,9027,0.22671215594729996,3.3469444444444,24,2663,0
1299,1300,8969,0.053072279964629,3.2708333333332997,35,3575,0
1300,1301,9073,0.13336345197744,3.2519444444444,49,2586,0
1301,1302,8957,0.1252855094715,2.7311111111111,106,2908,0
1302,1303,9126,0.096211952864224,2.3875,80,3530,0
1303,1304,9122,0.09652446751775501,2.0847222222222,90,2776,0
1304,1305,9231,0.08924770147957402,2.0975,169,2962,0
1305,1306,9368,0.11889606284161999,2.1763888888889,98,3441,0
1306,1307,9458,0.031429841710104,2.2327777777777995,92,4376,0
1307,1308,9463,0.0,2.2725,91,3857,0
1308,1309,9356,0.036512411627867995,2.3202777777778,99,4685,0
1309,1310,9340,0.0,2.5425,90,4585,0
1310,1311,9340,0.0,2.5986111111111,126,3542,0
1311,1312,9276,0.0,2.6319444444444,102,3370,0
1312,1313,9611,0.10106696361212,2.5836111111111,132,3515,0
1313,1314,9532,0.14854949043035,2.675,88,3793,0
1314,1315,9156,0.08612162048398897,2.8522222222222,135,2954,0
1315,1316,9222,0.16494200410492002,3.1302777777778,114,2627,0
1316,1317,9282,0.28637713141253,3.4805555555556,35,2550,0
1317,1318,9573,0.13206535647488,3.5994444444444,24,2480,0
1318,1319,9333,0.27364025607799,3.5847222222222,44,2521,0
1319,1320,9987,0.38382339961227,3.4963888888889,26,2860,0
1320,1321,10133,0.08426242877623301,3.3825,37,3675,0
1321,1322,10010,0.3290413568025901,3.2694444444444,45,2704,0
1322,1323,10028,0.22632868808707998,3.2322222222222,42,3121,0
1323,1324,9984,0.17914189971361,3.1936111111111005,47,2603,0
1324,1325,10041,0.30046815361859003,3.0536111111111004,34,3984,0
1325,1326,10072,0.22650915594248,2.7819444444444,56,2537,0
1326,1327,10025,0.0,2.4152777777777996,87,3349,0
1327,1328,10116,0.1223093269317,2.1569444444443997,74,3958,0
1328,1329,10232,0.1696074188221,2.1125,90,4243,0
1329,1330,10516,0.0,2.1833333333333003,79,4159,0
1330,1331,10449,0.028193633007367002,2.205,97,5637,0
1331,1332,10598,0.0,2.1697222222222,90,8142,0
1332,1333,10337,0.0,2.3075,77,5713,0
1333,1334,10469,0.097305232437507,2.4575,101,3668,0
1334,1335,10426,0.11905908868378999,2.6077777777777995,74,4307,0
1335,1336,10531,0.11660374103282001,2.6275,439,4354,0
1336,1337,10875,0.060474297756584014,2.6144444444443997,79,4262,0
1337,1338,10494,0.22568442027805,2.6477777777777995,165,3446,0
1338,1339,10195,0.14077736537045002,2.8594444444444003,139,2677,0
1339,1340,9918,0.1924574892026,3.2675,56,4450,0
1340,1341,9889,0.18922597300629002,3.5136111111111004,102,3044,0
1341,1342,9947,0.041593949118095004,3.5725,101,3428,0
1342,1343,9977,0.2502095174271,3.6863888888889,41,2845,0
1343,1344,10835,0.18663972932643,3.5636111111111,94,2781,0
1344,1345,10765,0.07351854082400297,3.4127777777778,116,2743,0
1345,1346,10656,0.081949111399618,3.295,94,4470,0
1346,1347,10485,0.20148511394008997,3.2666666666667004,89,2596,0
1347,1348,10681,0.11515101921294,3.1933333333332996,141,3249,0
1348,1349,10852,0.07797276382811,3.0688888888888997,167,2529,0
1349,1350,10728,0.07244862879413201,2.8102777777778,148,2452,0
1350,1351,10874,0.07310929970435699,2.42,105,2934,0
1351,1352,10964,0.066868365737218,2.1358333333333,210,3159,0
1352,1353,10984,0.05788512501593701,1.9916666666667,145,3974,0
1353,1354,11055,0.09727414207464803,2.0947222222222,136,4305,0
1354,1355,11233,0.033270317741557996,2.1591666666667,126,5012,0
1355,1356,11161,0.0,2.2377777777778,157,4455,0
1356,1357,10966,0.038270957919533,2.2511111111111,105,4108,0
1357,1358,11193,0.08728058888363299,2.4208333333332996,114,4339,0
1358,1359,11167,0.10536774813238,2.5241666666667,104,5056,0
1359,1360,11367,0.1233991317089,2.5794444444443996,69,5573,0
1360,1361,51251,0.042565915766552,2.5936111111111,75,3366,1
1361,1362,17953,0.23147422367229,2.6830555555556,73,2559,1
1362,1363,170029,0.08983405162538903,2.8188888888888997,74,1999,1
1363,1364,10955,0.07464756469365201,2.9513888888888995,126,1993,0
1364,1365,10984,0.09924410491893401,3.2830555555556,67,1913,0
1365,1366,10964,0.11535172009194,3.4819444444444,32,1760,0
1366,1367,10980,0.21774881707851998,3.5886111111111005,38,1890,0
1367,1368,10852,0.1305066423559,3.4836111111111,34,2469,0
1368,1369,10786,0.10054853030204,3.3955555555556,36,2133,0
1369,1370,10841,0.02468393737575,3.2847222222222,26,3359,0
1370,1371,10762,0.10018007414459,3.2383333333332995,74,3783,0
1371,1372,10419,0.12522619841308,3.2188888888888996,85,1809,0
1372,1373,10467,0.11781887197077001,2.9483333333333,67,2143,0
1373,1374,10502,0.13417256350298,2.5855555555556,84,2567,0
1374,1375,10519,0.07474686582090599,2.3005555555556003,1630,2176,0
1375,1376,10579,0.13570963056519,2.0855555555556,1435,1929,0
1376,1377,10502,0.076431907457478,1.9027777777777999,857,2244,0
1377,1378,10661,0.0,1.9411111111111,31,1810,0
1378,1379,10818,0.1936428046839,2.0444444444444,500,2088,0
1379,1380,10918,0.05282677388968402,2.1363888888889,53,2371,0
1380,1381,10871,0.0,2.22,61,1843,0
1381,1382,10796,0.054466597481213,2.3530555555556,158,2668,0
1382,1383,10774,0.057459020289436,2.545,184,2309,0
1383,1384,10898,0.28750562005936,2.6202777777777997,91,1998,0
1384,1385,11442,0.075538554674309,2.6847222222222,60,2480,0
1385,1386,11113,0.08112608570492501,2.6591666666667004,107,2147,0
1386,1387,10888,0.21563803296368,2.7863888888888995,5157,1802,0
1387,1388,10894,0.09572500230568501,3.0269444444444003,28,1789,0
1388,1389,10888,0.17516056892320994,3.3227777777778,24,1999,0
1389,1390,10896,0.32902836018585996,3.6097222222222,21,2142,0
1390,1391,10800,0.10216065221678,3.6805555555555998,12,1904,0
1391,1392,11000,0.19741931250852,3.6075,24,1876,0
1392,1393,10985,0.10149107903671001,3.4091666666667004,17,2434,0
1393,1394,11017,0.17479255893624,3.3666666666667004,48,2472,0
1394,1395,10863,0.034385029573777,3.3158333333332997,41,1744,0
1395,1396,10875,0.21988771218053,3.1622222222222,1088,2404,0
1396,1397,10987,0.10149107903671001,3.1086111111111,68,1971,0
1397,1398,10778,0.10269981175444999,2.6552777777778,2575,1713,0
1398,1399,10957,0.11258759940039,2.2730555555556,4688,1765,0
1399,1400,10832,0.13022351806001,2.0591666666667,477,3156,0

+ 1261
- 0
datasets/anomaly/yahoo_sub_5/TEST/problem_TEST/dataSplits.csv
File diff suppressed because it is too large


datasets/anomaly/yahoo_system_sub_5/yahoo_system_sub_5_problem/problemDoc.json → datasets/anomaly/yahoo_sub_5/TEST/problem_TEST/problemDoc.json

@@ -1,7 +1,7 @@
 {
   "about": {
-    "problemID": "yahoo_system_sub_5_problem",
-    "problemName": "yahoo_system_sub_5_problem",
+    "problemID": "yahoo_sub_5_problem",
+    "problemName": "yahoo_sub_5_problem",
     "problemDescription": "Anomaly detection",
     "problemVersion": "4.0.0",
     "problemSchemaVersion": "4.0.0",
@@ -14,12 +14,12 @@
   "inputs": {
     "data": [
       {
-        "datasetID": "yahoo_system_sub_5_dataset",
+        "datasetID": "yahoo_sub_5_dataset",
         "targets": [
           {
             "targetIndex": 0,
             "resID": "learningData",
-            "colIndex": 4,
+            "colIndex": 7,
             "colName": "ground_truth"
           }
         ]
@@ -35,20 +35,20 @@
   "datasetViewMaps": {
     "train": [
       {
-        "from": "yahoo_system_sub_5_dataset",
-        "to": "yahoo_system_sub_5_dataset_TRAIN"
+        "from": "yahoo_sub_5_dataset",
+        "to": "yahoo_sub_5_dataset_TRAIN"
       }
     ],
     "test": [
       {
-        "from": "yahoo_system_sub_5_dataset",
-        "to": "yahoo_system_sub_5_dataset_TEST"
+        "from": "yahoo_sub_5_dataset",
+        "to": "yahoo_sub_5_dataset_TEST"
      }
     ],
     "score": [
       {
-        "from": "yahoo_system_sub_5_dataset",
-        "to": "yahoo_system_sub_5_dataset_SCORE"
+        "from": "yahoo_sub_5_dataset",
+        "to": "yahoo_sub_5_dataset_SCORE"
       }
     ]
   }

datasets/anomaly/yahoo_system_sub_5/TRAIN/dataset_TRAIN/datasetDoc.json → datasets/anomaly/yahoo_sub_5/TRAIN/dataset_TRAIN/datasetDoc.json

@@ -1,6 +1,6 @@
 {
   "about": {
-    "datasetID": "yahoo_system_sub_5_dataset_TRAIN",
+    "datasetID": "yahoo_sub_5_dataset_TRAIN",
     "datasetName": "NULL",
     "description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
     "citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
@@ -50,7 +50,7 @@
         },
         {
           "colIndex": 3,
-          "colName": "system_id",
+          "colName": "value_1",
           "colType": "real",
           "role": [
             "attribute"
@@ -58,6 +58,30 @@
         },
         {
           "colIndex": 4,
+          "colName": "value_2",
+          "colType": "real",
+          "role": [
+            "attribute"
+          ]
+        },
+        {
+          "colIndex": 5,
+          "colName": "value_3",
+          "colType": "real",
+          "role": [
+            "attribute"
+          ]
+        },
+        {
+          "colIndex": 6,
+          "colName": "value_4",
+          "colType": "real",
+          "role": [
+            "attribute"
+          ]
+        },
+        {
+          "colIndex": 7,
           "colName": "ground_truth",
           "colType": "integer",
           "role": [
@@ -65,7 +89,7 @@
           ]
         }
       ],
-      "columnsCount": 5
+      "columnsCount": 8
     }
   ]
 }

+ 1261
- 0
datasets/anomaly/yahoo_sub_5/TRAIN/dataset_TRAIN/tables/learningData.csv
File diff suppressed because it is too large


+ 1261
- 0
datasets/anomaly/yahoo_sub_5/TRAIN/problem_TRAIN/dataSplits.csv
File diff suppressed because it is too large


datasets/anomaly/yahoo_system_sub_5/TEST/problem_TEST/problemDoc.json → datasets/anomaly/yahoo_sub_5/TRAIN/problem_TRAIN/problemDoc.json

@@ -1,7 +1,7 @@
 {
   "about": {
-    "problemID": "yahoo_system_sub_5_problem",
-    "problemName": "yahoo_system_sub_5_problem",
+    "problemID": "yahoo_sub_5_problem",
+    "problemName": "yahoo_sub_5_problem",
     "problemDescription": "Anomaly detection",
     "problemVersion": "4.0.0",
     "problemSchemaVersion": "4.0.0",
@@ -14,12 +14,12 @@
   "inputs": {
     "data": [
       {
-        "datasetID": "yahoo_system_sub_5_dataset",
+        "datasetID": "yahoo_sub_5_dataset",
         "targets": [
           {
             "targetIndex": 0,
             "resID": "learningData",
-            "colIndex": 4,
+            "colIndex": 7,
             "colName": "ground_truth"
           }
         ]
@@ -35,20 +35,20 @@
   "datasetViewMaps": {
     "train": [
       {
-        "from": "yahoo_system_sub_5_dataset",
-        "to": "yahoo_system_sub_5_dataset_TRAIN"
+        "from": "yahoo_sub_5_dataset",
+        "to": "yahoo_sub_5_dataset_TRAIN"
      }
     ],
     "test": [
       {
-        "from": "yahoo_system_sub_5_dataset",
-        "to": "yahoo_system_sub_5_dataset_TEST"
+        "from": "yahoo_sub_5_dataset",
+        "to": "yahoo_sub_5_dataset_TEST"
      }
     ],
     "score": [
       {
-        "from": "yahoo_system_sub_5_dataset",
-        "to": "yahoo_system_sub_5_dataset_SCORE"
+        "from": "yahoo_sub_5_dataset",
+        "to": "yahoo_sub_5_dataset_SCORE"
       }
     ]
   }

datasets/anomaly/yahoo_system_sub_5/yahoo_system_sub_5_dataset/datasetDoc.json → datasets/anomaly/yahoo_sub_5/yahoo_sub_5_dataset/datasetDoc.json

@@ -1,7 +1,7 @@
 {
   "about": {
-    "datasetID": "yahoo_system_sub_5_dataset",
-    "datasetName": "yahoo_system_sub_5",
+    "datasetID": "yahoo_sub_5_dataset",
+    "datasetName": "yahoo_sub_5",
     "description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
     "citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
     "license": " CC Public Domain Mark 1.0 ",
@@ -50,7 +50,7 @@
         },
         {
           "colIndex": 3,
-          "colName": "system_id",
+          "colName": "value_1",
           "colType": "real",
           "role": [
             "attribute"
@@ -58,6 +58,30 @@
         },
         {
           "colIndex": 4,
+          "colName": "value_2",
+          "colType": "real",
+          "role": [
+            "attribute"
+          ]
+        },
+        {
+          "colIndex": 5,
+          "colName": "value_3",
+          "colType": "real",
+          "role": [
+            "attribute"
+          ]
+        },
+        {
+          "colIndex": 6,
+          "colName": "value_4",
+          "colType": "real",
+          "role": [
+            "attribute"
+          ]
+        },
+        {
+          "colIndex": 7,
           "colName": "ground_truth",
           "colType": "integer",
           "role": [
@@ -65,7 +89,7 @@
           ]
         }
       ],
-      "columnsCount": 5
+      "columnsCount": 8
     }
   ]
 }

+ 1401
- 0
datasets/anomaly/yahoo_sub_5/yahoo_sub_5_dataset/tables/learningData.csv
File diff suppressed because it is too large


+ 1261
- 0
datasets/anomaly/yahoo_sub_5/yahoo_sub_5_problem/dataSplits.csv
File diff suppressed because it is too large


datasets/anomaly/yahoo_system_sub_5/SCORE/problem_TEST/problemDoc.json → datasets/anomaly/yahoo_sub_5/yahoo_sub_5_problem/problemDoc.json

@@ -1,7 +1,7 @@
 {
   "about": {
-    "problemID": "yahoo_system_sub_5_problem",
-    "problemName": "yahoo_system_sub_5_problem",
+    "problemID": "yahoo_sub_5_problem",
+    "problemName": "yahoo_sub_5_problem",
     "problemDescription": "Anomaly detection",
     "problemVersion": "4.0.0",
     "problemSchemaVersion": "4.0.0",
@@ -14,12 +14,12 @@
   "inputs": {
     "data": [
       {
-        "datasetID": "yahoo_system_sub_5_dataset",
+        "datasetID": "yahoo_sub_5_dataset",
         "targets": [
           {
             "targetIndex": 0,
             "resID": "learningData",
-            "colIndex": 4,
+            "colIndex": 7,
             "colName": "ground_truth"
           }
         ]
@@ -35,20 +35,20 @@
   "datasetViewMaps": {
     "train": [
       {
-        "from": "yahoo_system_sub_5_dataset",
-        "to": "yahoo_system_sub_5_dataset_TRAIN"
+        "from": "yahoo_sub_5_dataset",
+        "to": "yahoo_sub_5_dataset_TRAIN"
      }
     ],
     "test": [
       {
-        "from": "yahoo_system_sub_5_dataset",
-        "to": "yahoo_system_sub_5_dataset_TEST"
+        "from": "yahoo_sub_5_dataset",
+        "to": "yahoo_sub_5_dataset_TEST"
      }
     ],
     "score": [
       {
-        "from": "yahoo_system_sub_5_dataset",
-        "to": "yahoo_system_sub_5_dataset_SCORE"
+        "from": "yahoo_sub_5_dataset",
+        "to": "yahoo_sub_5_dataset_SCORE"
       }
     ]
   }

+ 0
- 1401
datasets/anomaly/yahoo_system_sub_5/SCORE/dataset_TEST/tables/learningData.csv
File diff suppressed because it is too large


+ 0
- 5601
datasets/anomaly/yahoo_system_sub_5/SCORE/problem_TEST/dataSplits.csv
File diff suppressed because it is too large


+ 0
- 1401
datasets/anomaly/yahoo_system_sub_5/TEST/dataset_TEST/tables/learningData.csv
File diff suppressed because it is too large


+ 0
- 5601
datasets/anomaly/yahoo_system_sub_5/TEST/problem_TEST/dataSplits.csv
File diff suppressed because it is too large


+ 0
- 5601
datasets/anomaly/yahoo_system_sub_5/TRAIN/dataset_TRAIN/tables/learningData.csv
File diff suppressed because it is too large


+ 0
- 5601
datasets/anomaly/yahoo_system_sub_5/TRAIN/problem_TRAIN/dataSplits.csv
File diff suppressed because it is too large


+ 0
- 7001
datasets/anomaly/yahoo_system_sub_5/yahoo_system_sub_5_dataset/tables/learningData.csv
File diff suppressed because it is too large


+ 0
- 5601
datasets/anomaly/yahoo_system_sub_5/yahoo_system_sub_5_problem/dataSplits.csv
File diff suppressed because it is too large


+ 0
- 70
primitive_tests/build_ABOD_pipline.py

@@ -1,70 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: ABOD
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_abod'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')

step_5.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_5.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2, 4,))
step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='replace')

step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()
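
This top-level script, like the other build_*_pipline.py files removed here, is superseded by the per-module copies added under primitive_tests/ in this commit (e.g. primitive_tests/detection_algorithm/ABOD_pipeline.py in the file list above). Its last step writes pipeline.yml; a minimal round-trip sketch is shown below, assuming the d3m SDK's Pipeline.from_yaml counterpart to the to_yaml call above.

from d3m.metadata.pipeline import Pipeline

# Reload the serialized pipeline to confirm it round-trips (sketch only).
with open('pipeline.yml') as f:
    pipeline = Pipeline.from_yaml(f)

print(pipeline.id, len(pipeline.steps))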


+ 0
- 51
primitive_tests/build_CBLOF_pipline.py

@@ -1,51 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_cblof')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()


+ 0
- 49
primitive_tests/build_DeepLog_pipeline.py

@@ -1,49 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.deeplog')

step_2 = PrimitiveStep(primitive=primitive_2)
#step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# # Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()

+ 0
- 76
primitive_tests/build_HoltSmoothing_pipline.py

@@ -1,76 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: holt smoothing
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.holt_smoothing'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_hyperparameter(name="exclude_columns", argument_type=ArgumentType.VALUE, data = (2, 3))
step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: isolation forest
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm'))
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
#step_6.add_output('produce')
#pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()


+ 0
- 76
primitive_tests/build_HoltWintersExponentialSmoothing_pipline.py

@@ -1,76 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: holt winters exponential smoothing
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.holt_winters_exponential_smoothing'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (2, 3))
step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: isolation forest
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm'))
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
#step_6.add_output('produce')
#pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 71
primitive_tests/build_KDiscord_pipeline.py View File

@@ -1,71 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import numpy as np

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> standard_scaler -> KDiscordODetector

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# # Step 3: Standardization
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# # Step 4: test primitive
primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.KDiscordODetector')
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_4.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10)
# step_4.add_hyperparameter(name='weights', argument_type=ArgumentType.VALUE, data=weights_ndarray)
step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False)
# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # Note: there is an unresolved issue with multi-dimensional input
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_output('produce')
step_4.add_output('produce_score')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()
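For intuition, the KDiscordODetector step above is configured with window_size=10 and contamination=0.1. A rough, hypothetical numpy illustration of the discord idea (scoring each sliding window by its distance to its nearest neighbouring windows; this is an assumption about the approach, not the primitive's actual code):

import numpy as np

def kdiscord_scores(series, window_size=10, k=3):
    # All length-window_size subsequences of the series.
    windows = np.lib.stride_tricks.sliding_window_view(np.asarray(series, dtype=float), window_size)
    # Pairwise Euclidean distances between subsequences.
    dists = np.linalg.norm(windows[:, None, :] - windows[None, :, :], axis=-1)
    np.fill_diagonal(dists, np.inf)        # ignore self-matches
    knn = np.sort(dists, axis=1)[:, :k]    # k nearest neighbours per window
    return knn.mean(axis=1)                # larger score = more discord-like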


+ 0
- 51
primitive_tests/build_KNN_pipline.py View File

@@ -1,51 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> pyod_knn

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_knn')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # Note: there is an unresolved issue with multi-dimensional input
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()
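All of these test scripts repeat the same two preprocessing steps. A small helper along the following lines (hypothetical, not part of the repository) could build that shared prefix, using only the d3m calls already shown in the scripts:

from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

def add_common_prefix(pipeline_description: Pipeline) -> str:
    # Step 0: dataset_to_dataframe
    step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
    step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)
    # Step 1: column_parser
    step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
    step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)
    return 'steps.1.produce'  # data reference for the primitive under test

A detector script would then only add its own primitive step on top of the returned data reference.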


+ 0
- 51
primitive_tests/build_LODA_pipline.py View File

@@ -1,51 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> pyod_loda

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_loda')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # Note: there is an unresolved issue with multi-dimensional input
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 51
primitive_tests/build_LOF_pipline.py View File

@@ -1,51 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> pyod_lof

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_lof')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # Note: there is an unresolved issue with multi-dimensional input
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 49
primitive_tests/build_MatrixProfile_pipeline.py View File

@@ -1,49 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> matrix_profile

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.matrix_profile')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,)) # Note: there is an unresolved issue with multi-dimensional input
step_2.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=3)
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# # Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 0
- 77
primitive_tests/build_MeanAverageTransform_pipline.py View File

@@ -1,77 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> moving_average_transform
#                            extract_columns_by_semantic_types(targets)    ->            ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: moving average transform
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.moving_average_transform'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (2, 3))
step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: isolation forest
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm'))
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
#step_6.add_output('produce')
#pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 51
primitive_tests/build_OCSVM_pipline.py View File

@@ -1,51 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> pyod_ocsvm

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ocsvm')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # Note: there is an unresolved issue with multi-dimensional input
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 51
primitive_tests/build_PyodCOF.py View File

@@ -1,51 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> pyod_cof

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_cof')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4)) # Note: there is an unresolved issue with multi-dimensional input
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 49
primitive_tests/build_QuantileTransform_pipline.py View File

@@ -1,49 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> quantile_transformer

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.quantile_transformer')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 49
primitive_tests/build_SOD_pipeline.py View File

@@ -1,49 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> pyod_sod

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sod')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4)) # Note: there is an unresolved issue with multi-dimensional input
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# # Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 0
- 76
primitive_tests/build_SimpleExponentialSmoothing_pipline.py View File

@@ -1,76 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> simple_exponential_smoothing
#                            extract_columns_by_semantic_types(targets)    ->            ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: simple exponential smoothing
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.simple_exponential_smoothing'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (1,))
step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: isolation forest
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm'))
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
#step_6.add_output('produce')
#pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 49
primitive_tests/build_Standardize_pipline.py View File

@@ -1,49 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> standard_scaler

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 80
primitive_tests/build_SubsequenceClustering_pipline.py View File

@@ -1,80 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> subsequence_clustering -> pyod_loda -> construct_predictions

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)


# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: test primitive
primitive_4 = index.get_primitive('d3m.primitives.tods.timeseries_processing.subsequence_clustering')
step_4 = PrimitiveStep(primitive=primitive_4)

step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: test primitive
primitive_5 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_loda')
step_5 = PrimitiveStep(primitive=primitive_5)
step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: Predictions
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_6.add_output('produce')
pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')

# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 0
- 48
primitive_tests/build_Telemanom.py View File

@@ -1,48 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> telemanom

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')


# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: Column Parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: Telemanom detector
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.telemanom')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 0
- 86
primitive_tests/build_TimeIntervalTransform_pipeline.py View File

@@ -1,86 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> time_interval_transform

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: dataframe transformation
# primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKPowerTransformer')
# primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKStandardization')
# primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKQuantileTransformer')

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

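# Step 2: time_interval_transform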
primitive_2 = index.get_primitive('d3m.primitives.tods.data_processing.time_interval_transform')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name="time_interval", argument_type=ArgumentType.VALUE, data = '5T')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)
#
# # Step 2: column_parser
# step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
# step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
# step_2.add_output('produce')
# pipeline_description.add_step(step_2)
#
#
# # Step 3: extract_columns_by_semantic_types(attributes)
# step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
# step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
# step_3.add_output('produce')
# step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
# data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
# pipeline_description.add_step(step_3)
#
# # Step 4: extract_columns_by_semantic_types(targets)
# step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
# step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
# step_4.add_output('produce')
# step_4.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
# data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
# pipeline_description.add_step(step_4)
#
# attributes = 'steps.3.produce'
# targets = 'steps.4.produce'
#
# # Step 5: imputer
# step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_cleaning.imputer.SKlearn'))
# step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
# step_5.add_output('produce')
# pipeline_description.add_step(step_5)
#
# # Step 6: random_forest
# step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.regression.random_forest.SKlearn'))
# step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
# step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
# step_6.add_output('produce')
# pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.1.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 0
- 64
primitive_tests/build_WaveletTransform_pipline.py View File

@@ -1,64 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> wavelet_transform -> inverse wavelet_transform

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test WaveletTransform
primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.wavelet_transform')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='wavelet', argument_type=ArgumentType.VALUE, data='db8')
step_2.add_hyperparameter(name='level', argument_type=ArgumentType.VALUE, data=2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test inverse WaveletTransform
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.wavelet_transform')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='wavelet', argument_type=ArgumentType.VALUE, data='db8')
step_3.add_hyperparameter(name='level', argument_type=ArgumentType.VALUE, data=2)
step_3.add_hyperparameter(name='inverse', argument_type=ArgumentType.VALUE, data=1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False)
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()
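This script chains a forward and an inverse wavelet transform with wavelet 'db8' and level 2. Presumably this corresponds to PyWavelets-style calls; a rough sketch under that assumption (not the primitive's code):

import numpy as np
import pywt

signal = np.sin(np.linspace(0, 8 * np.pi, 256))
coeffs = pywt.wavedec(signal, 'db8', level=2)             # forward multilevel DWT
reconstructed = pywt.waverec(coeffs, 'db8')                # inverse transform
print(np.allclose(signal, reconstructed[:signal.size]))    # reconstruction is (near-)lossless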


+ 0
- 50
primitive_tests/build_test_detection_algorithm_PyodMoGaal.py View File

@@ -1,50 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> pyod_mogaal

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_mogaal')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # Note: there is an unresolved issue with multi-dimensional input
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 0
- 50
primitive_tests/build_test_detection_algorithm_PyodSoGaal.py View File

@@ -1,50 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> pyod_sogaal

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sogaal')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # Note: there is an unresolved issue with multi-dimensional input
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 0
- 61
primitive_tests/build_test_feature_analysis_spectral_residual_transform_pipeline.py View File

@@ -1,61 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> standard_scaler -> spectral_residual_transform

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.spectral_residual_transform')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='avg_filter_dimension', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # Note: there is an unresolved issue with multi-dimensional input
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_abs_energy.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_abs_energy

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_abs_energy')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # Note: there is an unresolved issue with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()
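For reference, the 'absolute energy' computed by the script above is conventionally the sum of squared values over each window; a minimal numpy sketch under that assumption about what the primitive computes per window of size 4:

import numpy as np
window = np.array([1.0, -2.0, 3.0, 0.5])
abs_energy = np.sum(np.square(window))   # 1 + 4 + 9 + 0.25 = 14.25
print(abs_energy)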


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_abs_sum.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_abs_sum

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_abs_sum')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


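Each of the removed test scripts repeats the same three-step prefix (dataset_to_dataframe -> column_parser -> standard_scaler) before the primitive under test. A hypothetical helper that factors this prefix out, using only the d3m calls already shown in these scripts, could look like the following sketch:

from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

def add_common_prefix(pipeline_description):
    # Step 0: dataset_to_dataframe
    step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
    step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)

    # Step 1: column_parser
    step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
    step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)

    # Step 2: standard_scaler on the value columns
    step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler'))
    step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
    step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
    step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
    step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)
    return pipeline_description

# Usage (hypothetical): the primitive under test is then appended as step 3.
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')
pipeline_description = add_common_prefix(pipeline_description)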
+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_gmean.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_g_mean')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_hmean.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_h_mean')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_kurtosis.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_kurtosis')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_maximum.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_mean.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_mean_abs.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean_abs')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_mean_abs_temporal_derivative.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean_abs_temporal_derivative')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_mean_temporal_derivative.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean_temporal_derivative')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_median.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_median')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 63
primitive_tests/build_test_feature_analysis_statistical_median_absolute_deviation.py View File

@@ -1,63 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)


# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_median_abs_deviation')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_minimum.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_minimum')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_skew.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_skew')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_variation.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_variation')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_vec_sum.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_vec_sum')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_willison_amplitude.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_willison_amplitude')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 61
primitive_tests/build_test_time_series_seasonality_trend_decomposition.py View File

@@ -1,61 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.decomposition.time_series_seasonality_trend_decomposition')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='period', argument_type=ArgumentType.VALUE, data=5)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # There is something wrong with multi-dimensional input
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)

# Or you can output json
#data = pipeline_description.to_json()


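A written pipeline.yml can also be loaded back for inspection; a minimal sketch, assuming d3m's Pipeline.from_yaml helper is available alongside the classes imported in the scripts above:

from d3m.metadata.pipeline import Pipeline

# Hypothetical round-trip check: parse the YAML written by the scripts above.
with open('pipeline.yml', 'r') as f:
    loaded_pipeline = Pipeline.from_yaml(f)
print(loaded_pipeline.id)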
primitive_tests/build_CategoricalToBinary.py → primitive_tests/data_processing/CategoricalToBinary_pipeline.py View File

@@ -2,14 +2,11 @@ from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')



# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
@@ -24,25 +21,28 @@ step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: Categorical to Binary
primitive_2 = index.get_primitive('d3m.primitives.tods.data_processing.categorical_to_binary')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(3,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)


# Step 3: Categorical to Binary
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.categorical_to_binary'))
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(3,))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')


# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)

primitive_tests/build_ColumnFilter_pipeline.py → primitive_tests/data_processing/ColumnFilter_pipeline.py View File

@@ -22,16 +22,16 @@ step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_1.add_output('produce')
pipeline_description.add_step(step_1)


primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.auto_correlation')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True)
step_2.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (2, 3))
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)


primitive_3 = index.get_primitive('d3m.primitives.tods.data_processing.column_filter')
step_3 = PrimitiveStep(primitive=primitive_3)
# Step 3: column_filter
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_filter'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)
@@ -39,11 +39,8 @@ pipeline_description.add_step(step_3)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')


# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)

primitive_tests/build_ContinuityValidation_pipline.py → primitive_tests/data_processing/ContinuityValidation_pipline.py View File

@@ -18,8 +18,7 @@ step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_1.add_output('produce')
pipeline_description.add_step(step_1)



# Step 2: ContinuityValidation
# Step 3: ContinuityValidation
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.continuity_validation'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
@@ -32,12 +31,9 @@ pipeline_description.add_step(step_2)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')


# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)



primitive_tests/build_DuplicationValidation_pipline.py → primitive_tests/data_processing/DuplicationValidation_pipeline.py View File

@@ -13,14 +13,12 @@ step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_0.add_output('produce')
pipeline_description.add_step(step_0)



# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)



# Step 2: DuplicationValidation
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.duplication_validation'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
@@ -31,12 +29,9 @@ pipeline_description.add_step(step_2)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')


# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)



primitive_tests/build_TRMF_pipline.py → primitive_tests/data_processing/TimeIntervalTransform_pipeline.py View File

@@ -8,7 +8,8 @@ pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')


# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)
@@ -19,26 +20,18 @@ step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: TRMF
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.trmf'))
# Step 2: time_interval_transform
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.time_interval_transform'))
step_2.add_hyperparameter(name="time_interval", argument_type=ArgumentType.VALUE, data = 'T')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')

step_2.add_hyperparameter(name = 'lags', argument_type=ArgumentType.VALUE, data = [1,2,10,100])
# step_2.add_hyperparameter(name = 'K', argument_type=ArgumentType.VALUE, data = 3)
# step_2.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = (2, 3, 4, 5, 6))

pipeline_description.add_step(step_2)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')


# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
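
Note: the time_interval value 'T' reads like a pandas offset alias for minute frequency. A hedged sketch, assuming (not confirmed by this diff) that time_interval_transform also accepts other pandas-style aliases such as '5T' or 'H', the step could instead be configured as:

# Assumption: pandas-style offset aliases other than 'T' are accepted here;
# only 'T' is exercised in this commit.
step_2.add_hyperparameter(name='time_interval', argument_type=ArgumentType.VALUE, data='5T')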

+ 53
- 0
primitive_tests/detection_algorithm/ABOD_pipeline.py View File

@@ -0,0 +1,53 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: ABOD
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_abod'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
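
Note: once example_pipeline.json is written, it can presumably be loaded back for execution. A minimal sketch, assuming the d3m core package's Pipeline.from_json classmethod accepts an open file object:

from d3m.metadata.pipeline import Pipeline

# Assumption: from_json parses the JSON emitted by to_json() above and
# resolves the referenced primitives through the default resolver.
with open('example_pipeline.json') as f:
    loaded_pipeline = Pipeline.from_json(f)
print(loaded_pipeline.id)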



primitive_tests/build_AutoEncoder.py → primitive_tests/detection_algorithm/AutoEncoder_pipeline.py View File

@@ -2,8 +2,6 @@ from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^


# Creating pipeline
pipeline_description = Pipeline()
@@ -29,39 +27,25 @@ step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALU
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)


# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
# Step 3: auto encoder
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)


attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)


# Step 5: auto encoder
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_output('produce')
pipeline_description.add_step(step_5)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')


# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)



+ 54
- 0
primitive_tests/detection_algorithm/AutoRegODetect_pipeline.py View File

@@ -0,0 +1,54 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import numpy as np


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: AutoRegODetector
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.AutoRegODetector'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 57
- 0
primitive_tests/detection_algorithm/CBLOF_pipline.py View File

@@ -0,0 +1,57 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: CBLOF
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_cblof'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 54
- 0
primitive_tests/detection_algorithm/DeepLog_pipeline.py View File

@@ -0,0 +1,54 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: deeplog
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.deeplog'))
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)

primitive_tests/build_HBOS_pipline.py → primitive_tests/detection_algorithm/HBOS_pipline.py View File

@@ -27,42 +27,26 @@ step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALU
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)


# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
# Step 3: HBOS
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_hbos'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)


attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)


# Step 5: HBOS
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_hbos'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')

step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
# step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')

step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')


# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)



primitive_tests/build_HBOS_score_pipline.py → primitive_tests/detection_algorithm/HBOS_score_pipeline.py View File

@@ -27,45 +27,22 @@ step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALU
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)


# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
# Step 3: HBOS
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_hbos'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
step_3.add_output('produce_score')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)


attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: HBOS
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_hbos'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')

step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_5.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
# step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')

step_5.add_output('produce_score')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')
# pipeline_description.add_output(name='output score', data_reference='steps.5.produce_score')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()
# pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')
pipeline_description.add_output(name='output score', data_reference='steps.3.produce_score')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
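
Note: this script exposes the raw score output (steps.3.produce_score, with return_subseq_inds=True) and leaves the label output commented out. A sketch of exposing both at once, assuming the runner accepts multiple pipeline outputs:

# Sketch: both outputs are already declared on step_3 above;
# 'produce' carries labels, 'produce_score' carries outlier scores.
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')
pipeline_description.add_output(name='output score', data_reference='steps.3.produce_score')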



primitive_tests/build_IsolationForest_pipline.py → primitive_tests/detection_algorithm/IsolationForest_pipline.py View File

@@ -1,11 +1,7 @@ from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata.pipeline import Pipeline, PrimitiveStep from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy


# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^


# Creating pipeline
pipeline_description = Pipeline()
@@ -36,24 +32,23 @@ pipeline_description.add_step(step_2)
primitive_3 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_iforest')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
# step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
# step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce_score')
step_3.add_output('produce')
pipeline_description.add_step(step_3)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce_score')
# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)


# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')


# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
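
Note: return_result='append' appears in this hunk; under the usual d3m convention it appends the detector's output columns to the input frame rather than replacing it. A hedged sketch of the alternative, assuming the usual d3m choices ('append', 'replace', 'new') apply to this primitive, the step could instead be configured as:

# Assumption: 'new' would return only the detector's output columns instead of
# appending them; 'new' also appears in the removed standard_scaler step in this commit.
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')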



+ 54
- 0
primitive_tests/detection_algorithm/KDiscord_pipeline.py View File

@@ -0,0 +1,54 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: KDiscordODetector
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.KDiscordODetector'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10)
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 55
- 0
primitive_tests/detection_algorithm/KNN_pipeline.py View File

@@ -0,0 +1,55 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: KNN
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_knn'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 55
- 0
primitive_tests/detection_algorithm/LODA_pipeline.py View File

@@ -0,0 +1,55 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: LODA
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_loda'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 55
- 0
primitive_tests/detection_algorithm/LOF_pipeline.py View File

@@ -0,0 +1,55 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: LOF
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_lof'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 55
- 0
primitive_tests/detection_algorithm/LSTMOD_pipeline.py View File

@@ -0,0 +1,55 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: LSTMODetector
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.LSTMODetector'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='diff_group_method', argument_type=ArgumentType.VALUE, data='average')
step_3.add_hyperparameter(name='feature_dim', argument_type=ArgumentType.VALUE, data=6)
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


primitive_tests/build_AutoRegODetect_pipeline.py → primitive_tests/detection_algorithm/MatrixProfile_pipeline.py View File

@@ -2,10 +2,7 @@ from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import numpy as np


# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^


# Creating pipeline
pipeline_description = Pipeline()
@@ -18,7 +15,7 @@ step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_0.add_output('produce')
pipeline_description.add_step(step_0)


# # Step 1: column_parser
# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
@@ -32,40 +29,28 @@ step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)


# # Step 3: Standardization
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_3 = PrimitiveStep(primitive=primitive_3)
# Step 3: matrix_profile
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.matrix_profile'))
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,)) # There is sth wrong with multi-dimensional
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=3) # There is sth wrong with multi-dimensional
# step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)


# # Step 4: test primitive
primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.AutoRegODetector')
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_4.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10)
# step_4.add_hyperparameter(name='weights', argument_type=ArgumentType.VALUE, data=weights_ndarray)
step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False)
# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # There is sth wrong with multi-dimensional
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
step_4.add_output('produce_score')
pipeline_description.add_step(step_4)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')


# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
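
Note: matrix_profile here runs on columns 2 and 3 with a subsequence window of length 3. A sketch of pointing it at a single column with a longer window; the values are illustrative only, reusing settings that appear elsewhere in this commit:

# Illustrative values: one value column and a longer subsequence window.
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10)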

+ 55
- 0
primitive_tests/detection_algorithm/OCSVM_pipline.py View File

@@ -0,0 +1,55 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: OCSVM
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ocsvm'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 53
- 0
primitive_tests/detection_algorithm/PCAODetect_pipeline.py View File

@@ -0,0 +1,53 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: PCAODetector
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.PCAODetector'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 55
- 0
primitive_tests/detection_algorithm/PyodCOF.py View File

@@ -0,0 +1,55 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# # Step 3: COF
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_cof'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 54
- 0
primitive_tests/detection_algorithm/PyodMoGaal_pipeline.py View File

@@ -0,0 +1,54 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: MoGaal
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_mogaal'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)

+ 54
- 0
primitive_tests/detection_algorithm/PyodSoGaal_pipeline.py

@@ -0,0 +1,54 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: SoGaal
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sogaal'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)

primitive_tests/build_PCAODetect_pipeline.py → primitive_tests/detection_algorithm/SOD_pipeline.py

@@ -2,10 +2,7 @@ from d3m import index
 from d3m.metadata.base import ArgumentType
 from d3m.metadata.pipeline import Pipeline, PrimitiveStep
 from d3m.metadata import hyperparams
-import numpy as np
 
-# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
-# extract_columns_by_semantic_types(targets) -> ^
 
 # Creating pipeline
 pipeline_description = Pipeline()
@@ -32,40 +29,27 @@ step_2.add_output('produce')
 step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
 pipeline_description.add_step(step_2)
 
-# # Step 3: Standardization
-primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
-step_3 = PrimitiveStep(primitive=primitive_3)
+# Step 3: SOD
+step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sod'))
+step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
 step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
-step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,))
-step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
+step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4))
 step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
 step_3.add_output('produce')
 pipeline_description.add_step(step_3)
 
-# # Step 4: test primitive
-primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.PCAODetector')
-step_4 = PrimitiveStep(primitive=primitive_4)
-step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
-step_4.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10)
-# step_4.add_hyperparameter(name='weights', argument_type=ArgumentType.VALUE, data=weights_ndarray)
-step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False)
-# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # There is sth wrong with multi-dimensional
-step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
-step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
+# Step 4: Predictions
+step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
 step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
+step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
 step_4.add_output('produce')
-step_4.add_output('produce_score')
 pipeline_description.add_step(step_4)
 
 # Final Output
 pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')
 
-# Output to YAML
-yaml = pipeline_description.to_yaml()
-with open('pipeline.yml', 'w') as f:
-    f.write(yaml)
-print(yaml)
-
-# Or you can output json
-#data = pipline_description.to_json()
+# Output to JSON
+data = pipeline_description.to_json()
+with open('example_pipeline.json', 'w') as f:
+    f.write(data)
+print(data)

+ 54
- 0
primitive_tests/detection_algorithm/Telemanom_pipeline.py

@@ -0,0 +1,54 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')


# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: Column Parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: telemanom
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.telemanom'))
# step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
# step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
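
These scripts only build and save the pipeline description; executing it is left to the D3M reference runtime. A hedged sketch of one way to fit and produce with a saved description, assuming the d3m runtime API (the dataset path below is only a placeholder, e.g. one of the yahoo_sub_5 splits added in this commit):

# Illustrative only; argument names may differ slightly across d3m versions.
from d3m.container import Dataset
from d3m.metadata import base as metadata_base
from d3m.metadata.pipeline import Pipeline
from d3m.runtime import Runtime

with open('example_pipeline.json', 'r') as f:
    pipeline = Pipeline.from_json(f)

# Placeholder URI; point it at a real datasetDoc.json.
dataset = Dataset.load('file:///path/to/dataset_TRAIN/datasetDoc.json')

runtime = Runtime(pipeline=pipeline, context=metadata_base.Context.TESTING)
runtime.fit(inputs=[dataset]).check_success()
result = runtime.produce(inputs=[dataset])
result.check_success()
print(result.values['outputs.0'])  # the 'output predictions' DataFrame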

primitive_tests/build_VariationalAutoEncoder.py → primitive_tests/detection_algorithm/VariationalAutoEncoder_pipeline.py

@@ -2,8 +2,6 @@ from d3m import index
 from d3m.metadata.base import ArgumentType
 from d3m.metadata.pipeline import Pipeline, PrimitiveStep
 
-# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
-# extract_columns_by_semantic_types(targets) -> ^
 
 # Creating pipeline
 pipeline_description = Pipeline()
@@ -29,39 +27,25 @@ step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALU
                           data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
 pipeline_description.add_step(step_2)
 
-# Step 3: extract_columns_by_semantic_types(targets)
-step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
-step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+# Step 3: variatinal auto encoder
+step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_vae'))
+step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
 step_3.add_output('produce')
-step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
-                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
 pipeline_description.add_step(step_3)
 
-attributes = 'steps.2.produce'
-targets = 'steps.3.produce'
-
-# Step 4: imputer
-step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
-step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
+# Step 4: Predictions
+step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
+step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
+step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
 step_4.add_output('produce')
 pipeline_description.add_step(step_4)
 
-# Step 5: variatinal auto encoder
-step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_vae'))
-step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
-step_5.add_output('produce')
-pipeline_description.add_step(step_5)
-
 # Final Output
-pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')
-
-# Output to YAML
-yaml = pipeline_description.to_yaml()
-with open('pipeline.yml', 'w') as f:
-    f.write(yaml)
-print(yaml)
+pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')
 
-# Or you can output json
-#data = pipline_description.to_json()
+# Output to JSON
+data = pipeline_description.to_json()
+with open('example_pipeline.json', 'w') as f:
+    f.write(data)
+print(data)



primitive_tests/build_BKFilter_pipline.py → primitive_tests/feature_analysis/BKFilter_pipeline.py

@@ -13,32 +13,34 @@ step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
 step_0.add_output('produce')
 pipeline_description.add_step(step_0)
 
 # Step 1: column_parser
 step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
 step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
 step_1.add_output('produce')
 pipeline_description.add_step(step_1)
 
-# Step 2: BKFilter
-step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.bk_filter'))
-# step_2.add_hyperparameter(name = 'columns_using_method', argument_type=ArgumentType.VALUE, data = 'name')
-step_2.add_hyperparameter(name = 'use_semantic_types', argument_type=ArgumentType.VALUE, data = True)
-step_2.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = (2,3))
+# Step 2: extract_columns_by_semantic_types(attributes)
+step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
 step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
 step_2.add_output('produce')
+step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
+                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
 pipeline_description.add_step(step_2)
 
-# Final Output
-pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')
+# Step 3: BKFilter
+step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.bk_filter'))
+step_3.add_hyperparameter(name = 'use_semantic_types', argument_type=ArgumentType.VALUE, data = True)
+step_3.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = (2,3))
+step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
+step_3.add_output('produce')
+pipeline_description.add_step(step_3)
 
-# Output to YAML
-yaml = pipeline_description.to_yaml()
-with open('pipeline.yml', 'w') as f:
-    f.write(yaml)
-print(yaml)
+# Final Output
+pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')
 
-# Or you can output json
-#data = pipline_description.to_json()
+# Output to JSON
+data = pipeline_description.to_json()
+with open('example_pipeline.json', 'w') as f:
+    f.write(data)
+print(data)



primitive_tests/build_FastFourierTransform.py → primitive_tests/feature_analysis/DiscreteCosineTransform_pipeline.py

@@ -2,8 +2,6 @@ from d3m import index
 from d3m.metadata.base import ArgumentType
 from d3m.metadata.pipeline import Pipeline, PrimitiveStep
 
-# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
-# extract_columns_by_semantic_types(targets) -> ^
 
 # Creating pipeline
 pipeline_description = Pipeline()
@@ -24,25 +22,28 @@ step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
 step_1.add_output('produce')
 pipeline_description.add_step(step_1)
 
-# Step 2: Fast Fourier Transform
-primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.fast_fourier_transform')
-step_2 = PrimitiveStep(primitive=primitive_2)
-step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
-step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4))
-step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
+# Step 2: extract_columns_by_semantic_types(attributes)
+step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
 step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
 step_2.add_output('produce')
+step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
+                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
 pipeline_description.add_step(step_2)
 
+# Step 3: discrete_cosine_transform
+step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.discrete_cosine_transform'))
+step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
+step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4))
+step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
+step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
+step_3.add_output('produce')
+pipeline_description.add_step(step_3)
 
 # Final Output
-pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')
+pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')
 
-# Output to YAML
-yaml = pipeline_description.to_yaml()
-with open('pipeline.yml', 'w') as f:
-    f.write(yaml)
-print(yaml)
-
-# Or you can output json
-#data = pipline_description.to_json()
+# Output to JSON
+data = pipeline_description.to_json()
+with open('example_pipeline.json', 'w') as f:
+    f.write(data)
+print(data)

primitive_tests/build_DiscreteCosineTransform.py → primitive_tests/feature_analysis/FastFourierTransform_pipeline.py

@@ -2,8 +2,6 @@ from d3m import index
 from d3m.metadata.base import ArgumentType
 from d3m.metadata.pipeline import Pipeline, PrimitiveStep
 
-# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
-# extract_columns_by_semantic_types(targets) -> ^
 
 # Creating pipeline
 pipeline_description = Pipeline()
@@ -24,27 +22,28 @@ step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
 step_1.add_output('produce')
 pipeline_description.add_step(step_1)
 
-# Step 2: Discrete Cosine Transform
-primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.discrete_cosine_transform')
-step_2 = PrimitiveStep(primitive=primitive_2)
-step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
-step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4))
-step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
+# Step 2: extract_columns_by_semantic_types(attributes)
+step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
 step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
 step_2.add_output('produce')
+step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
+                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
 pipeline_description.add_step(step_2)
 
+# Step 3: Fast Fourier Transform
+step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.fast_fourier_transform'))
+step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
+step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4))
+step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
+step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
+step_3.add_output('produce')
+pipeline_description.add_step(step_3)
 
 # Final Output
-pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')
-
-# Output to YAML
-yaml = pipeline_description.to_yaml()
-with open('pipeline.yml', 'w') as f:
-    f.write(yaml)
-print(yaml)
-
-# Or you can output json
-#data = pipline_description.to_json()
+pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')
 
+# Output to JSON
+data = pipeline_description.to_json()
+with open('example_pipeline.json', 'w') as f:
+    f.write(data)
+print(data)

primitive_tests/build_HPFilter_pipline.py → primitive_tests/feature_analysis/HPFilter_pipeline.py

@@ -13,34 +13,34 @@ step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
 step_0.add_output('produce')
 pipeline_description.add_step(step_0)
 
 # Step 1: column_parser
 step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
 step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
 step_1.add_output('produce')
 pipeline_description.add_step(step_1)
 
-# Step 2: HPFilter
-step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.hp_filter'))
+# Step 2: extract_columns_by_semantic_types(attributes)
+step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
 step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
 step_2.add_output('produce')
-step_2.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = [2,3,6])
-step_2.add_hyperparameter(name = 'use_semantic_types', argument_type=ArgumentType.VALUE, data = True)
-step_2.add_hyperparameter(name = 'return_result', argument_type=ArgumentType.VALUE, data = 'append')
+step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
+                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
 pipeline_description.add_step(step_2)
 
-# Final Output
-pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')
-
-# Output to YAML
-yaml = pipeline_description.to_yaml()
-with open('pipeline.yml', 'w') as f:
-    f.write(yaml)
-print(yaml)
+# Step 3: HPFilter
+step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.hp_filter'))
+step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
+step_3.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = (2,3))
+step_3.add_hyperparameter(name = 'use_semantic_types', argument_type=ArgumentType.VALUE, data = True)
+step_3.add_hyperparameter(name = 'return_result', argument_type=ArgumentType.VALUE, data = 'append')
+step_3.add_output('produce')
+pipeline_description.add_step(step_3)
 
-# Or you can output json
-#data = pipline_description.to_json()
+# Final Output
+pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')
 
+# Output to JSON
+data = pipeline_description.to_json()
+with open('example_pipeline.json', 'w') as f:
+    f.write(data)
+print(data)

primitive_tests/build_NonNegativeMatrixFactorization.py → primitive_tests/feature_analysis/NonNegativeMatrixFactorization_pipeline.py

@@ -2,8 +2,6 @@ from d3m import index
 from d3m.metadata.base import ArgumentType
 from d3m.metadata.pipeline import Pipeline, PrimitiveStep
 
-# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
-# extract_columns_by_semantic_types(targets) -> ^
 
 # Creating pipeline
 pipeline_description = Pipeline()
@@ -24,27 +22,29 @@ step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
 step_1.add_output('produce')
 pipeline_description.add_step(step_1)
 
-# Step 2: Non Negative Matrix Factorization
-primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.non_negative_matrix_factorization')
-step_2 = PrimitiveStep(primitive=primitive_2)
-step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
-step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
-step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
-step_2.add_hyperparameter(name='rank', argument_type=ArgumentType.VALUE, data=5)
+# Step 2: extract_columns_by_semantic_types(attributes)
+step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
 step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
 step_2.add_output('produce')
+step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
+                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
 pipeline_description.add_step(step_2)
 
+# Step 3: Non Negative Matrix Factorization
+step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.non_negative_matrix_factorization'))
+step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
+step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
+step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
+step_3.add_hyperparameter(name='rank', argument_type=ArgumentType.VALUE, data=5)
+step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
+step_3.add_output('produce')
+pipeline_description.add_step(step_3)
 
 # Final Output
-pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')
-
-# Output to YAML
-yaml = pipeline_description.to_yaml()
-with open('pipeline.yml', 'w') as f:
-    f.write(yaml)
-print(yaml)
-
-# Or you can output json
-#data = pipline_description.to_json()
+pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')
 
+# Output to JSON
+data = pipeline_description.to_json()
+with open('example_pipeline.json', 'w') as f:
+    f.write(data)
+print(data)

Some files were not shown because too many files changed in this diff
