
merge primitive_tests from the Junjie branch; modified metadata for the data_processing and timeseries_processing modules

master
lhenry15 · 4 years ago
commit 4598bfad1f
100 changed files with 9192 additions and 40775 deletions
  1. datasets/anomaly/transform_yahoo.py (+1, -1)
  2. datasets/anomaly/yahoo_sub_5/SCORE/dataset_TEST/datasetDoc.json (+27, -3)
  3. datasets/anomaly/yahoo_sub_5/SCORE/dataset_TEST/tables/learningData.csv (+141, -0)
  4. datasets/anomaly/yahoo_sub_5/SCORE/problem_TEST/dataSplits.csv (+1261, -0)
  5. datasets/anomaly/yahoo_sub_5/SCORE/problem_TEST/problemDoc.json (+10, -10)
  6. datasets/anomaly/yahoo_sub_5/SCORE/targets.csv (+0, -0)
  7. datasets/anomaly/yahoo_sub_5/TEST/dataset_TEST/datasetDoc.json (+27, -3)
  8. datasets/anomaly/yahoo_sub_5/TEST/dataset_TEST/tables/learningData.csv (+141, -0)
  9. datasets/anomaly/yahoo_sub_5/TEST/problem_TEST/dataSplits.csv (+1261, -0)
  10. datasets/anomaly/yahoo_sub_5/TEST/problem_TEST/problemDoc.json (+10, -10)
  11. datasets/anomaly/yahoo_sub_5/TRAIN/dataset_TRAIN/datasetDoc.json (+28, -4)
  12. datasets/anomaly/yahoo_sub_5/TRAIN/dataset_TRAIN/tables/learningData.csv (+1261, -0)
  13. datasets/anomaly/yahoo_sub_5/TRAIN/problem_TRAIN/dataSplits.csv (+1261, -0)
  14. datasets/anomaly/yahoo_sub_5/TRAIN/problem_TRAIN/problemDoc.json (+10, -10)
  15. datasets/anomaly/yahoo_sub_5/yahoo_sub_5_dataset/datasetDoc.json (+29, -5)
  16. datasets/anomaly/yahoo_sub_5/yahoo_sub_5_dataset/tables/learningData.csv (+1401, -0)
  17. datasets/anomaly/yahoo_sub_5/yahoo_sub_5_problem/dataSplits.csv (+1261, -0)
  18. datasets/anomaly/yahoo_sub_5/yahoo_sub_5_problem/problemDoc.json (+10, -10)
  19. datasets/anomaly/yahoo_system_sub_5/SCORE/dataset_TEST/tables/learningData.csv (+0, -1401)
  20. datasets/anomaly/yahoo_system_sub_5/SCORE/problem_TEST/dataSplits.csv (+0, -5601)
  21. datasets/anomaly/yahoo_system_sub_5/TEST/dataset_TEST/tables/learningData.csv (+0, -1401)
  22. datasets/anomaly/yahoo_system_sub_5/TEST/problem_TEST/dataSplits.csv (+0, -5601)
  23. datasets/anomaly/yahoo_system_sub_5/TRAIN/dataset_TRAIN/tables/learningData.csv (+0, -5601)
  24. datasets/anomaly/yahoo_system_sub_5/TRAIN/problem_TRAIN/dataSplits.csv (+0, -5601)
  25. datasets/anomaly/yahoo_system_sub_5/yahoo_system_sub_5_dataset/tables/learningData.csv (+0, -7001)
  26. datasets/anomaly/yahoo_system_sub_5/yahoo_system_sub_5_problem/dataSplits.csv (+0, -5601)
  27. primitive_tests/build_ABOD_pipline.py (+0, -70)
  28. primitive_tests/build_CBLOF_pipline.py (+0, -51)
  29. primitive_tests/build_DeepLog_pipeline.py (+0, -49)
  30. primitive_tests/build_HoltSmoothing_pipline.py (+0, -76)
  31. primitive_tests/build_HoltWintersExponentialSmoothing_pipline.py (+0, -76)
  32. primitive_tests/build_KDiscord_pipeline.py (+0, -71)
  33. primitive_tests/build_KNN_pipline.py (+0, -51)
  34. primitive_tests/build_LODA_pipline.py (+0, -51)
  35. primitive_tests/build_LOF_pipline.py (+0, -51)
  36. primitive_tests/build_MatrixProfile_pipeline.py (+0, -49)
  37. primitive_tests/build_MeanAverageTransform_pipline.py (+0, -77)
  38. primitive_tests/build_OCSVM_pipline.py (+0, -51)
  39. primitive_tests/build_PyodCOF.py (+0, -51)
  40. primitive_tests/build_QuantileTransform_pipline.py (+0, -49)
  41. primitive_tests/build_SOD_pipeline.py (+0, -49)
  42. primitive_tests/build_SimpleExponentialSmoothing_pipline.py (+0, -76)
  43. primitive_tests/build_Standardize_pipline.py (+0, -49)
  44. primitive_tests/build_SubsequenceClustering_pipline.py (+0, -80)
  45. primitive_tests/build_Telemanom.py (+0, -48)
  46. primitive_tests/build_TimeIntervalTransform_pipeline.py (+0, -86)
  47. primitive_tests/build_WaveletTransform_pipline.py (+0, -64)
  48. primitive_tests/build_test_detection_algorithm_PyodMoGaal.py (+0, -50)
  49. primitive_tests/build_test_detection_algorithm_PyodSoGaal.py (+0, -50)
  50. primitive_tests/build_test_feature_analysis_spectral_residual_transform_pipeline.py (+0, -61)
  51. primitive_tests/build_test_feature_analysis_statistical_abs_energy.py (+0, -62)
  52. primitive_tests/build_test_feature_analysis_statistical_abs_sum.py (+0, -62)
  53. primitive_tests/build_test_feature_analysis_statistical_gmean.py (+0, -62)
  54. primitive_tests/build_test_feature_analysis_statistical_hmean.py (+0, -62)
  55. primitive_tests/build_test_feature_analysis_statistical_kurtosis.py (+0, -62)
  56. primitive_tests/build_test_feature_analysis_statistical_maximum.py (+0, -62)
  57. primitive_tests/build_test_feature_analysis_statistical_mean.py (+0, -62)
  58. primitive_tests/build_test_feature_analysis_statistical_mean_abs.py (+0, -62)
  59. primitive_tests/build_test_feature_analysis_statistical_mean_abs_temporal_derivative.py (+0, -62)
  60. primitive_tests/build_test_feature_analysis_statistical_mean_temporal_derivative.py (+0, -62)
  61. primitive_tests/build_test_feature_analysis_statistical_median.py (+0, -62)
  62. primitive_tests/build_test_feature_analysis_statistical_median_absolute_deviation.py (+0, -63)
  63. primitive_tests/build_test_feature_analysis_statistical_minimum.py (+0, -62)
  64. primitive_tests/build_test_feature_analysis_statistical_skew.py (+0, -62)
  65. primitive_tests/build_test_feature_analysis_statistical_variation.py (+0, -62)
  66. primitive_tests/build_test_feature_analysis_statistical_vec_sum.py (+0, -62)
  67. primitive_tests/build_test_feature_analysis_statistical_willison_amplitude.py (+0, -62)
  68. primitive_tests/build_test_time_series_seasonality_trend_decomposition.py (+0, -61)
  69. primitive_tests/data_processing/CategoricalToBinary_pipeline.py (+18, -18)
  70. primitive_tests/data_processing/ColumnFilter_pipeline.py (+11, -14)
  71. primitive_tests/data_processing/ContinuityValidation_pipline.py (+6, -10)
  72. primitive_tests/data_processing/DuplicationValidation_pipeline.py (+5, -10)
  73. primitive_tests/data_processing/TimeIntervalTransform_pipeline.py (+10, -17)
  74. primitive_tests/detection_algorithm/ABOD_pipeline.py (+53, -0)
  75. primitive_tests/detection_algorithm/AutoEncoder_pipeline.py (+13, -29)
  76. primitive_tests/detection_algorithm/AutoRegODetect_pipeline.py (+54, -0)
  77. primitive_tests/detection_algorithm/CBLOF_pipline.py (+57, -0)
  78. primitive_tests/detection_algorithm/DeepLog_pipeline.py (+54, -0)
  79. primitive_tests/detection_algorithm/HBOS_pipline.py (+14, -30)
  80. primitive_tests/detection_algorithm/HBOS_score_pipeline.py (+14, -37)
  81. primitive_tests/detection_algorithm/IsolationForest_pipline.py (+13, -18)
  82. primitive_tests/detection_algorithm/KDiscord_pipeline.py (+54, -0)
  83. primitive_tests/detection_algorithm/KNN_pipeline.py (+55, -0)
  84. primitive_tests/detection_algorithm/LODA_pipeline.py (+55, -0)
  85. primitive_tests/detection_algorithm/LOF_pipeline.py (+55, -0)
  86. primitive_tests/detection_algorithm/LSTMOD_pipeline.py (+55, -0)
  87. primitive_tests/detection_algorithm/MatrixProfile_pipeline.py (+14, -29)
  88. primitive_tests/detection_algorithm/OCSVM_pipline.py (+55, -0)
  89. primitive_tests/detection_algorithm/PCAODetect_pipeline.py (+53, -0)
  90. primitive_tests/detection_algorithm/PyodCOF.py (+55, -0)
  91. primitive_tests/detection_algorithm/PyodMoGaal_pipeline.py (+54, -0)
  92. primitive_tests/detection_algorithm/PyodSoGaal_pipeline.py (+54, -0)
  93. primitive_tests/detection_algorithm/SOD_pipeline.py (+12, -28)
  94. primitive_tests/detection_algorithm/Telemanom_pipeline.py (+54, -0)
  95. primitive_tests/detection_algorithm/VariationalAutoEncoder_pipeline.py (+13, -29)
  96. primitive_tests/feature_analysis/BKFilter_pipeline.py (+18, -16)
  97. primitive_tests/feature_analysis/DiscreteCosineTransform_pipeline.py (+18, -17)
  98. primitive_tests/feature_analysis/FastFourierTransform_pipeline.py (+18, -19)
  99. primitive_tests/feature_analysis/HPFilter_pipeline.py (+19, -19)
  100. primitive_tests/feature_analysis/NonNegativeMatrixFactorization_pipeline.py (+19, -19)

datasets/anomaly/transform_yahoo.py (+1, -1)

@@ -10,7 +10,7 @@ import json
# Designed for time series data
name = 'yahoo_sub_5'
src_path = './raw_data/yahoo_sub_5.csv'
label_name = 'is_anomaly'
label_name = 'anomaly'
timestamp_name = 'timestamp'
value_names = ['value_{}'.format(i) for i in range(5)]
ratio = 0.9 # Ratio of training data, the rest is for testing


datasets/anomaly/yahoo_system_sub_5/SCORE/dataset_TEST/datasetDoc.json → datasets/anomaly/yahoo_sub_5/SCORE/dataset_TEST/datasetDoc.json

@@ -1,6 +1,6 @@
{
"about": {
"datasetID": "yahoo_system_sub_5_dataset_TEST",
"datasetID": "yahoo_sub_5_dataset_TEST",
"datasetName": "NULL",
"description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
@@ -50,7 +50,7 @@
},
{
"colIndex": 3,
"colName": "system_id",
"colName": "value_1",
"colType": "real",
"role": [
"attribute"
@@ -58,6 +58,30 @@
},
{
"colIndex": 4,
"colName": "value_2",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 5,
"colName": "value_3",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 6,
"colName": "value_4",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 7,
"colName": "ground_truth",
"colType": "integer",
"role": [
@@ -65,7 +89,7 @@
]
}
],
"columnsCount": 5
"columnsCount": 8
}
]
}

datasets/anomaly/yahoo_sub_5/SCORE/dataset_TEST/tables/learningData.csv (+141, -0)

@@ -0,0 +1,141 @@
d3mIndex,timestamp,value_0,value_1,value_2,value_3,value_4,ground_truth
1260,1261,7782,0.03428038631974298,2.5072222222222003,104,3119,0
1261,1262,7829,0.039360296791109,2.5927777777778,82,3590,0
1262,1263,7902,0.0,2.6894444444444,208,3893,0
1263,1264,8039,0.03894406599435602,2.6291666666667,92,3264,0
1264,1265,8350,0.18176011684739002,2.6469444444444,53,3963,0
1265,1266,8142,0.18521047165852,2.7461111111111003,65,2757,0
1266,1267,7886,0.13079770999921,2.9363888888889,62,2306,0
1267,1268,7743,0.13310058077443,3.2797222222222002,73,2549,0
1268,1269,7707,0.054750658073534006,3.5194444444444,84,2212,0
1269,1270,7726,0.030588852697706,3.8130555555556,90,2286,0
1270,1271,7717,0.12998124134227002,3.7941666666667,80,2979,0
1271,1272,10331,0.09100057249197198,3.6086111111111,90,3158,0
1272,1273,10515,0.19464543002904008,3.3858333333333,84,2645,0
1273,1274,10415,0.22178651521516,3.3336111111111,34,3161,0
1274,1275,10387,0.22983578430825,3.3116666666667003,67,4460,0
1275,1276,10471,0.298229429356,3.2616666666667005,74,2630,0
1276,1277,10385,0.12923377484588,3.0044444444444003,44,2593,0
1277,1278,10439,0.19609416059774,2.6741666666667,64,2625,0
1278,1279,10516,0.04051853381938501,2.3191666666667,70,4834,0
1279,1280,10587,0.07099894663641,2.0597222222222,96,4056,0
1280,1281,10586,0.07584150637714701,2.0547222222222,110,5713,0
1281,1282,10684,0.08180100127782801,2.1511111111111,68,3940,0
1282,1283,10880,0.0,2.2602777777778,90,4414,0
1283,1284,10830,0.0,2.2883333333333,90,5044,0
1284,1285,10794,0.09140162014739303,2.3736111111111002,69,3894,0
1285,1286,10843,0.0,2.5869444444444,46,3993,0
1286,1287,10805,0.0,2.6480555555556,74,4404,0
1287,1288,10996,0.0,2.6077777777777995,68,4072,0
1288,1289,11327,0.05363316840061,2.6069444444444,67,4182,0
1289,1290,11090,0.26818151064716,2.6908333333332997,51,3351,0
1290,1291,10578,0.21887772653901,2.9019444444444003,39,4183,0
1291,1292,10528,0.32371296573811,3.2711111111111,26,4068,0
1292,1293,10475,0.12565805017257,3.5872222222222,25,8139,0
1293,1294,10664,0.092277247744574,3.6913888888888997,32,11000,0
1294,1295,10513,0.077016875742983,3.6313888888888997,17,2975,0
1295,1296,9072,0.3714480797312501,3.5605555555556,19,2692,0
1296,1297,9069,0.19332372237792,3.4402777777778,16,2502,0
1297,1298,9089,0.06345811641554701,3.35,28,2510,0
1298,1299,9027,0.22671215594729996,3.3469444444444,24,2663,0
1299,1300,8969,0.053072279964629,3.2708333333332997,35,3575,0
1300,1301,9073,0.13336345197744,3.2519444444444,49,2586,0
1301,1302,8957,0.1252855094715,2.7311111111111,106,2908,0
1302,1303,9126,0.096211952864224,2.3875,80,3530,0
1303,1304,9122,0.09652446751775501,2.0847222222222,90,2776,0
1304,1305,9231,0.08924770147957402,2.0975,169,2962,0
1305,1306,9368,0.11889606284161999,2.1763888888889,98,3441,0
1306,1307,9458,0.031429841710104,2.2327777777777995,92,4376,0
1307,1308,9463,0.0,2.2725,91,3857,0
1308,1309,9356,0.036512411627867995,2.3202777777778,99,4685,0
1309,1310,9340,0.0,2.5425,90,4585,0
1310,1311,9340,0.0,2.5986111111111,126,3542,0
1311,1312,9276,0.0,2.6319444444444,102,3370,0
1312,1313,9611,0.10106696361212,2.5836111111111,132,3515,0
1313,1314,9532,0.14854949043035,2.675,88,3793,0
1314,1315,9156,0.08612162048398897,2.8522222222222,135,2954,0
1315,1316,9222,0.16494200410492002,3.1302777777778,114,2627,0
1316,1317,9282,0.28637713141253,3.4805555555556,35,2550,0
1317,1318,9573,0.13206535647488,3.5994444444444,24,2480,0
1318,1319,9333,0.27364025607799,3.5847222222222,44,2521,0
1319,1320,9987,0.38382339961227,3.4963888888889,26,2860,0
1320,1321,10133,0.08426242877623301,3.3825,37,3675,0
1321,1322,10010,0.3290413568025901,3.2694444444444,45,2704,0
1322,1323,10028,0.22632868808707998,3.2322222222222,42,3121,0
1323,1324,9984,0.17914189971361,3.1936111111111005,47,2603,0
1324,1325,10041,0.30046815361859003,3.0536111111111004,34,3984,0
1325,1326,10072,0.22650915594248,2.7819444444444,56,2537,0
1326,1327,10025,0.0,2.4152777777777996,87,3349,0
1327,1328,10116,0.1223093269317,2.1569444444443997,74,3958,0
1328,1329,10232,0.1696074188221,2.1125,90,4243,0
1329,1330,10516,0.0,2.1833333333333003,79,4159,0
1330,1331,10449,0.028193633007367002,2.205,97,5637,0
1331,1332,10598,0.0,2.1697222222222,90,8142,0
1332,1333,10337,0.0,2.3075,77,5713,0
1333,1334,10469,0.097305232437507,2.4575,101,3668,0
1334,1335,10426,0.11905908868378999,2.6077777777777995,74,4307,0
1335,1336,10531,0.11660374103282001,2.6275,439,4354,0
1336,1337,10875,0.060474297756584014,2.6144444444443997,79,4262,0
1337,1338,10494,0.22568442027805,2.6477777777777995,165,3446,0
1338,1339,10195,0.14077736537045002,2.8594444444444003,139,2677,0
1339,1340,9918,0.1924574892026,3.2675,56,4450,0
1340,1341,9889,0.18922597300629002,3.5136111111111004,102,3044,0
1341,1342,9947,0.041593949118095004,3.5725,101,3428,0
1342,1343,9977,0.2502095174271,3.6863888888889,41,2845,0
1343,1344,10835,0.18663972932643,3.5636111111111,94,2781,0
1344,1345,10765,0.07351854082400297,3.4127777777778,116,2743,0
1345,1346,10656,0.081949111399618,3.295,94,4470,0
1346,1347,10485,0.20148511394008997,3.2666666666667004,89,2596,0
1347,1348,10681,0.11515101921294,3.1933333333332996,141,3249,0
1348,1349,10852,0.07797276382811,3.0688888888888997,167,2529,0
1349,1350,10728,0.07244862879413201,2.8102777777778,148,2452,0
1350,1351,10874,0.07310929970435699,2.42,105,2934,0
1351,1352,10964,0.066868365737218,2.1358333333333,210,3159,0
1352,1353,10984,0.05788512501593701,1.9916666666667,145,3974,0
1353,1354,11055,0.09727414207464803,2.0947222222222,136,4305,0
1354,1355,11233,0.033270317741557996,2.1591666666667,126,5012,0
1355,1356,11161,0.0,2.2377777777778,157,4455,0
1356,1357,10966,0.038270957919533,2.2511111111111,105,4108,0
1357,1358,11193,0.08728058888363299,2.4208333333332996,114,4339,0
1358,1359,11167,0.10536774813238,2.5241666666667,104,5056,0
1359,1360,11367,0.1233991317089,2.5794444444443996,69,5573,0
1360,1361,51251,0.042565915766552,2.5936111111111,75,3366,1
1361,1362,17953,0.23147422367229,2.6830555555556,73,2559,1
1362,1363,170029,0.08983405162538903,2.8188888888888997,74,1999,1
1363,1364,10955,0.07464756469365201,2.9513888888888995,126,1993,0
1364,1365,10984,0.09924410491893401,3.2830555555556,67,1913,0
1365,1366,10964,0.11535172009194,3.4819444444444,32,1760,0
1366,1367,10980,0.21774881707851998,3.5886111111111005,38,1890,0
1367,1368,10852,0.1305066423559,3.4836111111111,34,2469,0
1368,1369,10786,0.10054853030204,3.3955555555556,36,2133,0
1369,1370,10841,0.02468393737575,3.2847222222222,26,3359,0
1370,1371,10762,0.10018007414459,3.2383333333332995,74,3783,0
1371,1372,10419,0.12522619841308,3.2188888888888996,85,1809,0
1372,1373,10467,0.11781887197077001,2.9483333333333,67,2143,0
1373,1374,10502,0.13417256350298,2.5855555555556,84,2567,0
1374,1375,10519,0.07474686582090599,2.3005555555556003,1630,2176,0
1375,1376,10579,0.13570963056519,2.0855555555556,1435,1929,0
1376,1377,10502,0.076431907457478,1.9027777777777999,857,2244,0
1377,1378,10661,0.0,1.9411111111111,31,1810,0
1378,1379,10818,0.1936428046839,2.0444444444444,500,2088,0
1379,1380,10918,0.05282677388968402,2.1363888888889,53,2371,0
1380,1381,10871,0.0,2.22,61,1843,0
1381,1382,10796,0.054466597481213,2.3530555555556,158,2668,0
1382,1383,10774,0.057459020289436,2.545,184,2309,0
1383,1384,10898,0.28750562005936,2.6202777777777997,91,1998,0
1384,1385,11442,0.075538554674309,2.6847222222222,60,2480,0
1385,1386,11113,0.08112608570492501,2.6591666666667004,107,2147,0
1386,1387,10888,0.21563803296368,2.7863888888888995,5157,1802,0
1387,1388,10894,0.09572500230568501,3.0269444444444003,28,1789,0
1388,1389,10888,0.17516056892320994,3.3227777777778,24,1999,0
1389,1390,10896,0.32902836018585996,3.6097222222222,21,2142,0
1390,1391,10800,0.10216065221678,3.6805555555555998,12,1904,0
1391,1392,11000,0.19741931250852,3.6075,24,1876,0
1392,1393,10985,0.10149107903671001,3.4091666666667004,17,2434,0
1393,1394,11017,0.17479255893624,3.3666666666667004,48,2472,0
1394,1395,10863,0.034385029573777,3.3158333333332997,41,1744,0
1395,1396,10875,0.21988771218053,3.1622222222222,1088,2404,0
1396,1397,10987,0.10149107903671001,3.1086111111111,68,1971,0
1397,1398,10778,0.10269981175444999,2.6552777777778,2575,1713,0
1398,1399,10957,0.11258759940039,2.2730555555556,4688,1765,0
1399,1400,10832,0.13022351806001,2.0591666666667,477,3156,0

datasets/anomaly/yahoo_sub_5/SCORE/problem_TEST/dataSplits.csv (+1261, -0)
File diff suppressed because it is too large


datasets/anomaly/yahoo_system_sub_5/TRAIN/problem_TRAIN/problemDoc.json → datasets/anomaly/yahoo_sub_5/SCORE/problem_TEST/problemDoc.json

@@ -1,7 +1,7 @@
{
"about": {
"problemID": "yahoo_system_sub_5_problem",
"problemName": "yahoo_system_sub_5_problem",
"problemID": "yahoo_sub_5_problem",
"problemName": "yahoo_sub_5_problem",
"problemDescription": "Anomaly detection",
"problemVersion": "4.0.0",
"problemSchemaVersion": "4.0.0",
@@ -14,12 +14,12 @@
"inputs": {
"data": [
{
"datasetID": "yahoo_system_sub_5_dataset",
"datasetID": "yahoo_sub_5_dataset",
"targets": [
{
"targetIndex": 0,
"resID": "learningData",
"colIndex": 4,
"colIndex": 7,
"colName": "ground_truth"
}
]
@@ -35,20 +35,20 @@
"datasetViewMaps": {
"train": [
{
"from": "yahoo_system_sub_5_dataset",
"to": "yahoo_system_sub_5_dataset_TRAIN"
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_TRAIN"
}
],
"test": [
{
"from": "yahoo_system_sub_5_dataset",
"to": "yahoo_system_sub_5_dataset_TEST"
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_TEST"
}
],
"score": [
{
"from": "yahoo_system_sub_5_dataset",
"to": "yahoo_system_sub_5_dataset_SCORE"
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_SCORE"
}
]
}

datasets/anomaly/yahoo_system_sub_5/SCORE/targets.csv → datasets/anomaly/yahoo_sub_5/SCORE/targets.csv


datasets/anomaly/yahoo_system_sub_5/TEST/dataset_TEST/datasetDoc.json → datasets/anomaly/yahoo_sub_5/TEST/dataset_TEST/datasetDoc.json

@@ -1,6 +1,6 @@
{
"about": {
"datasetID": "yahoo_system_sub_5_dataset_TEST",
"datasetID": "yahoo_sub_5_dataset_TEST",
"datasetName": "NULL",
"description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
@@ -50,7 +50,7 @@
},
{
"colIndex": 3,
"colName": "system_id",
"colName": "value_1",
"colType": "real",
"role": [
"attribute"
@@ -58,6 +58,30 @@
},
{
"colIndex": 4,
"colName": "value_2",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 5,
"colName": "value_3",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 6,
"colName": "value_4",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 7,
"colName": "ground_truth",
"colType": "integer",
"role": [
@@ -65,7 +89,7 @@
]
}
],
"columnsCount": 5
"columnsCount": 8
}
]
}

datasets/anomaly/yahoo_sub_5/TEST/dataset_TEST/tables/learningData.csv (+141, -0)

@@ -0,0 +1,141 @@
d3mIndex,timestamp,value_0,value_1,value_2,value_3,value_4,ground_truth
1260,1261,7782,0.03428038631974298,2.5072222222222003,104,3119,0
1261,1262,7829,0.039360296791109,2.5927777777778,82,3590,0
1262,1263,7902,0.0,2.6894444444444,208,3893,0
1263,1264,8039,0.03894406599435602,2.6291666666667,92,3264,0
1264,1265,8350,0.18176011684739002,2.6469444444444,53,3963,0
1265,1266,8142,0.18521047165852,2.7461111111111003,65,2757,0
1266,1267,7886,0.13079770999921,2.9363888888889,62,2306,0
1267,1268,7743,0.13310058077443,3.2797222222222002,73,2549,0
1268,1269,7707,0.054750658073534006,3.5194444444444,84,2212,0
1269,1270,7726,0.030588852697706,3.8130555555556,90,2286,0
1270,1271,7717,0.12998124134227002,3.7941666666667,80,2979,0
1271,1272,10331,0.09100057249197198,3.6086111111111,90,3158,0
1272,1273,10515,0.19464543002904008,3.3858333333333,84,2645,0
1273,1274,10415,0.22178651521516,3.3336111111111,34,3161,0
1274,1275,10387,0.22983578430825,3.3116666666667003,67,4460,0
1275,1276,10471,0.298229429356,3.2616666666667005,74,2630,0
1276,1277,10385,0.12923377484588,3.0044444444444003,44,2593,0
1277,1278,10439,0.19609416059774,2.6741666666667,64,2625,0
1278,1279,10516,0.04051853381938501,2.3191666666667,70,4834,0
1279,1280,10587,0.07099894663641,2.0597222222222,96,4056,0
1280,1281,10586,0.07584150637714701,2.0547222222222,110,5713,0
1281,1282,10684,0.08180100127782801,2.1511111111111,68,3940,0
1282,1283,10880,0.0,2.2602777777778,90,4414,0
1283,1284,10830,0.0,2.2883333333333,90,5044,0
1284,1285,10794,0.09140162014739303,2.3736111111111002,69,3894,0
1285,1286,10843,0.0,2.5869444444444,46,3993,0
1286,1287,10805,0.0,2.6480555555556,74,4404,0
1287,1288,10996,0.0,2.6077777777777995,68,4072,0
1288,1289,11327,0.05363316840061,2.6069444444444,67,4182,0
1289,1290,11090,0.26818151064716,2.6908333333332997,51,3351,0
1290,1291,10578,0.21887772653901,2.9019444444444003,39,4183,0
1291,1292,10528,0.32371296573811,3.2711111111111,26,4068,0
1292,1293,10475,0.12565805017257,3.5872222222222,25,8139,0
1293,1294,10664,0.092277247744574,3.6913888888888997,32,11000,0
1294,1295,10513,0.077016875742983,3.6313888888888997,17,2975,0
1295,1296,9072,0.3714480797312501,3.5605555555556,19,2692,0
1296,1297,9069,0.19332372237792,3.4402777777778,16,2502,0
1297,1298,9089,0.06345811641554701,3.35,28,2510,0
1298,1299,9027,0.22671215594729996,3.3469444444444,24,2663,0
1299,1300,8969,0.053072279964629,3.2708333333332997,35,3575,0
1300,1301,9073,0.13336345197744,3.2519444444444,49,2586,0
1301,1302,8957,0.1252855094715,2.7311111111111,106,2908,0
1302,1303,9126,0.096211952864224,2.3875,80,3530,0
1303,1304,9122,0.09652446751775501,2.0847222222222,90,2776,0
1304,1305,9231,0.08924770147957402,2.0975,169,2962,0
1305,1306,9368,0.11889606284161999,2.1763888888889,98,3441,0
1306,1307,9458,0.031429841710104,2.2327777777777995,92,4376,0
1307,1308,9463,0.0,2.2725,91,3857,0
1308,1309,9356,0.036512411627867995,2.3202777777778,99,4685,0
1309,1310,9340,0.0,2.5425,90,4585,0
1310,1311,9340,0.0,2.5986111111111,126,3542,0
1311,1312,9276,0.0,2.6319444444444,102,3370,0
1312,1313,9611,0.10106696361212,2.5836111111111,132,3515,0
1313,1314,9532,0.14854949043035,2.675,88,3793,0
1314,1315,9156,0.08612162048398897,2.8522222222222,135,2954,0
1315,1316,9222,0.16494200410492002,3.1302777777778,114,2627,0
1316,1317,9282,0.28637713141253,3.4805555555556,35,2550,0
1317,1318,9573,0.13206535647488,3.5994444444444,24,2480,0
1318,1319,9333,0.27364025607799,3.5847222222222,44,2521,0
1319,1320,9987,0.38382339961227,3.4963888888889,26,2860,0
1320,1321,10133,0.08426242877623301,3.3825,37,3675,0
1321,1322,10010,0.3290413568025901,3.2694444444444,45,2704,0
1322,1323,10028,0.22632868808707998,3.2322222222222,42,3121,0
1323,1324,9984,0.17914189971361,3.1936111111111005,47,2603,0
1324,1325,10041,0.30046815361859003,3.0536111111111004,34,3984,0
1325,1326,10072,0.22650915594248,2.7819444444444,56,2537,0
1326,1327,10025,0.0,2.4152777777777996,87,3349,0
1327,1328,10116,0.1223093269317,2.1569444444443997,74,3958,0
1328,1329,10232,0.1696074188221,2.1125,90,4243,0
1329,1330,10516,0.0,2.1833333333333003,79,4159,0
1330,1331,10449,0.028193633007367002,2.205,97,5637,0
1331,1332,10598,0.0,2.1697222222222,90,8142,0
1332,1333,10337,0.0,2.3075,77,5713,0
1333,1334,10469,0.097305232437507,2.4575,101,3668,0
1334,1335,10426,0.11905908868378999,2.6077777777777995,74,4307,0
1335,1336,10531,0.11660374103282001,2.6275,439,4354,0
1336,1337,10875,0.060474297756584014,2.6144444444443997,79,4262,0
1337,1338,10494,0.22568442027805,2.6477777777777995,165,3446,0
1338,1339,10195,0.14077736537045002,2.8594444444444003,139,2677,0
1339,1340,9918,0.1924574892026,3.2675,56,4450,0
1340,1341,9889,0.18922597300629002,3.5136111111111004,102,3044,0
1341,1342,9947,0.041593949118095004,3.5725,101,3428,0
1342,1343,9977,0.2502095174271,3.6863888888889,41,2845,0
1343,1344,10835,0.18663972932643,3.5636111111111,94,2781,0
1344,1345,10765,0.07351854082400297,3.4127777777778,116,2743,0
1345,1346,10656,0.081949111399618,3.295,94,4470,0
1346,1347,10485,0.20148511394008997,3.2666666666667004,89,2596,0
1347,1348,10681,0.11515101921294,3.1933333333332996,141,3249,0
1348,1349,10852,0.07797276382811,3.0688888888888997,167,2529,0
1349,1350,10728,0.07244862879413201,2.8102777777778,148,2452,0
1350,1351,10874,0.07310929970435699,2.42,105,2934,0
1351,1352,10964,0.066868365737218,2.1358333333333,210,3159,0
1352,1353,10984,0.05788512501593701,1.9916666666667,145,3974,0
1353,1354,11055,0.09727414207464803,2.0947222222222,136,4305,0
1354,1355,11233,0.033270317741557996,2.1591666666667,126,5012,0
1355,1356,11161,0.0,2.2377777777778,157,4455,0
1356,1357,10966,0.038270957919533,2.2511111111111,105,4108,0
1357,1358,11193,0.08728058888363299,2.4208333333332996,114,4339,0
1358,1359,11167,0.10536774813238,2.5241666666667,104,5056,0
1359,1360,11367,0.1233991317089,2.5794444444443996,69,5573,0
1360,1361,51251,0.042565915766552,2.5936111111111,75,3366,1
1361,1362,17953,0.23147422367229,2.6830555555556,73,2559,1
1362,1363,170029,0.08983405162538903,2.8188888888888997,74,1999,1
1363,1364,10955,0.07464756469365201,2.9513888888888995,126,1993,0
1364,1365,10984,0.09924410491893401,3.2830555555556,67,1913,0
1365,1366,10964,0.11535172009194,3.4819444444444,32,1760,0
1366,1367,10980,0.21774881707851998,3.5886111111111005,38,1890,0
1367,1368,10852,0.1305066423559,3.4836111111111,34,2469,0
1368,1369,10786,0.10054853030204,3.3955555555556,36,2133,0
1369,1370,10841,0.02468393737575,3.2847222222222,26,3359,0
1370,1371,10762,0.10018007414459,3.2383333333332995,74,3783,0
1371,1372,10419,0.12522619841308,3.2188888888888996,85,1809,0
1372,1373,10467,0.11781887197077001,2.9483333333333,67,2143,0
1373,1374,10502,0.13417256350298,2.5855555555556,84,2567,0
1374,1375,10519,0.07474686582090599,2.3005555555556003,1630,2176,0
1375,1376,10579,0.13570963056519,2.0855555555556,1435,1929,0
1376,1377,10502,0.076431907457478,1.9027777777777999,857,2244,0
1377,1378,10661,0.0,1.9411111111111,31,1810,0
1378,1379,10818,0.1936428046839,2.0444444444444,500,2088,0
1379,1380,10918,0.05282677388968402,2.1363888888889,53,2371,0
1380,1381,10871,0.0,2.22,61,1843,0
1381,1382,10796,0.054466597481213,2.3530555555556,158,2668,0
1382,1383,10774,0.057459020289436,2.545,184,2309,0
1383,1384,10898,0.28750562005936,2.6202777777777997,91,1998,0
1384,1385,11442,0.075538554674309,2.6847222222222,60,2480,0
1385,1386,11113,0.08112608570492501,2.6591666666667004,107,2147,0
1386,1387,10888,0.21563803296368,2.7863888888888995,5157,1802,0
1387,1388,10894,0.09572500230568501,3.0269444444444003,28,1789,0
1388,1389,10888,0.17516056892320994,3.3227777777778,24,1999,0
1389,1390,10896,0.32902836018585996,3.6097222222222,21,2142,0
1390,1391,10800,0.10216065221678,3.6805555555555998,12,1904,0
1391,1392,11000,0.19741931250852,3.6075,24,1876,0
1392,1393,10985,0.10149107903671001,3.4091666666667004,17,2434,0
1393,1394,11017,0.17479255893624,3.3666666666667004,48,2472,0
1394,1395,10863,0.034385029573777,3.3158333333332997,41,1744,0
1395,1396,10875,0.21988771218053,3.1622222222222,1088,2404,0
1396,1397,10987,0.10149107903671001,3.1086111111111,68,1971,0
1397,1398,10778,0.10269981175444999,2.6552777777778,2575,1713,0
1398,1399,10957,0.11258759940039,2.2730555555556,4688,1765,0
1399,1400,10832,0.13022351806001,2.0591666666667,477,3156,0

datasets/anomaly/yahoo_sub_5/TEST/problem_TEST/dataSplits.csv (+1261, -0)
File diff suppressed because it is too large


datasets/anomaly/yahoo_system_sub_5/yahoo_system_sub_5_problem/problemDoc.json → datasets/anomaly/yahoo_sub_5/TEST/problem_TEST/problemDoc.json

@@ -1,7 +1,7 @@
{
"about": {
"problemID": "yahoo_system_sub_5_problem",
"problemName": "yahoo_system_sub_5_problem",
"problemID": "yahoo_sub_5_problem",
"problemName": "yahoo_sub_5_problem",
"problemDescription": "Anomaly detection",
"problemVersion": "4.0.0",
"problemSchemaVersion": "4.0.0",
@@ -14,12 +14,12 @@
"inputs": {
"data": [
{
"datasetID": "yahoo_system_sub_5_dataset",
"datasetID": "yahoo_sub_5_dataset",
"targets": [
{
"targetIndex": 0,
"resID": "learningData",
"colIndex": 4,
"colIndex": 7,
"colName": "ground_truth"
}
]
@@ -35,20 +35,20 @@
"datasetViewMaps": {
"train": [
{
"from": "yahoo_system_sub_5_dataset",
"to": "yahoo_system_sub_5_dataset_TRAIN"
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_TRAIN"
}
],
"test": [
{
"from": "yahoo_system_sub_5_dataset",
"to": "yahoo_system_sub_5_dataset_TEST"
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_TEST"
}
],
"score": [
{
"from": "yahoo_system_sub_5_dataset",
"to": "yahoo_system_sub_5_dataset_SCORE"
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_SCORE"
}
]
}

datasets/anomaly/yahoo_system_sub_5/TRAIN/dataset_TRAIN/datasetDoc.json → datasets/anomaly/yahoo_sub_5/TRAIN/dataset_TRAIN/datasetDoc.json

@@ -1,6 +1,6 @@
{
"about": {
"datasetID": "yahoo_system_sub_5_dataset_TRAIN",
"datasetID": "yahoo_sub_5_dataset_TRAIN",
"datasetName": "NULL",
"description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
@@ -50,7 +50,7 @@
},
{
"colIndex": 3,
"colName": "system_id",
"colName": "value_1",
"colType": "real",
"role": [
"attribute"
@@ -58,6 +58,30 @@
},
{
"colIndex": 4,
"colName": "value_2",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 5,
"colName": "value_3",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 6,
"colName": "value_4",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 7,
"colName": "ground_truth",
"colType": "integer",
"role": [
@@ -65,7 +89,7 @@
]
}
],
"columnsCount": 5
"columnsCount": 8
}
]
}
}

datasets/anomaly/yahoo_sub_5/TRAIN/dataset_TRAIN/tables/learningData.csv (+1261, -0)
File diff suppressed because it is too large


datasets/anomaly/yahoo_sub_5/TRAIN/problem_TRAIN/dataSplits.csv (+1261, -0)
File diff suppressed because it is too large


datasets/anomaly/yahoo_system_sub_5/TEST/problem_TEST/problemDoc.json → datasets/anomaly/yahoo_sub_5/TRAIN/problem_TRAIN/problemDoc.json

@@ -1,7 +1,7 @@
{
"about": {
"problemID": "yahoo_system_sub_5_problem",
"problemName": "yahoo_system_sub_5_problem",
"problemID": "yahoo_sub_5_problem",
"problemName": "yahoo_sub_5_problem",
"problemDescription": "Anomaly detection",
"problemVersion": "4.0.0",
"problemSchemaVersion": "4.0.0",
@@ -14,12 +14,12 @@
"inputs": {
"data": [
{
"datasetID": "yahoo_system_sub_5_dataset",
"datasetID": "yahoo_sub_5_dataset",
"targets": [
{
"targetIndex": 0,
"resID": "learningData",
"colIndex": 4,
"colIndex": 7,
"colName": "ground_truth"
}
]
@@ -35,20 +35,20 @@
"datasetViewMaps": {
"train": [
{
"from": "yahoo_system_sub_5_dataset",
"to": "yahoo_system_sub_5_dataset_TRAIN"
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_TRAIN"
}
],
"test": [
{
"from": "yahoo_system_sub_5_dataset",
"to": "yahoo_system_sub_5_dataset_TEST"
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_TEST"
}
],
"score": [
{
"from": "yahoo_system_sub_5_dataset",
"to": "yahoo_system_sub_5_dataset_SCORE"
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_SCORE"
}
]
}

datasets/anomaly/yahoo_system_sub_5/yahoo_system_sub_5_dataset/datasetDoc.json → datasets/anomaly/yahoo_sub_5/yahoo_sub_5_dataset/datasetDoc.json

@@ -1,7 +1,7 @@
{
"about": {
"datasetID": "yahoo_system_sub_5_dataset",
"datasetName": "yahoo_system_sub_5",
"datasetID": "yahoo_sub_5_dataset",
"datasetName": "yahoo_sub_5",
"description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
"license": " CC Public Domain Mark 1.0 ",
@@ -50,7 +50,7 @@
},
{
"colIndex": 3,
"colName": "system_id",
"colName": "value_1",
"colType": "real",
"role": [
"attribute"
@@ -58,6 +58,30 @@
},
{
"colIndex": 4,
"colName": "value_2",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 5,
"colName": "value_3",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 6,
"colName": "value_4",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 7,
"colName": "ground_truth",
"colType": "integer",
"role": [
@@ -65,7 +89,7 @@
]
}
],
"columnsCount": 5
"columnsCount": 8
}
]
}
}

datasets/anomaly/yahoo_sub_5/yahoo_sub_5_dataset/tables/learningData.csv (+1401, -0)
File diff suppressed because it is too large


datasets/anomaly/yahoo_sub_5/yahoo_sub_5_problem/dataSplits.csv (+1261, -0)
File diff suppressed because it is too large


datasets/anomaly/yahoo_system_sub_5/SCORE/problem_TEST/problemDoc.json → datasets/anomaly/yahoo_sub_5/yahoo_sub_5_problem/problemDoc.json

@@ -1,7 +1,7 @@
{
"about": {
"problemID": "yahoo_system_sub_5_problem",
"problemName": "yahoo_system_sub_5_problem",
"problemID": "yahoo_sub_5_problem",
"problemName": "yahoo_sub_5_problem",
"problemDescription": "Anomaly detection",
"problemVersion": "4.0.0",
"problemSchemaVersion": "4.0.0",
@@ -14,12 +14,12 @@
"inputs": {
"data": [
{
"datasetID": "yahoo_system_sub_5_dataset",
"datasetID": "yahoo_sub_5_dataset",
"targets": [
{
"targetIndex": 0,
"resID": "learningData",
"colIndex": 4,
"colIndex": 7,
"colName": "ground_truth"
}
]
@@ -35,20 +35,20 @@
"datasetViewMaps": {
"train": [
{
"from": "yahoo_system_sub_5_dataset",
"to": "yahoo_system_sub_5_dataset_TRAIN"
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_TRAIN"
}
],
"test": [
{
"from": "yahoo_system_sub_5_dataset",
"to": "yahoo_system_sub_5_dataset_TEST"
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_TEST"
}
],
"score": [
{
"from": "yahoo_system_sub_5_dataset",
"to": "yahoo_system_sub_5_dataset_SCORE"
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_SCORE"
}
]
}

datasets/anomaly/yahoo_system_sub_5/SCORE/dataset_TEST/tables/learningData.csv (+0, -1401)
File diff suppressed because it is too large


datasets/anomaly/yahoo_system_sub_5/SCORE/problem_TEST/dataSplits.csv (+0, -5601)
File diff suppressed because it is too large


datasets/anomaly/yahoo_system_sub_5/TEST/dataset_TEST/tables/learningData.csv (+0, -1401)
File diff suppressed because it is too large


datasets/anomaly/yahoo_system_sub_5/TEST/problem_TEST/dataSplits.csv (+0, -5601)
File diff suppressed because it is too large


datasets/anomaly/yahoo_system_sub_5/TRAIN/dataset_TRAIN/tables/learningData.csv (+0, -5601)
File diff suppressed because it is too large


datasets/anomaly/yahoo_system_sub_5/TRAIN/problem_TRAIN/dataSplits.csv (+0, -5601)
File diff suppressed because it is too large


datasets/anomaly/yahoo_system_sub_5/yahoo_system_sub_5_dataset/tables/learningData.csv (+0, -7001)
File diff suppressed because it is too large


datasets/anomaly/yahoo_system_sub_5/yahoo_system_sub_5_problem/dataSplits.csv (+0, -5601)
File diff suppressed because it is too large


primitive_tests/build_ABOD_pipline.py (+0, -70)

@@ -1,70 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: ABOD
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_abod'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')

step_5.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_5.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2, 4,))
step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='replace')

step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()


primitive_tests/build_CBLOF_pipline.py (+0, -51)

@@ -1,51 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_cblof')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()


primitive_tests/build_DeepLog_pipeline.py (+0, -49)

@@ -1,49 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.deeplog')

step_2 = PrimitiveStep(primitive=primitive_2)
#step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# # Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()

primitive_tests/build_HoltSmoothing_pipline.py (+0, -76)

@@ -1,76 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: holt smoothing
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.holt_smoothing'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_hyperparameter(name="exclude_columns", argument_type=ArgumentType.VALUE, data = (2, 3))
step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: isolation forest
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm'))
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
#step_6.add_output('produce')
#pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()


primitive_tests/build_HoltWintersExponentialSmoothing_pipline.py (+0, -76)

@@ -1,76 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: holt winters exponential smoothing
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.holt_winters_exponential_smoothing'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (2, 3))
step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: isolation forest
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm'))
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
#step_6.add_output('produce')
#pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()


primitive_tests/build_KDiscord_pipeline.py (+0, -71)

@@ -1,71 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import numpy as np

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# # Step 3: Standardization
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# # Step 4: test primitive
primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.KDiscordODetector')
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_4.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10)
# step_4.add_hyperparameter(name='weights', argument_type=ArgumentType.VALUE, data=weights_ndarray)
step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False)
# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # There is sth wrong with multi-dimensional
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_output('produce')
step_4.add_output('produce_score')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()
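# Hedged sketch (assumption, not in the original test): the JSON branch hinted at above
# mirrors the YAML output, and the serialized description can be reloaded as a sanity
# check, assuming to_json()/Pipeline.from_yaml() behave as in recent d3m releases:
#
# data = pipeline_description.to_json()
# with open('pipeline.json', 'w') as f:
#     f.write(data)
#
# with open('pipeline.yml', 'r') as f:
#     reloaded = Pipeline.from_yaml(f)
# assert reloaded.id == pipeline_description.id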


+ 0
- 51
primitive_tests/build_KNN_pipline.py View File

@@ -1,51 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_knn')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # note: multi-dimensional input is not handled correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 51
primitive_tests/build_LODA_pipline.py View File

@@ -1,51 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_loda')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # note: multi-dimensional input is not handled correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 51
primitive_tests/build_LOF_pipline.py View File

@@ -1,51 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_lof')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # note: multi-dimensional input is not handled correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 49
primitive_tests/build_MatrixProfile_pipeline.py View File

@@ -1,49 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.matrix_profile')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,)) # note: multi-dimensional input is not handled correctly yet
step_2.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=3)
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# # Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 0
- 77
primitive_tests/build_MeanAverageTransform_pipline.py View File

@@ -1,77 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: moving average transform
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.moving_average_transform'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (2, 3))
step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: isolation forest
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm'))
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
#step_6.add_output('produce')
#pipeline_description.add_step(step_6)
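# Hedged alternative sketch (not part of the original test): instead of the isolation
# forest above, a TODS detector such as pyod_lof (used in build_LOF_pipline.py earlier
# in this commit) could follow the moving average transform; the final output would
# then reference steps.6.produce:
#
# step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_lof'))
# step_6.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
# step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
# step_6.add_output('produce')
# pipeline_description.add_step(step_6)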

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 51
primitive_tests/build_OCSVM_pipline.py View File

@@ -1,51 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ocsvm')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # note: multi-dimensional input is not handled correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 51
primitive_tests/build_PyodCOF.py View File

@@ -1,51 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_cof')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4)) # note: multi-dimensional input is not handled correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 49
primitive_tests/build_QuantileTransform_pipline.py View File

@@ -1,49 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.quantile_transformer')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 49
primitive_tests/build_SOD_pipeline.py View File

@@ -1,49 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sod')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4)) # note: multi-dimensional input is not handled correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# # Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 0
- 76
primitive_tests/build_SimpleExponentialSmoothing_pipline.py View File

@@ -1,76 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: simple exponential smoothing
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.simple_exponential_smoothing'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (1,))
step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: isolation forest
#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm'))
#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
#step_6.add_output('produce')
#pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 49
primitive_tests/build_Standardize_pipline.py View File

@@ -1,49 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 80
primitive_tests/build_SubsequenceClustering_pipline.py View File

@@ -1,80 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)


# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: test primitive
primitive_4 = index.get_primitive('d3m.primitives.tods.timeseries_processing.subsequence_clustering')
step_4 = PrimitiveStep(primitive=primitive_4)

step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: test primitive
primitive_5 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_loda')
step_5 = PrimitiveStep(primitive=primitive_5)
step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Step 6: Predictions
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_6.add_output('produce')
pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')

# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 0
- 48
primitive_tests/build_Telemanom.py View File

@@ -1,48 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')


# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: Column Parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: Telemanom detection algorithm
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.telemanom')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 0
- 86
primitive_tests/build_TimeIntervalTransform_pipeline.py View File

@@ -1,86 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: dataframe transformation
# primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKPowerTransformer')
# primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKStandardization')
# primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKQuantileTransformer')

#Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

primitive_2 = index.get_primitive('d3m.primitives.tods.data_processing.time_interval_transform')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name="time_interval", argument_type=ArgumentType.VALUE, data = '5T')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)
#
# # Step 2: column_parser
# step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
# step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
# step_2.add_output('produce')
# pipeline_description.add_step(step_2)
#
#
# # Step 3: extract_columns_by_semantic_types(attributes)
# step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
# step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
# step_3.add_output('produce')
# step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
# data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
# pipeline_description.add_step(step_3)
#
# # Step 4: extract_columns_by_semantic_types(targets)
# step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
# step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
# step_4.add_output('produce')
# step_4.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
# data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
# pipeline_description.add_step(step_4)
#
# attributes = 'steps.3.produce'
# targets = 'steps.4.produce'
#
# # Step 5: imputer
# step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_cleaning.imputer.SKlearn'))
# step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
# step_5.add_output('produce')
# pipeline_description.add_step(step_5)
#
# # Step 6: random_forest
# step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.regression.random_forest.SKlearn'))
# step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
# step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
# step_6.add_output('produce')
# pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.1.produce')
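# Note: as written, the output above references the column parser result
# (steps.1.produce); the time_interval_transform result of step_2 (steps.2.produce)
# is not part of the pipeline output.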

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 0
- 64
primitive_tests/build_WaveletTransform_pipline.py View File

@@ -1,64 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test WaveletTransform
primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.wavelet_transform')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='wavelet', argument_type=ArgumentType.VALUE, data='db8')
step_2.add_hyperparameter(name='level', argument_type=ArgumentType.VALUE, data=2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test inverse WaveletTransform
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.wavelet_transform')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='wavelet', argument_type=ArgumentType.VALUE, data='db8')
step_3.add_hyperparameter(name='level', argument_type=ArgumentType.VALUE, data=2)
step_3.add_hyperparameter(name='inverse', argument_type=ArgumentType.VALUE, data=1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False)
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')
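# Note: the inverse transform (step 3) writes to steps.3.produce; the output above
# keeps the forward wavelet transform from steps.2.produce.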

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 50
primitive_tests/build_test_detection_algorithm_PyodMoGaal.py View File

@@ -1,50 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_mogaal')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # note: multi-dimensional input is not handled correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 0
- 50
primitive_tests/build_test_detection_algorithm_PyodSoGaal.py View File

@@ -1,50 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: test primitive
primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sogaal')

step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # note: multi-dimensional input is not handled correctly yet
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipeline_description.to_json()

+ 0
- 61
primitive_tests/build_test_feature_analysis_spectral_residual_transform_pipeline.py View File

@@ -1,61 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.spectral_residual_transform')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='avg_filter_dimension', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # note: multi-dimensional input is not handled correctly yet
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_abs_energy.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_abs_energy')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # note: multi-dimensional input is not handled correctly yet
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_abs_sum.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_abs_sum')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # note: multi-dimensional input is not handled correctly yet
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output json
#data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_gmean.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_g_mean')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # note: multi-dimensional input is not handled correctly yet
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON instead:
# data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_hmean.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_h_mean

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_h_mean')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: restricted to these columns because multi-dimensional input does not yet work correctly
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON instead:
# data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_kurtosis.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_kurtosis

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_kurtosis')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: restricted to these columns because multi-dimensional input does not yet work correctly
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON instead:
# data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_maximum.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_maximum

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: restricted to these columns because multi-dimensional input does not yet work correctly
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON instead:
# data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_mean.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_mean

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: restricted to these columns because multi-dimensional input does not yet work correctly
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON instead:
# data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_mean_abs.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_mean_abs

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean_abs')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: restricted to these columns because multi-dimensional input does not yet work correctly
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON instead:
# data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_mean_abs_temporal_derivative.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_mean_abs_temporal_derivative

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean_abs_temporal_derivative')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: restricted to these columns because multi-dimensional input does not yet work correctly
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON instead:
# data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_mean_temporal_derivative.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_mean_temporal_derivative

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean_temporal_derivative')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: restricted to these columns because multi-dimensional input does not yet work correctly
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON instead:
# data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_median.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_median

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_median')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: restricted to these columns because multi-dimensional input does not yet work correctly
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON instead:
# data = pipeline_description.to_json()


+ 0
- 63
primitive_tests/build_test_feature_analysis_statistical_median_absolute_deviation.py View File

@@ -1,63 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_median_abs_deviation

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)


# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_median_abs_deviation')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: restricted to these columns because multi-dimensional input does not yet work correctly
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON instead:
# data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_minimum.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_minimum

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_minimum')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: restricted to these columns because multi-dimensional input does not yet work correctly
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON instead:
# data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_skew.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_skew

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_skew')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: restricted to these columns because multi-dimensional input does not yet work correctly
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON instead:
# data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_variation.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_variation

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_variation')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: restricted to these columns because multi-dimensional input does not yet work correctly
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON instead:
# data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_vec_sum.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_vec_sum

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_vec_sum')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: restricted to these columns because multi-dimensional input does not yet work correctly
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON instead:
# data = pipeline_description.to_json()


+ 0
- 62
primitive_tests/build_test_feature_analysis_statistical_willison_amplitude.py View File

@@ -1,62 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> statistical_willison_amplitude

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_willison_amplitude')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # Note: restricted to these columns because multi-dimensional input does not yet work correctly
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)


# Or you can output JSON instead:
# data = pipeline_description.to_json()


+ 0
- 61
primitive_tests/build_test_time_series_seasonality_trend_decomposition.py View File

@@ -1,61 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# Pipeline: dataset_to_dataframe -> column_parser -> standard_scaler -> time_series_seasonality_trend_decomposition

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# # Step 2: Standardization
primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

# # Step 3: test primitive
# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive')
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.decomposition.time_series_seasonality_trend_decomposition')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='period', argument_type=ArgumentType.VALUE, data=5)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # Note: restricted to these columns because multi-dimensional input does not yet work correctly
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



# Final Output
pipeline_description.add_output(name='output', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)

# Or you can output JSON instead:
# data = pipeline_description.to_json()
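These deleted test scripts only ever wrote pipeline.yml without checking it. Below is a minimal round-trip sketch, assuming d3m's Pipeline.from_yaml accepts an open file or YAML string; verify this against the installed d3m version.

from d3m.metadata.pipeline import Pipeline

# Assumption: Pipeline.from_yaml is d3m's pipeline deserialization entry point.
with open('pipeline.yml', 'r') as f:
    loaded = Pipeline.from_yaml(f)

# Basic sanity check that the structure survived the round trip.
print(loaded.id)
print(len(loaded.steps), 'steps')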


primitive_tests/build_CategoricalToBinary.py → primitive_tests/data_processing/CategoricalToBinary_pipeline.py View File

@@ -2,14 +2,11 @@ from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')


# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
@@ -24,25 +21,28 @@ step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: Categorical to Binary
primitive_2 = index.get_primitive('d3m.primitives.tods.data_processing.categorical_to_binary')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(3,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: Categorical to Binary
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.categorical_to_binary'))
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(3,))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')

# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)

primitive_tests/build_ColumnFilter_pipeline.py → primitive_tests/data_processing/ColumnFilter_pipeline.py View File

@@ -22,16 +22,16 @@ step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_1.add_output('produce')
pipeline_description.add_step(step_1)

primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.auto_correlation')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True)
step_2.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (2, 3))
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

primitive_3 = index.get_primitive('d3m.primitives.tods.data_processing.column_filter')
step_3 = PrimitiveStep(primitive=primitive_3)
# Step 3: column_filter
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_filter'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)
@@ -39,11 +39,8 @@ pipeline_description.add_step(step_3)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)

primitive_tests/build_ContinuityValidation_pipline.py → primitive_tests/data_processing/ContinuityValidation_pipline.py View File

@@ -18,8 +18,7 @@ step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: ContinuityValidation
# Step 3: ContinuityValidation
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.continuity_validation'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
@@ -32,12 +31,9 @@ pipeline_description.add_step(step_2)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


primitive_tests/build_DuplicationValidation_pipline.py → primitive_tests/data_processing/DuplicationValidation_pipeline.py View File

@@ -13,14 +13,12 @@ step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_0.add_output('produce')
pipeline_description.add_step(step_0)


# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: DuplicationValidation
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.duplication_validation'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
@@ -31,12 +29,9 @@ pipeline_description.add_step(step_2)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


primitive_tests/build_TRMF_pipline.py → primitive_tests/data_processing/TimeIntervalTransform_pipeline.py View File

@@ -8,7 +8,8 @@ pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)
@@ -19,26 +20,18 @@ step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: TRMF
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.trmf'))
# Step 2: time_interval_transform
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.time_interval_transform'))
step_2.add_hyperparameter(name="time_interval", argument_type=ArgumentType.VALUE, data = 'T')
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')

step_2.add_hyperparameter(name = 'lags', argument_type=ArgumentType.VALUE, data = [1,2,10,100])
# step_2.add_hyperparameter(name = 'K', argument_type=ArgumentType.VALUE, data = 3)
# step_2.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = (2, 3, 4, 5, 6))

pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
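The time_interval value 'T' used above matches the pandas offset alias for one minute. Below is a small, self-contained illustration of that resampling convention; the frame is made up and is not tied to the primitive's actual implementation.

import pandas as pd

# Hypothetical series with an irregular timestamp index.
df = pd.DataFrame(
    {'value': [1.0, 2.0, 4.0, 8.0]},
    index=pd.to_datetime(['2020-01-01 00:00:10', '2020-01-01 00:00:40',
                          '2020-01-01 00:01:05', '2020-01-01 00:02:30']),
)

# 'T' is the pandas offset alias for one minute; aggregate each one-minute interval.
print(df.resample('T').mean())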

+ 53
- 0
primitive_tests/detection_algorithm/ABOD_pipeline.py View File

@@ -0,0 +1,53 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: ABOD
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_abod'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
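A quick way to check what was exported is to read example_pipeline.json back with the standard json module. The snippet below is only an illustration: it assumes the file was just written by the script above and that, per the d3m pipeline-description schema, every PRIMITIVE step records the python_path of its primitive.

# Sketch: inspect the exported description with the standard library only.
# Assumes example_pipeline.json was written by the ABOD script above.
import json

with open('example_pipeline.json') as f:
    description = json.load(f)

# Per the d3m pipeline schema, each step entry names its primitive.
for position, step in enumerate(description['steps']):
    print(position, step['primitive']['python_path'])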



primitive_tests/build_AutoEncoder.py → primitive_tests/detection_algorithm/AutoEncoder_pipeline.py

@@ -2,8 +2,6 @@ from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
@@ -29,39 +27,25 @@ step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALU
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
# Step 3: auto encoder
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: auto encoder
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_output('produce')
pipeline_description.add_step(step_5)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
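The first three steps (dataset_to_dataframe, column_parser, extract_columns_by_semantic_types on Attribute columns) are repeated verbatim in every one of these detector tests. A small helper along the lines of the hypothetical build_preamble below, built only from the APIs these scripts already import, would let each test add just its detection step; it is a sketch, not code from this commit.

# Sketch: factor out the shared preamble used by the detector test scripts.
# build_preamble is a hypothetical helper, not part of this repository.
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

def build_preamble():
    pipeline = Pipeline()
    pipeline.add_input(name='inputs')
    paths = [
        'd3m.primitives.tods.data_processing.dataset_to_dataframe',
        'd3m.primitives.tods.data_processing.column_parser',
        'd3m.primitives.tods.data_processing.extract_columns_by_semantic_types',
    ]
    data_reference = 'inputs.0'
    for position, path in enumerate(paths):
        step = PrimitiveStep(primitive=index.get_primitive(path))
        step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=data_reference)
        if path.endswith('extract_columns_by_semantic_types'):
            step.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                                    data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
        step.add_output('produce')
        pipeline.add_step(step)
        data_reference = 'steps.%d.produce' % position
    # A detector step can now read from steps.2.produce, as the scripts above do.
    return pipeline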


+ 54
- 0
primitive_tests/detection_algorithm/AutoRegODetect_pipeline.py

@@ -0,0 +1,54 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import numpy as np


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: AutoRegODetector
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.AutoRegODetector'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
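Reloading the file through the d3m object model is a slightly stronger check than eyeballing the JSON, since unresolved primitives raise immediately. The sketch assumes d3m.metadata.pipeline.Pipeline exposes a from_json class method, as recent d3m core releases do.

# Sketch: round-trip the exported description through the d3m object model.
# Assumes Pipeline.from_json is available in the installed d3m core package.
from d3m.metadata.pipeline import Pipeline

with open('example_pipeline.json') as f:
    reloaded = Pipeline.from_json(f.read())

# Resolution fails loudly if a primitive path is not registered; otherwise
# the reloaded object lists the same steps in the same order.
for position, step in enumerate(reloaded.steps):
    print(position, step.primitive.metadata.query()['python_path'])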


+ 57
- 0
primitive_tests/detection_algorithm/CBLOF_pipline.py

@@ -0,0 +1,57 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: CBLOF
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_cblof'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
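If index.get_primitive raises for a path such as d3m.primitives.tods.detection_algorithm.pyod_cblof, the TODS primitives are probably not installed in the current environment. The sketch below assumes d3m.index.search() returns the python paths of all registered primitives, which is its behaviour in the d3m core package.

# Sketch: list the TODS detection primitives registered in this environment.
# Assumes d3m.index.search() yields primitive python paths.
from d3m import index

for python_path in sorted(index.search()):
    if python_path.startswith('d3m.primitives.tods.detection_algorithm.'):
        print(python_path)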


+ 54
- 0
primitive_tests/detection_algorithm/DeepLog_pipeline.py

@@ -0,0 +1,54 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: deeplog
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.deeplog'))
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)

primitive_tests/build_HBOS_pipline.py → primitive_tests/detection_algorithm/HBOS_pipline.py

@@ -27,42 +27,26 @@ step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALU
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
# Step 3: HBOS
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_hbos'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: HBOS
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_hbos'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')

step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
# step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')

step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


primitive_tests/build_HBOS_score_pipline.py → primitive_tests/detection_algorithm/HBOS_score_pipeline.py

@@ -27,45 +27,22 @@ step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALU
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
# Step 3: HBOS
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_hbos'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
step_3.add_output('produce_score')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: HBOS
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_hbos'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')

step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_5.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
# step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')

step_5.add_output('produce_score')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')
# pipeline_description.add_output(name='output score', data_reference='steps.5.produce_score')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()
# pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')
pipeline_description.add_output(name='output score', data_reference='steps.3.produce_score')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
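This variant exposes the raw outlier scores (produce_score) instead of the predictions. The pipeline schema allows more than one declared output, so both views could be exported together; the two lines below continue from the step numbering of the script above and are only a sketch of that variant.

# Sketch, continuing from the script above: declare both outputs of step 3.
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')
pipeline_description.add_output(name='output score', data_reference='steps.3.produce_score')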


primitive_tests/build_IsolationForest_pipline.py → primitive_tests/detection_algorithm/IsolationForest_pipline.py

@@ -1,11 +1,7 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
@@ -36,24 +32,23 @@ pipeline_description.add_step(step_2)
primitive_3 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_iforest')
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
# step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
# step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce_score')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce_score')
# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 54
- 0
primitive_tests/detection_algorithm/KDiscord_pipeline.py

@@ -0,0 +1,54 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: KDiscordODetector
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.KDiscordODetector'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10)
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
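window_size is the setting most worth varying for KDiscordODetector. Reusing the hypothetical build_preamble helper sketched after the AutoEncoder test, one description per window size could be written out; the helper and the file names are assumptions for illustration only.

# Sketch: emit one KDiscordODetector description per window size.
# build_preamble is the hypothetical helper sketched earlier; it returns a
# pipeline whose attribute columns are available at steps.2.produce.
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import PrimitiveStep

for window_size in (5, 10, 20):
    pipeline = build_preamble()
    step = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.KDiscordODetector'))
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
    step.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
    step.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=window_size)
    step.add_output('produce')
    pipeline.add_step(step)
    pipeline.add_output(name='output predictions', data_reference='steps.3.produce')
    with open('KDiscord_window_%d.json' % window_size, 'w') as f:
        f.write(pipeline.to_json())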


+ 55
- 0
primitive_tests/detection_algorithm/KNN_pipeline.py

@@ -0,0 +1,55 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: KNN
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_knn'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 55
- 0
primitive_tests/detection_algorithm/LODA_pipeline.py

@@ -0,0 +1,55 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: LODA
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_loda'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 55
- 0
primitive_tests/detection_algorithm/LOF_pipeline.py

@@ -0,0 +1,55 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: LOF
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_lof'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 55
- 0
primitive_tests/detection_algorithm/LSTMOD_pipeline.py

@@ -0,0 +1,55 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: LSTMODetector
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.LSTMODetector'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='diff_group_method', argument_type=ArgumentType.VALUE, data='average')
step_3.add_hyperparameter(name='feature_dim', argument_type=ArgumentType.VALUE, data=6)
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


primitive_tests/build_AutoRegODetect_pipeline.py → primitive_tests/detection_algorithm/MatrixProfile_pipeline.py

@@ -2,10 +2,7 @@ from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import numpy as np

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
@@ -18,7 +15,7 @@ step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
@@ -32,40 +29,28 @@ step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# # Step 3: Standardization
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_3 = PrimitiveStep(primitive=primitive_3)
# Step 3: matrix_profile
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.matrix_profile'))
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,)) # There is something wrong with multi-dimensional input
step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=3) # There is something wrong with multi-dimensional input
# step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# # Step 4: test primitive
primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.AutoRegODetector')
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_4.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10)
# step_4.add_hyperparameter(name='weights', argument_type=ArgumentType.VALUE, data=weights_ndarray)
step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False)
# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # There is sth wrong with multi-dimensional
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
step_4.add_output('produce_score')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)

+ 55
- 0
primitive_tests/detection_algorithm/OCSVM_pipline.py

@@ -0,0 +1,55 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: OCSVM
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ocsvm'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 53
- 0
primitive_tests/detection_algorithm/PCAODetect_pipeline.py

@@ -0,0 +1,53 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: PCAODetector
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.PCAODetector'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 55
- 0
primitive_tests/detection_algorithm/PyodCOF.py

@@ -0,0 +1,55 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# # Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# # Step 3: COF
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_cof'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)


+ 54
- 0
primitive_tests/detection_algorithm/PyodMoGaal_pipeline.py

@@ -0,0 +1,54 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: MoGaal
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_mogaal'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)

+ 54
- 0
primitive_tests/detection_algorithm/PyodSoGaal_pipeline.py

@@ -0,0 +1,54 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: SoGaal
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sogaal'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)

primitive_tests/build_PCAODetect_pipeline.py → primitive_tests/detection_algorithm/SOD_pipeline.py

@@ -2,10 +2,7 @@ from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import numpy as np

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
@@ -32,40 +29,27 @@ step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# # Step 3: Standardization
primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_3 = PrimitiveStep(primitive=primitive_3)
# Step 3: SOD
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sod'))
step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# # Step 4: test primitive
primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.PCAODetector')
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_4.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10)
# step_4.add_hyperparameter(name='weights', argument_type=ArgumentType.VALUE, data=weights_ndarray)
step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False)
# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # There is sth wrong with multi-dimensional
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True)
# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
step_4.add_output('produce_score')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)

+ 54
- 0
primitive_tests/detection_algorithm/Telemanom_pipeline.py

@@ -0,0 +1,54 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')


# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: Column Parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: telemanom
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.telemanom'))
# step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
# step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
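
The dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) prefix above is repeated verbatim in every pipeline in this directory. A sketch of a shared helper built only from the calls these scripts already use (the name add_preprocessing_steps is illustrative, not a repository function; it assumes it is called on a freshly created pipeline so that step indices start at 0):

from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


def add_preprocessing_steps(pipeline_description):
    # dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes)
    paths = [
        'd3m.primitives.tods.data_processing.dataset_to_dataframe',
        'd3m.primitives.tods.data_processing.column_parser',
        'd3m.primitives.tods.data_processing.extract_columns_by_semantic_types',
    ]
    previous = 'inputs.0'
    for i, path in enumerate(paths):
        step = PrimitiveStep(primitive=index.get_primitive(path))
        step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=previous)
        if path.endswith('extract_columns_by_semantic_types'):
            step.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                                    data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
        step.add_output('produce')
        pipeline_description.add_step(step)
        previous = 'steps.%d.produce' % i
    # Data reference of the attributes dataframe ('steps.2.produce').
    return previous


pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')
attributes_ref = add_preprocessing_steps(pipeline_description)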

primitive_tests/build_VariationalAutoEncoder.py → primitive_tests/detection_algorithm/VariationalAutoEncoder_pipeline.py

@@ -2,8 +2,6 @@ from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
@@ -29,39 +27,25 @@ step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALU
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
# Step 3: variational auto encoder
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_vae'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: imputer
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
# Step 4: Predictions
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: variatinal auto encoder
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_vae'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_5.add_output('produce')
pipeline_description.add_step(step_5)


# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)
pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')

# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
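
This rename also switches the script from YAML output to JSON. If a YAML copy is still wanted, a small round-trip sketch follows; it assumes Pipeline.from_json is available in the installed d3m version.

from d3m.metadata.pipeline import Pipeline

# Re-load the serialized description and re-emit it as YAML.
with open('example_pipeline.json') as f:
    pipeline_description = Pipeline.from_json(f)

with open('pipeline.yml', 'w') as f:
    f.write(pipeline_description.to_yaml())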


primitive_tests/build_BKFilter_pipline.py → primitive_tests/feature_analysis/BKFilter_pipeline.py

@@ -13,32 +13,34 @@ step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_0.add_output('produce')
pipeline_description.add_step(step_0)


# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: BKFilter
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.bk_filter'))
# step_2.add_hyperparameter(name = 'columns_using_method', argument_type=ArgumentType.VALUE, data = 'name')
step_2.add_hyperparameter(name = 'use_semantic_types', argument_type=ArgumentType.VALUE, data = True)
step_2.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = (2,3))
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')
# Step 3: BKFilter
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.bk_filter'))
step_3.add_hyperparameter(name = 'use_semantic_types', argument_type=ArgumentType.VALUE, data = True)
step_3.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = (2,3))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')

# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
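
For intuition about what the bk_filter step computes on the selected columns, here is a standalone sketch with statsmodels' Baxter-King band-pass filter; the low/high/K values are the common textbook defaults and may not match the primitive's own defaults.

import numpy as np
from statsmodels.tsa.filters.bk_filter import bkfilter

# A noisy series with a slow cycle, standing in for one attribute column.
t = np.arange(500)
series = np.sin(2 * np.pi * t / 60) + 0.3 * np.random.randn(500)

# Keep fluctuations between 6 and 32 samples long; K observations are
# trimmed from each end of the filtered output.
cycle = bkfilter(series, low=6, high=32, K=12)
print(cycle.shape)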


primitive_tests/build_FastFourierTransform.py → primitive_tests/feature_analysis/DiscreteCosineTransform_pipeline.py

@@ -2,8 +2,6 @@ from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
@@ -24,25 +22,28 @@ step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: Fast Fourier Transform
primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.fast_fourier_transform')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: discrete_cosine_transform
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.discrete_cosine_transform'))
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()
# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
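
For intuition, the per-column discrete cosine transform can be previewed with SciPy; using the type-II DCT with orthonormal scaling is an assumption here, not something the primitive documents.

import numpy as np
from scipy.fft import dct

# One synthetic column, standing in for columns 2-4 selected above.
x = np.sin(2 * np.pi * np.arange(128) / 16)

# Type-II DCT with orthonormal scaling; low-index coefficients capture
# the smooth/trend content of the series.
coeffs = dct(x, type=2, norm='ortho')
print(coeffs[:8])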

primitive_tests/build_DiscreteCosineTransform.py → primitive_tests/feature_analysis/FastFourierTransform_pipeline.py

@@ -2,8 +2,6 @@ from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
@@ -24,27 +22,28 @@ step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: Discrete Cosine Transform
primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.discrete_cosine_transform')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: Fast Fourier Transform
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.fast_fourier_transform'))
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
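
Similarly, the fast_fourier_transform step can be previewed with NumPy's real FFT; this sketch only illustrates the spectrum of a single synthetic column, while return_result='append' above controls how the primitive writes its results back.

import numpy as np

# A 4 Hz sine sampled at 100 Hz, standing in for one attribute column.
fs = 100.0
t = np.arange(0, 2, 1 / fs)
x = np.sin(2 * np.pi * 4 * t)

spectrum = np.abs(np.fft.rfft(x))
freqs = np.fft.rfftfreq(len(x), d=1 / fs)
print(freqs[np.argmax(spectrum)])  # dominant frequency, ~4.0 Hz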

primitive_tests/build_HPFilter_pipline.py → primitive_tests/feature_analysis/HPFilter_pipeline.py

@@ -13,34 +13,34 @@ step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_0.add_output('produce')
pipeline_description.add_step(step_0)


# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)


# Step 2: HPFilter
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.hp_filter'))
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')

step_2.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = [2,3,6])

step_2.add_hyperparameter(name = 'use_semantic_types', argument_type=ArgumentType.VALUE, data = True)
step_2.add_hyperparameter(name = 'return_result', argument_type=ArgumentType.VALUE, data = 'append')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')
# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)
# Step 3: HPFilter
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.hp_filter'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = (2,3))
step_3.add_hyperparameter(name = 'use_semantic_types', argument_type=ArgumentType.VALUE, data = True)
step_3.add_hyperparameter(name = 'return_result', argument_type=ArgumentType.VALUE, data = 'append')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Or you can output json
#data = pipline_description.to_json()
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
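
The hp_filter step splits each selected column into cycle and trend components; a standalone sketch with statsmodels' Hodrick-Prescott filter follows (lamb=1600 is the quarterly-data textbook default, not necessarily what the primitive uses).

import numpy as np
from statsmodels.tsa.filters.hp_filter import hpfilter

# A trending series with a superimposed cycle.
t = np.arange(400)
series = 0.05 * t + np.sin(2 * np.pi * t / 50)

# hpfilter returns the cyclical and trend components separately.
cycle, trend = hpfilter(series, lamb=1600)
print(cycle[:5], trend[:5])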

primitive_tests/build_NonNegativeMatrixFactorization.py → primitive_tests/feature_analysis/NonNegativeMatrixFactorization_pipeline.py

@@ -2,8 +2,6 @@ from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
# extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
@@ -24,27 +22,29 @@ step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_re
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: Non Negative Matrix Factorization
primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.non_negative_matrix_factorization')
step_2 = PrimitiveStep(primitive=primitive_2)
step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_2.add_hyperparameter(name='rank', argument_type=ArgumentType.VALUE, data=5)
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: Non Negative Matrix Factorization
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.non_negative_matrix_factorization'))
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
step_3.add_hyperparameter(name='rank', argument_type=ArgumentType.VALUE, data=5)
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
f.write(yaml)
print(yaml)

# Or you can output json
#data = pipline_description.to_json()
pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce')

# Output to JSON
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
f.write(data)
print(data)
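
The rank=5 hyperparameter above sets the number of latent components; a standalone sketch of a comparable factorization with scikit-learn follows (whether the primitive wraps this exact implementation is an assumption).

import numpy as np
from sklearn.decomposition import NMF

# NMF requires non-negative input; a small random (samples, features)
# matrix stands in for the extracted attribute columns.
X = np.random.rand(100, 8)

model = NMF(n_components=5, init='random', random_state=0, max_iter=500)
W = model.fit_transform(X)   # (100, 5) activations
H = model.components_        # (5, 8) basis
print(W.shape, H.shape)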

Some files were not shown because too many files changed in this diff
