Browse Source

recover KPI example dataset

master
lhenry15 4 years ago
parent
commit
04155fc0b8
100 changed files with 31626 additions and 197872 deletions
  1. +0
    -68
      datasets/NAB/README.md
  2. +0
    -4033
      datasets/NAB/artificialNoAnomaly/labeled_art_daily_no_noise.csv
  3. +0
    -4033
      datasets/NAB/artificialNoAnomaly/labeled_art_daily_perfect_square_wave.csv
  4. +0
    -4033
      datasets/NAB/artificialNoAnomaly/labeled_art_daily_small_noise.csv
  5. +0
    -4033
      datasets/NAB/artificialNoAnomaly/labeled_art_flatline.csv
  6. +0
    -4033
      datasets/NAB/artificialNoAnomaly/labeled_art_noisy.csv
  7. +0
    -4033
      datasets/NAB/artificialWithAnomaly/labeled_art_daily_flatmiddle.csv
  8. +0
    -4033
      datasets/NAB/artificialWithAnomaly/labeled_art_daily_jumpsdown.csv
  9. +0
    -4033
      datasets/NAB/artificialWithAnomaly/labeled_art_daily_jumpsup.csv
  10. +0
    -4033
      datasets/NAB/artificialWithAnomaly/labeled_art_daily_nojump.csv
  11. +0
    -4033
      datasets/NAB/artificialWithAnomaly/labeled_art_increase_spike_density.csv
  12. +0
    -4033
      datasets/NAB/artificialWithAnomaly/labeled_art_load_balancer_spikes.csv
  13. +0
    -232
      datasets/NAB/combined_labels.json
  14. +0
    -4033
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_24ae8d.csv
  15. +0
    -4033
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_53ea38.csv
  16. +0
    -4033
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_5f5533.csv
  17. +0
    -4033
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_77c1ca.csv
  18. +0
    -4033
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_825cc2.csv
  19. +0
    -4033
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_ac20cd.csv
  20. +0
    -4033
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_c6585a.csv
  21. +0
    -4033
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_fe7f93.csv
  22. +0
    -4731
      datasets/NAB/realAWSCloudwatch/labeled_ec2_disk_write_bytes_1ef3de.csv
  23. +0
    -4033
      datasets/NAB/realAWSCloudwatch/labeled_ec2_disk_write_bytes_c0d644.csv
  24. +0
    -4033
      datasets/NAB/realAWSCloudwatch/labeled_ec2_network_in_257a54.csv
  25. +0
    -4731
      datasets/NAB/realAWSCloudwatch/labeled_ec2_network_in_5abac7.csv
  26. +0
    -4033
      datasets/NAB/realAWSCloudwatch/labeled_elb_request_count_8c0756.csv
  27. +0
    -4622
      datasets/NAB/realAWSCloudwatch/labeled_grok_asg_anomaly.csv
  28. +0
    -1244
      datasets/NAB/realAWSCloudwatch/labeled_iio_us-east-1_i-a2eb1cd9_NetworkIn.csv
  29. +0
    -4033
      datasets/NAB/realAWSCloudwatch/labeled_rds_cpu_utilization_cc0c53.csv
  30. +0
    -4033
      datasets/NAB/realAWSCloudwatch/labeled_rds_cpu_utilization_e47b3b.csv
  31. +0
    -1625
      datasets/NAB/realAdExchange/labeled_exchange-2_cpc_results.csv
  32. +0
    -1625
      datasets/NAB/realAdExchange/labeled_exchange-2_cpm_results.csv
  33. +0
    -1539
      datasets/NAB/realAdExchange/labeled_exchange-3_cpc_results.csv
  34. +0
    -1539
      datasets/NAB/realAdExchange/labeled_exchange-3_cpm_results.csv
  35. +0
    -1644
      datasets/NAB/realAdExchange/labeled_exchange-4_cpc_results.csv
  36. +0
    -1644
      datasets/NAB/realAdExchange/labeled_exchange-4_cpm_results.csv
  37. +0
    -7268
      datasets/NAB/realKnownCause/labeled_ambient_temperature_system_failure.csv
  38. +0
    -1
      datasets/NAB/realKnownCause/labeled_cpu_utilization_asg_misconfiguration.csv.REMOVED.git-id
  39. +0
    -4033
      datasets/NAB/realKnownCause/labeled_ec2_request_latency_system_failure.csv
  40. +0
    -1
      datasets/NAB/realKnownCause/labeled_machine_temperature_system_failure.csv.REMOVED.git-id
  41. +0
    -10321
      datasets/NAB/realKnownCause/labeled_nyc_taxi.csv
  42. +0
    -1883
      datasets/NAB/realKnownCause/labeled_rogue_agent_key_hold.csv
  43. +0
    -5316
      datasets/NAB/realKnownCause/labeled_rogue_agent_key_updown.csv
  44. +0
    -2501
      datasets/NAB/realTraffic/labeled_TravelTime_387.csv
  45. +0
    -2163
      datasets/NAB/realTraffic/labeled_TravelTime_451.csv
  46. +0
    -2381
      datasets/NAB/realTraffic/labeled_occupancy_6005.csv
  47. +0
    -2501
      datasets/NAB/realTraffic/labeled_occupancy_t4013.csv
  48. +0
    -2501
      datasets/NAB/realTraffic/labeled_speed_6005.csv
  49. +0
    -1128
      datasets/NAB/realTraffic/labeled_speed_7578.csv
  50. +0
    -2496
      datasets/NAB/realTraffic/labeled_speed_t4013.csv
  51. +0
    -1
      datasets/NAB/realTweets/labeled_Twitter_volume_AAPL.csv.REMOVED.git-id
  52. +0
    -1
      datasets/NAB/realTweets/labeled_Twitter_volume_AMZN.csv.REMOVED.git-id
  53. +0
    -1
      datasets/NAB/realTweets/labeled_Twitter_volume_CRM.csv.REMOVED.git-id
  54. +0
    -1
      datasets/NAB/realTweets/labeled_Twitter_volume_CVS.csv.REMOVED.git-id
  55. +0
    -1
      datasets/NAB/realTweets/labeled_Twitter_volume_FB.csv.REMOVED.git-id
  56. +0
    -1
      datasets/NAB/realTweets/labeled_Twitter_volume_GOOG.csv.REMOVED.git-id
  57. +0
    -1
      datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv.REMOVED.git-id
  58. +0
    -1
      datasets/NAB/realTweets/labeled_Twitter_volume_KO.csv.REMOVED.git-id
  59. +0
    -1
      datasets/NAB/realTweets/labeled_Twitter_volume_PFE.csv.REMOVED.git-id
  60. +0
    -1
      datasets/NAB/realTweets/labeled_Twitter_volume_UPS.csv.REMOVED.git-id
  61. +7028
    -0
      datasets/anomaly/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv
  62. +0
    -1
      datasets/anomaly/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv.REMOVED.git-id
  63. +8785
    -0
      datasets/anomaly/kpi/kpi_dataset/tables/learningData.csv
  64. +7028
    -0
      datasets/anomaly/kpi/kpi_problem/dataSplits.csv
  65. +8785
    -0
      datasets/anomaly/raw_data/kpi.csv
  66. +0
    -1
      datasets/anomaly/raw_data/kpi.csv.REMOVED.git-id
  67. +0
    -63
      datasets/anomaly_reserve/kpi/SCORE/dataset_TEST/datasetDoc.json
  68. +0
    -1758
      datasets/anomaly_reserve/kpi/SCORE/dataset_TEST/tables/learningData.csv
  69. +0
    -7028
      datasets/anomaly_reserve/kpi/SCORE/problem_TEST/dataSplits.csv
  70. +0
    -65
      datasets/anomaly_reserve/kpi/SCORE/problem_TEST/problemDoc.json
  71. +0
    -0
      datasets/anomaly_reserve/kpi/SCORE/targets.csv
  72. +0
    -63
      datasets/anomaly_reserve/kpi/TEST/dataset_TEST/datasetDoc.json
  73. +0
    -1758
      datasets/anomaly_reserve/kpi/TEST/dataset_TEST/tables/learningData.csv
  74. +0
    -7028
      datasets/anomaly_reserve/kpi/TEST/problem_TEST/dataSplits.csv
  75. +0
    -65
      datasets/anomaly_reserve/kpi/TEST/problem_TEST/problemDoc.json
  76. +0
    -63
      datasets/anomaly_reserve/kpi/TRAIN/dataset_TRAIN/datasetDoc.json
  77. +0
    -1
      datasets/anomaly_reserve/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv.REMOVED.git-id
  78. +0
    -7028
      datasets/anomaly_reserve/kpi/TRAIN/problem_TRAIN/dataSplits.csv
  79. +0
    -65
      datasets/anomaly_reserve/kpi/TRAIN/problem_TRAIN/problemDoc.json
  80. +0
    -63
      datasets/anomaly_reserve/kpi/kpi_dataset/datasetDoc.json
  81. +0
    -1
      datasets/anomaly_reserve/kpi/kpi_dataset/tables/learningData.csv.REMOVED.git-id
  82. +0
    -1
      datasets/anomaly_reserve/kpi/kpi_problem/dataSplits.csv.REMOVED.git-id
  83. +0
    -65
      datasets/anomaly_reserve/kpi/kpi_problem/problemDoc.json
  84. +0
    -1
      datasets/anomaly_reserve/raw_data/kpi.csv.REMOVED.git-id
  85. +0
    -183
      datasets/anomaly_reserve/template/datasetDoc.json
  86. +0
    -65
      datasets/anomaly_reserve/template/problemDoc.json
  87. +0
    -160
      datasets/anomaly_reserve/transform.py
  88. +0
    -95
      datasets/anomaly_reserve/yahoo_sub_5/SCORE/dataset_TEST/datasetDoc.json
  89. +0
    -141
      datasets/anomaly_reserve/yahoo_sub_5/SCORE/dataset_TEST/tables/learningData.csv
  90. +0
    -1261
      datasets/anomaly_reserve/yahoo_sub_5/SCORE/problem_TEST/dataSplits.csv
  91. +0
    -65
      datasets/anomaly_reserve/yahoo_sub_5/SCORE/problem_TEST/problemDoc.json
  92. +0
    -0
      datasets/anomaly_reserve/yahoo_sub_5/SCORE/targets.csv
  93. +0
    -95
      datasets/anomaly_reserve/yahoo_sub_5/TEST/dataset_TEST/datasetDoc.json
  94. +0
    -141
      datasets/anomaly_reserve/yahoo_sub_5/TEST/dataset_TEST/tables/learningData.csv
  95. +0
    -1261
      datasets/anomaly_reserve/yahoo_sub_5/TEST/problem_TEST/dataSplits.csv
  96. +0
    -65
      datasets/anomaly_reserve/yahoo_sub_5/TEST/problem_TEST/problemDoc.json
  97. +0
    -95
      datasets/anomaly_reserve/yahoo_sub_5/TRAIN/dataset_TRAIN/datasetDoc.json
  98. +0
    -1261
      datasets/anomaly_reserve/yahoo_sub_5/TRAIN/dataset_TRAIN/tables/learningData.csv
  99. +0
    -1261
      datasets/anomaly_reserve/yahoo_sub_5/TRAIN/problem_TRAIN/dataSplits.csv
  100. +0
    -65
      datasets/anomaly_reserve/yahoo_sub_5/TRAIN/problem_TRAIN/problemDoc.json

+ 0
- 68
datasets/NAB/README.md View File

@@ -1,68 +0,0 @@
NAB Data Corpus
---

Data are ordered, timestamped, single-valued metrics. All data files contain anomalies, unless otherwise noted.


### Real data
- realAWSCloudwatch/

AWS server metrics as collected by the Amazon CloudWatch service. Example metrics include CPU Utilization, Network Bytes In, and Disk Read Bytes.

- realAdExchange/
Online advertisement clicking rates, where the metrics are cost-per-click (CPC) and cost per thousand impressions (CPM). One of the files is normal, without anomalies.
- realKnownCause/

This is data for which we know the anomaly causes; no hand labeling.
- ambient_temperature_system_failure.csv: The ambient temperature in an office
setting.
- cpu_utilization_asg_misconfiguration.csv: From Amazon Web Services (AWS)
monitoring CPU usage – i.e. average CPU usage across a given cluster. When
usage is high, AWS spins up a new machine, and uses fewer machines when usage
is low.
- ec2_request_latency_system_failure.csv: CPU usage data from a server in
Amazon's East Coast datacenter. The dataset ends with complete system failure
resulting from a documented failure of AWS API servers. There's an interesting
story behind this data in the [Numenta
blog](http://numenta.com/blog/anomaly-of-the-week.html).
- machine_temperature_system_failure.csv: Temperature sensor data of an
internal component of a large, industrial machine. The first anomaly is a
planned shutdown of the machine. The second anomaly is difficult to detect and
directly led to the third anomaly, a catastrophic failure of the machine.
- nyc_taxi.csv: Number of NYC taxi passengers, where the five anomalies occur
during the NYC marathon, Thanksgiving, Christmas, New Year's Day, and a snow
storm. The raw data is from the [NYC Taxi and Limousine Commission](http://www.nyc.gov/html/tlc/html/about/trip_record_data.shtml).
The data file included here consists of aggregating the total number of
taxi passengers into 30 minute buckets.
- rogue_agent_key_hold.csv: Timing the key holds for several users of a
computer, where the anomalies represent a change in the user.
- rogue_agent_key_updown.csv: Timing the key strokes for several users of a
computer, where the anomalies represent a change in the user.

- realTraffic/

Real-time traffic data from the Twin Cities Metro area in Minnesota, collected
by the
[Minnesota Department of Transportation](http://www.dot.state.mn.us/tmc/trafficinfo/developers.html).
Metrics include occupancy, speed, and travel time from specific
sensors.

- realTweets/

A collection of Twitter mentions of large publicly-traded companies
such as Google and IBM. The metric value represents the number of mentions
for a given ticker symbol every 5 minutes.


### Artificial data

- artificialNoAnomaly/

Artificially-generated data without any anomalies.

- artificialWithAnomaly/

Artificially-generated data with varying types of anomalies.

+ 0
- 4033
datasets/NAB/artificialNoAnomaly/labeled_art_daily_no_noise.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/artificialNoAnomaly/labeled_art_daily_perfect_square_wave.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/artificialNoAnomaly/labeled_art_daily_small_noise.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/artificialNoAnomaly/labeled_art_flatline.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/artificialNoAnomaly/labeled_art_noisy.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/artificialWithAnomaly/labeled_art_daily_flatmiddle.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/artificialWithAnomaly/labeled_art_daily_jumpsdown.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/artificialWithAnomaly/labeled_art_daily_jumpsup.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/artificialWithAnomaly/labeled_art_daily_nojump.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/artificialWithAnomaly/labeled_art_increase_spike_density.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/artificialWithAnomaly/labeled_art_load_balancer_spikes.csv
File diff suppressed because it is too large
View File


+ 0
- 232
datasets/NAB/combined_labels.json View File

@@ -1,232 +0,0 @@
{
"artificialNoAnomaly/art_daily_no_noise.csv": [],
"artificialNoAnomaly/art_daily_perfect_square_wave.csv": [],
"artificialNoAnomaly/art_daily_small_noise.csv": [],
"artificialNoAnomaly/art_flatline.csv": [],
"artificialNoAnomaly/art_noisy.csv": [],
"artificialWithAnomaly/art_daily_flatmiddle.csv": [
"2014-04-11 00:00:00"
],
"artificialWithAnomaly/art_daily_jumpsdown.csv": [
"2014-04-11 09:00:00"
],
"artificialWithAnomaly/art_daily_jumpsup.csv": [
"2014-04-11 09:00:00"
],
"artificialWithAnomaly/art_daily_nojump.csv": [
"2014-04-11 09:00:00"
],
"artificialWithAnomaly/art_increase_spike_density.csv": [
"2014-04-07 23:10:00"
],
"artificialWithAnomaly/art_load_balancer_spikes.csv": [
"2014-04-11 04:35:00"
],
"realAWSCloudwatch/ec2_cpu_utilization_24ae8d.csv": [
"2014-02-26 22:05:00",
"2014-02-27 17:15:00"
],
"realAWSCloudwatch/ec2_cpu_utilization_53ea38.csv": [
"2014-02-19 19:10:00",
"2014-02-23 20:05:00"
],
"realAWSCloudwatch/ec2_cpu_utilization_5f5533.csv": [
"2014-02-19 00:22:00",
"2014-02-24 18:37:00"
],
"realAWSCloudwatch/ec2_cpu_utilization_77c1ca.csv": [
"2014-04-09 10:15:00"
],
"realAWSCloudwatch/ec2_cpu_utilization_825cc2.csv": [
"2014-04-15 15:44:00",
"2014-04-16 03:34:00"
],
"realAWSCloudwatch/ec2_cpu_utilization_ac20cd.csv": [
"2014-04-15 00:49:00"
],
"realAWSCloudwatch/ec2_cpu_utilization_c6585a.csv": [],
"realAWSCloudwatch/ec2_cpu_utilization_fe7f93.csv": [
"2014-02-17 06:12:00",
"2014-02-22 00:02:00",
"2014-02-23 15:17:00"
],
"realAWSCloudwatch/ec2_disk_write_bytes_1ef3de.csv": [
"2014-03-10 21:09:00"
],
"realAWSCloudwatch/ec2_disk_write_bytes_c0d644.csv": [
"2014-04-09 01:30:00",
"2014-04-10 14:35:00",
"2014-04-13 03:00:00"
],
"realAWSCloudwatch/ec2_network_in_257a54.csv": [
"2014-04-15 16:44:00"
],
"realAWSCloudwatch/ec2_network_in_5abac7.csv": [
"2014-03-10 18:56:00",
"2014-03-12 21:01:00"
],
"realAWSCloudwatch/elb_request_count_8c0756.csv": [
"2014-04-12 17:24:00",
"2014-04-22 19:34:00"
],
"realAWSCloudwatch/grok_asg_anomaly.csv": [
"2014-01-20 08:30:00",
"2014-01-21 10:45:00",
"2014-01-29 00:45:00"
],
"realAWSCloudwatch/iio_us-east-1_i-a2eb1cd9_NetworkIn.csv": [
"2013-10-10 09:35:00",
"2013-10-10 20:40:00"
],
"realAWSCloudwatch/rds_cpu_utilization_cc0c53.csv": [
"2014-02-25 07:15:00",
"2014-02-27 00:50:00"
],
"realAWSCloudwatch/rds_cpu_utilization_e47b3b.csv": [
"2014-04-13 06:52:00",
"2014-04-18 23:27:00"
],
"realAdExchange/exchange-2_cpc_results.csv": [
"2011-07-14 13:00:01"
],
"realAdExchange/exchange-2_cpm_results.csv": [
"2011-07-26 06:00:01",
"2011-08-10 17:00:01"
],
"realAdExchange/exchange-3_cpc_results.csv": [
"2011-07-14 10:15:01",
"2011-07-20 10:15:01",
"2011-08-13 10:15:01"
],
"realAdExchange/exchange-3_cpm_results.csv": [
"2011-08-19 18:15:01"
],
"realAdExchange/exchange-4_cpc_results.csv": [
"2011-07-16 09:15:01",
"2011-08-02 12:15:01",
"2011-08-23 08:15:01"
],
"realAdExchange/exchange-4_cpm_results.csv": [
"2011-07-16 09:15:01",
"2011-08-01 07:15:01",
"2011-08-23 08:15:01",
"2011-08-28 13:15:01"
],
"realKnownCause/ambient_temperature_system_failure.csv": [
"2013-12-22 20:00:00",
"2014-04-13 09:00:00"
],
"realKnownCause/cpu_utilization_asg_misconfiguration.csv": [
"2014-07-12 02:04:00",
"2014-07-14 21:44:00"
],
"realKnownCause/ec2_request_latency_system_failure.csv": [
"2014-03-14 09:06:00",
"2014-03-18 22:41:00",
"2014-03-21 03:01:00"
],
"realKnownCause/machine_temperature_system_failure.csv": [
"2013-12-11 06:00:00",
"2013-12-16 17:25:00",
"2014-01-28 13:55:00",
"2014-02-08 14:30:00"
],
"realKnownCause/nyc_taxi.csv": [
"2014-11-01 19:00:00",
"2014-11-27 15:30:00",
"2014-12-25 15:00:00",
"2015-01-01 01:00:00",
"2015-01-27 00:00:00"
],
"realKnownCause/rogue_agent_key_hold.csv": [
"2014-07-15 08:30:00",
"2014-07-17 09:50:00"
],
"realKnownCause/rogue_agent_key_updown.csv": [
"2014-07-15 04:00:00",
"2014-07-17 08:50:00"
],
"realTraffic/TravelTime_387.csv": [
"2015-07-30 12:29:00",
"2015-08-18 16:26:00",
"2015-09-01 05:34:00"
],
"realTraffic/TravelTime_451.csv": [
"2015-08-11 12:07:00"
],
"realTraffic/occupancy_6005.csv": [
"2015-09-15 06:55:00"
],
"realTraffic/occupancy_t4013.csv": [
"2015-09-16 08:09:00",
"2015-09-17 07:55:00"
],
"realTraffic/speed_6005.csv": [
"2015-09-17 07:00:00"
],
"realTraffic/speed_7578.csv": [
"2015-09-11 16:44:00",
"2015-09-15 14:34:00",
"2015-09-16 14:14:00",
"2015-09-16 17:10:00"
],
"realTraffic/speed_t4013.csv": [
"2015-09-16 08:04:00",
"2015-09-17 08:15:00"
],
"realTweets/Twitter_volume_AAPL.csv": [
"2015-03-03 21:07:53",
"2015-03-09 17:32:53",
"2015-03-16 02:57:53",
"2015-03-31 03:27:53"
],
"realTweets/Twitter_volume_AMZN.csv": [
"2015-03-05 19:47:53",
"2015-03-11 20:57:53",
"2015-04-01 21:57:53",
"2015-04-08 04:52:53"
],
"realTweets/Twitter_volume_CRM.csv": [
"2015-03-09 19:07:53",
"2015-03-19 23:07:53",
"2015-03-26 19:07:53"
],
"realTweets/Twitter_volume_CVS.csv": [
"2015-03-04 16:02:53",
"2015-03-05 19:57:53",
"2015-03-26 14:07:53",
"2015-04-14 22:37:53"
],
"realTweets/Twitter_volume_FB.csv": [
"2015-03-16 07:07:53",
"2015-04-03 17:47:53"
],
"realTweets/Twitter_volume_GOOG.csv": [
"2015-03-13 20:22:53",
"2015-03-14 16:27:53",
"2015-03-22 22:52:53",
"2015-04-01 05:27:53"
],
"realTweets/Twitter_volume_IBM.csv": [
"2015-03-23 22:27:53",
"2015-04-20 20:07:53"
],
"realTweets/Twitter_volume_KO.csv": [
"2015-03-20 13:12:53",
"2015-04-08 23:42:53",
"2015-04-14 14:52:53"
],
"realTweets/Twitter_volume_PFE.csv": [
"2015-03-02 21:22:53",
"2015-03-04 10:32:53",
"2015-03-13 19:57:53",
"2015-04-07 23:42:53"
],
"realTweets/Twitter_volume_UPS.csv": [
"2015-03-03 00:27:53",
"2015-03-04 11:07:53",
"2015-03-05 15:22:53",
"2015-03-24 18:17:53",
"2015-03-29 16:27:53"
]
}

+ 0
- 4033
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_24ae8d.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_53ea38.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_5f5533.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_77c1ca.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_825cc2.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_ac20cd.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_c6585a.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_fe7f93.csv
File diff suppressed because it is too large
View File


+ 0
- 4731
datasets/NAB/realAWSCloudwatch/labeled_ec2_disk_write_bytes_1ef3de.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/realAWSCloudwatch/labeled_ec2_disk_write_bytes_c0d644.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/realAWSCloudwatch/labeled_ec2_network_in_257a54.csv
File diff suppressed because it is too large
View File


+ 0
- 4731
datasets/NAB/realAWSCloudwatch/labeled_ec2_network_in_5abac7.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/realAWSCloudwatch/labeled_elb_request_count_8c0756.csv
File diff suppressed because it is too large
View File


+ 0
- 4622
datasets/NAB/realAWSCloudwatch/labeled_grok_asg_anomaly.csv
File diff suppressed because it is too large
View File


+ 0
- 1244
datasets/NAB/realAWSCloudwatch/labeled_iio_us-east-1_i-a2eb1cd9_NetworkIn.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/realAWSCloudwatch/labeled_rds_cpu_utilization_cc0c53.csv
File diff suppressed because it is too large
View File


+ 0
- 4033
datasets/NAB/realAWSCloudwatch/labeled_rds_cpu_utilization_e47b3b.csv
File diff suppressed because it is too large
View File


+ 0
- 1625
datasets/NAB/realAdExchange/labeled_exchange-2_cpc_results.csv
File diff suppressed because it is too large
View File


+ 0
- 1625
datasets/NAB/realAdExchange/labeled_exchange-2_cpm_results.csv
File diff suppressed because it is too large
View File


+ 0
- 1539
datasets/NAB/realAdExchange/labeled_exchange-3_cpc_results.csv
File diff suppressed because it is too large
View File


+ 0
- 1539
datasets/NAB/realAdExchange/labeled_exchange-3_cpm_results.csv
File diff suppressed because it is too large
View File


+ 0
- 1644
datasets/NAB/realAdExchange/labeled_exchange-4_cpc_results.csv
File diff suppressed because it is too large
View File


+ 0
- 1644
datasets/NAB/realAdExchange/labeled_exchange-4_cpm_results.csv
File diff suppressed because it is too large
View File


+ 0
- 7268
datasets/NAB/realKnownCause/labeled_ambient_temperature_system_failure.csv
File diff suppressed because it is too large
View File


+ 0
- 1
datasets/NAB/realKnownCause/labeled_cpu_utilization_asg_misconfiguration.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
948611b07519538ef036e0ec1c948f6bf97009cf

+ 0
- 4033
datasets/NAB/realKnownCause/labeled_ec2_request_latency_system_failure.csv
File diff suppressed because it is too large
View File


+ 0
- 1
datasets/NAB/realKnownCause/labeled_machine_temperature_system_failure.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
428229640a5466e68014f74649a24f00abb1150b

+ 0
- 10321
datasets/NAB/realKnownCause/labeled_nyc_taxi.csv
File diff suppressed because it is too large
View File


+ 0
- 1883
datasets/NAB/realKnownCause/labeled_rogue_agent_key_hold.csv
File diff suppressed because it is too large
View File


+ 0
- 5316
datasets/NAB/realKnownCause/labeled_rogue_agent_key_updown.csv
File diff suppressed because it is too large
View File


+ 0
- 2501
datasets/NAB/realTraffic/labeled_TravelTime_387.csv
File diff suppressed because it is too large
View File


+ 0
- 2163
datasets/NAB/realTraffic/labeled_TravelTime_451.csv
File diff suppressed because it is too large
View File


+ 0
- 2381
datasets/NAB/realTraffic/labeled_occupancy_6005.csv
File diff suppressed because it is too large
View File


+ 0
- 2501
datasets/NAB/realTraffic/labeled_occupancy_t4013.csv
File diff suppressed because it is too large
View File


+ 0
- 2501
datasets/NAB/realTraffic/labeled_speed_6005.csv
File diff suppressed because it is too large
View File


+ 0
- 1128
datasets/NAB/realTraffic/labeled_speed_7578.csv
File diff suppressed because it is too large
View File


+ 0
- 2496
datasets/NAB/realTraffic/labeled_speed_t4013.csv
File diff suppressed because it is too large
View File


+ 0
- 1
datasets/NAB/realTweets/labeled_Twitter_volume_AAPL.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
68dd1084ed091fb9affe45b4e0894250c6c62c07

+ 0
- 1
datasets/NAB/realTweets/labeled_Twitter_volume_AMZN.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
7bebf0fe077dda56f789d644090faf1d2484913c

+ 0
- 1
datasets/NAB/realTweets/labeled_Twitter_volume_CRM.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
59fdf3c2b8d171704e3de1e10d8ccfca72c8ab9a

+ 0
- 1
datasets/NAB/realTweets/labeled_Twitter_volume_CVS.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
d20453833fc13c681f0b5f5a830f3aba52b774cd

+ 0
- 1
datasets/NAB/realTweets/labeled_Twitter_volume_FB.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
ed60bba6f53c779335874c39966b7d5e4309e2c3

+ 0
- 1
datasets/NAB/realTweets/labeled_Twitter_volume_GOOG.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
f01b654d9a6a6ebc7efc65da240f83680de2131d

+ 0
- 1
datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
8e0088d97641d6ab39b808fe03ac0a7ec9ea99b9

+ 0
- 1
datasets/NAB/realTweets/labeled_Twitter_volume_KO.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
d72fffb08da82bb70ecc379bb1fa56316efda557

+ 0
- 1
datasets/NAB/realTweets/labeled_Twitter_volume_PFE.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
4c2f8543201c0a66e44815dee128d9044a41c382

+ 0
- 1
datasets/NAB/realTweets/labeled_Twitter_volume_UPS.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
25a0dd3110986418d379a887cc575f9fdc45a6da

+ 7028
- 0
datasets/anomaly/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv
File diff suppressed because it is too large
View File


+ 0
- 1
datasets/anomaly/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
44db328c252a8156434142a37ef65765869e7548

+ 8785
- 0
datasets/anomaly/kpi/kpi_dataset/tables/learningData.csv
File diff suppressed because it is too large
View File


+ 7028
- 0
datasets/anomaly/kpi/kpi_problem/dataSplits.csv
File diff suppressed because it is too large
View File


+ 8785
- 0
datasets/anomaly/raw_data/kpi.csv
File diff suppressed because it is too large
View File


+ 0
- 1
datasets/anomaly/raw_data/kpi.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
bea5d1c052730eaba76b84ff5df854477cdfa80b

+ 0
- 63
datasets/anomaly_reserve/kpi/SCORE/dataset_TEST/datasetDoc.json View File

@@ -1,63 +0,0 @@
{
"about": {
"datasetID": "kpi_dataset_TEST",
"datasetName": "NULL",
"description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
"license": " CC Public Domain Mark 1.0 ",
"source": "OpenML",
"sourceURI": "http://www.openml.org/d/185",
"approximateSize": "",
"datasetSchemaVersion": "4.0.0",
"redacted": false,
"datasetVersion": "4.0.0"
},
"dataResources": [
{
"resID": "learningData",
"resPath": "tables/learningData.csv",
"resType": "table",
"resFormat": {
"text/csv": [
"csv"
]
},
"isCollection": false,
"columns": [
{
"colIndex": 0,
"colName": "d3mIndex",
"colType": "integer",
"role": [
"index"
]
},
{
"colIndex": 1,
"colName": "timestamp",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 2,
"colName": "value",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 3,
"colName": "ground_truth",
"colType": "integer",
"role": [
"suggestedTarget"
]
}
],
"columnsCount": 4
}
]
}

+ 0
- 1758
datasets/anomaly_reserve/kpi/SCORE/dataset_TEST/tables/learningData.csv
File diff suppressed because it is too large
View File


+ 0
- 7028
datasets/anomaly_reserve/kpi/SCORE/problem_TEST/dataSplits.csv
File diff suppressed because it is too large
View File


+ 0
- 65
datasets/anomaly_reserve/kpi/SCORE/problem_TEST/problemDoc.json View File

@@ -1,65 +0,0 @@
{
"about": {
"problemID": "kpi_problem",
"problemName": "kpi_problem",
"problemDescription": "Anomaly detection",
"problemVersion": "4.0.0",
"problemSchemaVersion": "4.0.0",
"taskKeywords": [
"classification",
"binary",
"tabular"
]
},
"inputs": {
"data": [
{
"datasetID": "kpi_dataset",
"targets": [
{
"targetIndex": 0,
"resID": "learningData",
"colIndex": 3,
"colName": "ground_truth"
}
]
}
],
"dataSplits": {
"method": "holdOut",
"testSize": 0.2,
"stratified": true,
"numRepeats": 0,
"randomSeed": 42,
"splitsFile": "dataSplits.csv",
"datasetViewMaps": {
"train": [
{
"from": "kpi_dataset",
"to": "kpi_dataset_TRAIN"
}
],
"test": [
{
"from": "kpi_dataset",
"to": "kpi_dataset_TEST"
}
],
"score": [
{
"from": "kpi_dataset",
"to": "kpi_dataset_SCORE"
}
]
}
},
"performanceMetrics": [
{
"metric": "f1Macro"
}
]
},
"expectedOutputs": {
"predictionsFile": "predictions.csv"
}
}

+ 0
- 0
datasets/anomaly_reserve/kpi/SCORE/targets.csv View File


+ 0
- 63
datasets/anomaly_reserve/kpi/TEST/dataset_TEST/datasetDoc.json View File

@@ -1,63 +0,0 @@
{
"about": {
"datasetID": "kpi_dataset_TEST",
"datasetName": "NULL",
"description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
"license": " CC Public Domain Mark 1.0 ",
"source": "OpenML",
"sourceURI": "http://www.openml.org/d/185",
"approximateSize": "",
"datasetSchemaVersion": "4.0.0",
"redacted": false,
"datasetVersion": "4.0.0"
},
"dataResources": [
{
"resID": "learningData",
"resPath": "tables/learningData.csv",
"resType": "table",
"resFormat": {
"text/csv": [
"csv"
]
},
"isCollection": false,
"columns": [
{
"colIndex": 0,
"colName": "d3mIndex",
"colType": "integer",
"role": [
"index"
]
},
{
"colIndex": 1,
"colName": "timestamp",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 2,
"colName": "value",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 3,
"colName": "ground_truth",
"colType": "integer",
"role": [
"suggestedTarget"
]
}
],
"columnsCount": 4
}
]
}

+ 0
- 1758
datasets/anomaly_reserve/kpi/TEST/dataset_TEST/tables/learningData.csv
File diff suppressed because it is too large
View File


+ 0
- 7028
datasets/anomaly_reserve/kpi/TEST/problem_TEST/dataSplits.csv
File diff suppressed because it is too large
View File


+ 0
- 65
datasets/anomaly_reserve/kpi/TEST/problem_TEST/problemDoc.json View File

@@ -1,65 +0,0 @@
{
"about": {
"problemID": "kpi_problem",
"problemName": "kpi_problem",
"problemDescription": "Anomaly detection",
"problemVersion": "4.0.0",
"problemSchemaVersion": "4.0.0",
"taskKeywords": [
"classification",
"binary",
"tabular"
]
},
"inputs": {
"data": [
{
"datasetID": "kpi_dataset",
"targets": [
{
"targetIndex": 0,
"resID": "learningData",
"colIndex": 3,
"colName": "ground_truth"
}
]
}
],
"dataSplits": {
"method": "holdOut",
"testSize": 0.2,
"stratified": true,
"numRepeats": 0,
"randomSeed": 42,
"splitsFile": "dataSplits.csv",
"datasetViewMaps": {
"train": [
{
"from": "kpi_dataset",
"to": "kpi_dataset_TRAIN"
}
],
"test": [
{
"from": "kpi_dataset",
"to": "kpi_dataset_TEST"
}
],
"score": [
{
"from": "kpi_dataset",
"to": "kpi_dataset_SCORE"
}
]
}
},
"performanceMetrics": [
{
"metric": "f1Macro"
}
]
},
"expectedOutputs": {
"predictionsFile": "predictions.csv"
}
}

+ 0
- 63
datasets/anomaly_reserve/kpi/TRAIN/dataset_TRAIN/datasetDoc.json View File

@@ -1,63 +0,0 @@
{
"about": {
"datasetID": "kpi_dataset_TRAIN",
"datasetName": "NULL",
"description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
"license": " CC Public Domain Mark 1.0 ",
"source": "OpenML",
"sourceURI": "http://www.openml.org/d/185",
"approximateSize": "",
"datasetSchemaVersion": "4.0.0",
"redacted": false,
"datasetVersion": "4.0.0"
},
"dataResources": [
{
"resID": "learningData",
"resPath": "tables/learningData.csv",
"resType": "table",
"resFormat": {
"text/csv": [
"csv"
]
},
"isCollection": false,
"columns": [
{
"colIndex": 0,
"colName": "d3mIndex",
"colType": "integer",
"role": [
"index"
]
},
{
"colIndex": 1,
"colName": "timestamp",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 2,
"colName": "value",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 3,
"colName": "ground_truth",
"colType": "integer",
"role": [
"suggestedTarget"
]
}
],
"columnsCount": 4
}
]
}

+ 0
- 1
datasets/anomaly_reserve/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
44db328c252a8156434142a37ef65765869e7548

+ 0
- 7028
datasets/anomaly_reserve/kpi/TRAIN/problem_TRAIN/dataSplits.csv
File diff suppressed because it is too large
View File


+ 0
- 65
datasets/anomaly_reserve/kpi/TRAIN/problem_TRAIN/problemDoc.json View File

@@ -1,65 +0,0 @@
{
"about": {
"problemID": "kpi_problem",
"problemName": "kpi_problem",
"problemDescription": "Anomaly detection",
"problemVersion": "4.0.0",
"problemSchemaVersion": "4.0.0",
"taskKeywords": [
"classification",
"binary",
"tabular"
]
},
"inputs": {
"data": [
{
"datasetID": "kpi_dataset",
"targets": [
{
"targetIndex": 0,
"resID": "learningData",
"colIndex": 3,
"colName": "ground_truth"
}
]
}
],
"dataSplits": {
"method": "holdOut",
"testSize": 0.2,
"stratified": true,
"numRepeats": 0,
"randomSeed": 42,
"splitsFile": "dataSplits.csv",
"datasetViewMaps": {
"train": [
{
"from": "kpi_dataset",
"to": "kpi_dataset_TRAIN"
}
],
"test": [
{
"from": "kpi_dataset",
"to": "kpi_dataset_TEST"
}
],
"score": [
{
"from": "kpi_dataset",
"to": "kpi_dataset_SCORE"
}
]
}
},
"performanceMetrics": [
{
"metric": "f1Macro"
}
]
},
"expectedOutputs": {
"predictionsFile": "predictions.csv"
}
}

+ 0
- 63
datasets/anomaly_reserve/kpi/kpi_dataset/datasetDoc.json View File

@@ -1,63 +0,0 @@
{
"about": {
"datasetID": "kpi_dataset",
"datasetName": "kpi",
"description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
"license": " CC Public Domain Mark 1.0 ",
"source": "OpenML",
"sourceURI": "http://www.openml.org/d/185",
"approximateSize": "",
"datasetSchemaVersion": "4.0.0",
"redacted": false,
"datasetVersion": "4.0.0"
},
"dataResources": [
{
"resID": "learningData",
"resPath": "tables/learningData.csv",
"resType": "table",
"resFormat": {
"text/csv": [
"csv"
]
},
"isCollection": false,
"columns": [
{
"colIndex": 0,
"colName": "d3mIndex",
"colType": "integer",
"role": [
"index"
]
},
{
"colIndex": 1,
"colName": "timestamp",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 2,
"colName": "value",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 3,
"colName": "ground_truth",
"colType": "integer",
"role": [
"suggestedTarget"
]
}
],
"columnsCount": 4
}
]
}

+ 0
- 1
datasets/anomaly_reserve/kpi/kpi_dataset/tables/learningData.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
d80846dc46c173472f646a52005a1fb3670ccd09

+ 0
- 1
datasets/anomaly_reserve/kpi/kpi_problem/dataSplits.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
44db328c252a8156434142a37ef65765869e7548

+ 0
- 65
datasets/anomaly_reserve/kpi/kpi_problem/problemDoc.json View File

@@ -1,65 +0,0 @@
{
"about": {
"problemID": "kpi_problem",
"problemName": "kpi_problem",
"problemDescription": "Anomaly detection",
"problemVersion": "4.0.0",
"problemSchemaVersion": "4.0.0",
"taskKeywords": [
"classification",
"binary",
"tabular"
]
},
"inputs": {
"data": [
{
"datasetID": "kpi_dataset",
"targets": [
{
"targetIndex": 0,
"resID": "learningData",
"colIndex": 3,
"colName": "ground_truth"
}
]
}
],
"dataSplits": {
"method": "holdOut",
"testSize": 0.2,
"stratified": true,
"numRepeats": 0,
"randomSeed": 42,
"splitsFile": "dataSplits.csv",
"datasetViewMaps": {
"train": [
{
"from": "kpi_dataset",
"to": "kpi_dataset_TRAIN"
}
],
"test": [
{
"from": "kpi_dataset",
"to": "kpi_dataset_TEST"
}
],
"score": [
{
"from": "kpi_dataset",
"to": "kpi_dataset_SCORE"
}
]
}
},
"performanceMetrics": [
{
"metric": "f1Macro"
}
]
},
"expectedOutputs": {
"predictionsFile": "predictions.csv"
}
}

+ 0
- 1
datasets/anomaly_reserve/raw_data/kpi.csv.REMOVED.git-id View File

@@ -1 +0,0 @@
bea5d1c052730eaba76b84ff5df854477cdfa80b

+ 0
- 183
datasets/anomaly_reserve/template/datasetDoc.json View File

@@ -1,183 +0,0 @@
{
"about": {
"datasetID": "template",
"datasetName": "baseball",
"description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
"license": " CC Public Domain Mark 1.0 ",
"source": "OpenML",
"sourceURI": "http://www.openml.org/d/185",
"approximateSize": "",
"datasetSchemaVersion": "4.0.0",
"redacted": false,
"datasetVersion": "4.0.0"
},
"dataResources": [
{
"resID": "learningData",
"resPath": "tables/learningData.csv",
"resType": "table",
"resFormat": {
"text/csv": [
"csv"
]
},
"isCollection": false,
"columns": [
{
"colIndex": 0,
"colName": "d3mIndex",
"colType": "integer",
"role": [
"index"
]
},
{
"colIndex": 1,
"colName": "Player",
"colType": "categorical",
"role": [
"attribute"
]
},
{
"colIndex": 2,
"colName": "Number_seasons",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 3,
"colName": "Games_played",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 4,
"colName": "At_bats",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 5,
"colName": "Runs",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 6,
"colName": "Hits",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 7,
"colName": "Doubles",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 8,
"colName": "Triples",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 9,
"colName": "Home_runs",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 10,
"colName": "RBIs",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 11,
"colName": "Walks",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 12,
"colName": "Strikeouts",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 13,
"colName": "Batting_average",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 14,
"colName": "On_base_pct",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 15,
"colName": "Slugging_pct",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 16,
"colName": "Fielding_ave",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 17,
"colName": "Position",
"colType": "categorical",
"role": [
"attribute"
]
},
{
"colIndex": 18,
"colName": "Hall_of_Fame",
"colType": "categorical",
"role": [
"suggestedTarget"
]
}
],
"columnsCount": 19
}
]
}

+ 0
- 65
datasets/anomaly_reserve/template/problemDoc.json View File

@@ -1,65 +0,0 @@
{
"about": {
"problemID": "template",
"problemName": "baseball_problem",
"problemDescription": "**Author**: Jeffrey S. Simonoff \n**Source**: [AnalCatData](http://www.stern.nyu.edu/~jsimonof/AnalCatData) - 2003 \n**Please cite**: Jeffrey S. Simonoff, Analyzing Categorical Data, Springer-Verlag, New York, 2003 \n \nDatabase of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave' \n\nNotes: \n* Quotes, Single-Quotes and Backslashes were removed, Blanks replaced with Underscores\n* Player is an identifier that should be ignored when modelling the data",
"problemVersion": "4.0.0",
"problemSchemaVersion": "4.0.0",
"taskKeywords": [
"classification",
"multiClass",
"tabular"
]
},
"inputs": {
"data": [
{
"datasetID": "185_baseball_dataset",
"targets": [
{
"targetIndex": 0,
"resID": "learningData",
"colIndex": 18,
"colName": "Hall_of_Fame"
}
]
}
],
"dataSplits": {
"method": "holdOut",
"testSize": 0.2,
"stratified": true,
"numRepeats": 0,
"randomSeed": 42,
"splitsFile": "dataSplits.csv",
"datasetViewMaps": {
"train": [
{
"from": "185_baseball_dataset",
"to": "185_baseball_dataset_TRAIN"
}
],
"test": [
{
"from": "185_baseball_dataset",
"to": "185_baseball_dataset_TEST"
}
],
"score": [
{
"from": "185_baseball_dataset",
"to": "185_baseball_dataset_SCORE"
}
]
}
},
"performanceMetrics": [
{
"metric": "f1Macro"
}
]
},
"expectedOutputs": {
"predictionsFile": "predictions.csv"
}
}

+ 0
- 160
datasets/anomaly_reserve/transform.py View File

@@ -1,160 +0,0 @@
# TODO: Wrap it as a class and connect it to GUI
# A script to transform anomaly data to d3m format
import pandas as pd
import numpy as np
import os
import json

##############################
# Some information for the dataset to be transformed
# Designed for time series data
name = 'kpi'
src_path = './raw_data/kpi.csv'
label_name = 'label'
timestamp_name = 'timestamp'
value_names = ['value']
ratio = 0.8 # Ratio of training data, the rest is for testing

###############################

# Output layout written by this script (all paths relative to dst_root):
#   <name>_dataset/, <name>_problem/      -> the full data and its split table
#   TRAIN/dataset_TRAIN, TRAIN/problem_TRAIN -> first `ratio` share of the rows
#   TEST/dataset_TEST,   TEST/problem_TEST   -> remaining rows
#   SCORE/dataset_TEST,  SCORE/problem_TEST  -> same content as TEST
dst_root = './' + name
dirs = ['./', 'SCORE', 'TEST', 'TRAIN']
maps = {'./': None, 'SCORE': 'TEST', 'TEST': 'TEST', 'TRAIN': 'TRAIN'}

# Create the corresponding directories
for d in dirs:
    if maps[d] is not None:
        dataset_name = 'dataset_' + maps[d]
        problem_name = 'problem_' + maps[d]
    else:
        dataset_name = name + '_dataset'
        problem_name = name + '_problem'
    os.makedirs(os.path.join(dst_root, d, dataset_name, 'tables'), exist_ok=True)
    os.makedirs(os.path.join(dst_root, d, problem_name), exist_ok=True)

# Process data: rebuild the table with the column order
# d3mIndex, timestamp, <value columns...>, ground_truth
raw_df = pd.read_csv(src_path)
df = pd.DataFrame()
df['d3mIndex'] = raw_df.index
df['timestamp'] = raw_df[timestamp_name]
for value_name in value_names:
    df[value_name] = raw_df[value_name]
df['ground_truth'] = raw_df[label_name]
cols = df.columns.tolist()

# Save all the data
df.to_csv(os.path.join(dst_root, name+'_dataset', 'tables', 'learningData.csv'), index=False)

# Save training and testing data (chronological split, no shuffling)
split_at = int(df.shape[0]*ratio)
train_df, test_df = df[:split_at], df[split_at:]

train_df.to_csv(os.path.join(dst_root, 'TRAIN', 'dataset_TRAIN', 'tables', 'learningData.csv'), index=False)
test_df.to_csv(os.path.join(dst_root, 'TEST', 'dataset_TEST', 'tables', 'learningData.csv'), index=False)
test_df.to_csv(os.path.join(dst_root, 'SCORE', 'dataset_TEST', 'tables', 'learningData.csv'), index=False)

# Data splits: one row per sample, labelled TRAIN or TEST.
row_0 = train_df.shape[0]
# Fix: the TEST part must be sized by the test table. It used to reuse the
# training row count, which made the split table 2 * row_0 rows long.
row_1 = test_df.shape[0]
row = row_0 + row_1
splits_df = pd.DataFrame({
    'd3mIndex': list(range(row)),
    'type': ['TRAIN'] * row_0 + ['TEST'] * row_1,
    'repeat': [0] * row,
    'fold': [0] * row,
}, columns=['d3mIndex', 'type', 'repeat', 'fold'])

# Save data splits for all data
# Fix: write the split table here; the training *data* table used to be
# written to this path by mistake.
splits_df.to_csv(os.path.join(dst_root, name+'_problem', 'dataSplits.csv'), index=False)

# Save training and testing splits
train_splits, test_splits = splits_df[:row_0], splits_df[row_0:]
train_splits.to_csv(os.path.join(dst_root, 'TRAIN', 'problem_TRAIN', 'dataSplits.csv'), index=False)
test_splits.to_csv(os.path.join(dst_root, 'TEST', 'problem_TEST', 'dataSplits.csv'), index=False)
test_splits.to_csv(os.path.join(dst_root, 'SCORE', 'problem_TEST', 'dataSplits.csv'), index=False)


# Dataset JSON files
# Load template
# NOTE(review): only the columns, columnsCount, datasetID and datasetName are
# overwritten below; every other 'about' field (description, citation,
# license, source, ...) is written out exactly as found in the template.
with open('template/datasetDoc.json') as json_file:
    data = json.load(json_file)

columns = []
last = len(cols) - 1
for i, col_name in enumerate(cols):
    if i == 0:
        # First column is the d3mIndex.
        col_type, role = 'integer', ['index']
    elif i == 1:
        # Second column is the timestamp.
        col_type, role = 'integer', ['attribute']
    elif i == last:
        # Last column is the anomaly label.
        col_type, role = 'integer', ['suggestedTarget']
    else:
        # Everything in between is a value column.
        col_type, role = 'real', ['attribute']
    columns.append({'colIndex': i, 'colName': col_name, 'colType': col_type, 'role': role})
data['dataResources'][0]['columns'] = columns
data['dataResources'][0]['columnsCount'] = len(cols)

data['about']['datasetID'] = name + '_dataset'
data['about']['datasetName'] = name
with open(os.path.join(dst_root, name+'_dataset', 'datasetDoc.json'), 'w') as outfile:
    json.dump(data, outfile, indent=4)

# The split views share the same schema; only the ID/name differ.
# SCORE reuses the TEST dataset directory name and dataset ID.
for split_dir, dataset_dir, dataset_id in [
    ('TRAIN', 'dataset_TRAIN', name + '_dataset_TRAIN'),
    ('TEST', 'dataset_TEST', name + '_dataset_TEST'),
    ('SCORE', 'dataset_TEST', name + '_dataset_TEST'),
]:
    data['about']['datasetID'] = dataset_id
    data['about']['datasetName'] = 'NULL'
    with open(os.path.join(dst_root, split_dir, dataset_dir, 'datasetDoc.json'), 'w') as outfile:
        json.dump(data, outfile, indent=4)

# Problem JSON files
# Load template
with open('template/problemDoc.json') as json_file:
    data = json.load(json_file)

data['about']['problemID'] = name+'_problem'
data['about']['problemName'] = name+'_problem'
data['about']['problemDescription'] = 'Anomaly detection'
data['about']['taskKeywords'] = ['classification', 'binary', 'tabular']
data['inputs']['data'][0]['datasetID'] = name + '_dataset'
data['inputs']['data'][0]['targets'][0]['colIndex'] = len(cols)-1
data['inputs']['data'][0]['targets'][0]['colName'] = cols[-1]
view_maps = data['inputs']['dataSplits']['datasetViewMaps']
for view, suffix in [('train', '_TRAIN'), ('test', '_TEST'), ('score', '_SCORE')]:
    view_maps[view][0]['from'] = name+'_dataset'
    view_maps[view][0]['to'] = name+'_dataset'+suffix

# The same problem document is written next to every split.
for problem_dir in [
    os.path.join(dst_root, name+'_problem'),
    os.path.join(dst_root, 'TRAIN', 'problem_TRAIN'),
    os.path.join(dst_root, 'TEST', 'problem_TEST'),
    os.path.join(dst_root, 'SCORE', 'problem_TEST'),
]:
    with open(os.path.join(problem_dir, 'problemDoc.json'), 'w') as outfile:
        json.dump(data, outfile, indent=4)

# Make an empty targets.csv
with open(os.path.join(dst_root, 'SCORE', 'targets.csv'), 'w') as outfile:
    outfile.write('')





+ 0
- 95
datasets/anomaly_reserve/yahoo_sub_5/SCORE/dataset_TEST/datasetDoc.json View File

@@ -1,95 +0,0 @@
{
"about": {
"datasetID": "yahoo_sub_5_dataset_TEST",
"datasetName": "NULL",
"description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
"license": " CC Public Domain Mark 1.0 ",
"source": "OpenML",
"sourceURI": "http://www.openml.org/d/185",
"approximateSize": "",
"datasetSchemaVersion": "4.0.0",
"redacted": false,
"datasetVersion": "4.0.0"
},
"dataResources": [
{
"resID": "learningData",
"resPath": "tables/learningData.csv",
"resType": "table",
"resFormat": {
"text/csv": [
"csv"
]
},
"isCollection": false,
"columns": [
{
"colIndex": 0,
"colName": "d3mIndex",
"colType": "integer",
"role": [
"index"
]
},
{
"colIndex": 1,
"colName": "timestamp",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 2,
"colName": "value_0",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 3,
"colName": "value_1",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 4,
"colName": "value_2",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 5,
"colName": "value_3",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 6,
"colName": "value_4",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 7,
"colName": "ground_truth",
"colType": "integer",
"role": [
"suggestedTarget"
]
}
],
"columnsCount": 8
}
]
}

+ 0
- 141
datasets/anomaly_reserve/yahoo_sub_5/SCORE/dataset_TEST/tables/learningData.csv View File

@@ -1,141 +0,0 @@
d3mIndex,timestamp,value_0,value_1,value_2,value_3,value_4,ground_truth
1260,1261,7782,0.034280386319742985,2.5072222222222003,104,3119,0
1261,1262,7829,0.039360296791109,2.5927777777778,82,3590,0
1262,1263,7902,0.0,2.6894444444444,208,3893,0
1263,1264,8039,0.038944065994356014,2.6291666666667,92,3264,0
1264,1265,8350,0.18176011684739,2.6469444444444,53,3963,0
1265,1266,8142,0.18521047165852,2.7461111111111003,65,2757,0
1266,1267,7886,0.13079770999921,2.9363888888889,62,2306,0
1267,1268,7743,0.13310058077443,3.2797222222222,73,2549,0
1268,1269,7707,0.054750658073534006,3.5194444444444,84,2212,0
1269,1270,7726,0.030588852697706,3.8130555555556,90,2286,0
1270,1271,7717,0.12998124134227002,3.7941666666667,80,2979,0
1271,1272,10331,0.09100057249197198,3.6086111111111,90,3158,0
1272,1273,10515,0.19464543002904006,3.3858333333333,84,2645,0
1273,1274,10415,0.22178651521516,3.3336111111111,34,3161,0
1274,1275,10387,0.22983578430825,3.3116666666667003,67,4460,0
1275,1276,10471,0.298229429356,3.2616666666667005,74,2630,0
1276,1277,10385,0.12923377484588,3.0044444444444003,44,2593,0
1277,1278,10439,0.19609416059774,2.6741666666667,64,2625,0
1278,1279,10516,0.040518533819385014,2.3191666666667,70,4834,0
1279,1280,10587,0.07099894663641,2.0597222222222,96,4056,0
1280,1281,10586,0.07584150637714701,2.0547222222222,110,5713,0
1281,1282,10684,0.08180100127782801,2.1511111111111,68,3940,0
1282,1283,10880,0.0,2.2602777777778,90,4414,0
1283,1284,10830,0.0,2.2883333333333,90,5044,0
1284,1285,10794,0.09140162014739303,2.3736111111111,69,3894,0
1285,1286,10843,0.0,2.5869444444444,46,3993,0
1286,1287,10805,0.0,2.6480555555556,74,4404,0
1287,1288,10996,0.0,2.6077777777777995,68,4072,0
1288,1289,11327,0.05363316840061,2.6069444444444,67,4182,0
1289,1290,11090,0.26818151064716,2.6908333333333,51,3351,0
1290,1291,10578,0.21887772653901,2.9019444444444003,39,4183,0
1291,1292,10528,0.32371296573811,3.2711111111111,26,4068,0
1292,1293,10475,0.12565805017257,3.5872222222222,25,8139,0
1293,1294,10664,0.092277247744574,3.6913888888889,32,11000,0
1294,1295,10513,0.077016875742983,3.6313888888889,17,2975,0
1295,1296,9072,0.3714480797312501,3.5605555555556,19,2692,0
1296,1297,9069,0.19332372237792,3.4402777777778,16,2502,0
1297,1298,9089,0.06345811641554701,3.35,28,2510,0
1298,1299,9027,0.2267121559473,3.3469444444444,24,2663,0
1299,1300,8969,0.053072279964629,3.2708333333333,35,3575,0
1300,1301,9073,0.13336345197744,3.2519444444444,49,2586,0
1301,1302,8957,0.1252855094715,2.7311111111111,106,2908,0
1302,1303,9126,0.096211952864224,2.3875,80,3530,0
1303,1304,9122,0.096524467517755,2.0847222222222,90,2776,0
1304,1305,9231,0.08924770147957402,2.0975,169,2962,0
1305,1306,9368,0.11889606284162,2.1763888888889,98,3441,0
1306,1307,9458,0.031429841710104,2.2327777777777995,92,4376,0
1307,1308,9463,0.0,2.2725,91,3857,0
1308,1309,9356,0.036512411627868,2.3202777777778,99,4685,0
1309,1310,9340,0.0,2.5425,90,4585,0
1310,1311,9340,0.0,2.5986111111111,126,3542,0
1311,1312,9276,0.0,2.6319444444444,102,3370,0
1312,1313,9611,0.10106696361212,2.5836111111111,132,3515,0
1313,1314,9532,0.14854949043035,2.675,88,3793,0
1314,1315,9156,0.08612162048398897,2.8522222222222,135,2954,0
1315,1316,9222,0.16494200410492002,3.1302777777778,114,2627,0
1316,1317,9282,0.28637713141253,3.4805555555556,35,2550,0
1317,1318,9573,0.13206535647488,3.5994444444444,24,2480,0
1318,1319,9333,0.27364025607799,3.5847222222222,44,2521,0
1319,1320,9987,0.38382339961227,3.4963888888889,26,2860,0
1320,1321,10133,0.08426242877623301,3.3825,37,3675,0
1321,1322,10010,0.3290413568025901,3.2694444444444,45,2704,0
1322,1323,10028,0.22632868808708,3.2322222222222,42,3121,0
1323,1324,9984,0.17914189971361,3.1936111111111005,47,2603,0
1324,1325,10041,0.30046815361859003,3.0536111111111004,34,3984,0
1325,1326,10072,0.22650915594248,2.7819444444444,56,2537,0
1326,1327,10025,0.0,2.4152777777778,87,3349,0
1327,1328,10116,0.1223093269317,2.1569444444444,74,3958,0
1328,1329,10232,0.1696074188221,2.1125,90,4243,0
1329,1330,10516,0.0,2.1833333333333003,79,4159,0
1330,1331,10449,0.028193633007367,2.205,97,5637,0
1331,1332,10598,0.0,2.1697222222222,90,8142,0
1332,1333,10337,0.0,2.3075,77,5713,0
1333,1334,10469,0.097305232437507,2.4575,101,3668,0
1334,1335,10426,0.11905908868379,2.6077777777777995,74,4307,0
1335,1336,10531,0.11660374103282,2.6275,439,4354,0
1336,1337,10875,0.060474297756584014,2.6144444444444,79,4262,0
1337,1338,10494,0.22568442027805,2.6477777777777995,165,3446,0
1338,1339,10195,0.14077736537045002,2.8594444444444003,139,2677,0
1339,1340,9918,0.1924574892026,3.2675,56,4450,0
1340,1341,9889,0.18922597300629,3.5136111111111004,102,3044,0
1341,1342,9947,0.041593949118095004,3.5725,101,3428,0
1342,1343,9977,0.2502095174271,3.6863888888889,41,2845,0
1343,1344,10835,0.18663972932643,3.5636111111111,94,2781,0
1344,1345,10765,0.07351854082400297,3.4127777777778,116,2743,0
1345,1346,10656,0.081949111399618,3.295,94,4470,0
1346,1347,10485,0.20148511394009,3.2666666666667004,89,2596,0
1347,1348,10681,0.11515101921294,3.1933333333333,141,3249,0
1348,1349,10852,0.07797276382811,3.0688888888889,167,2529,0
1349,1350,10728,0.07244862879413201,2.8102777777778,148,2452,0
1350,1351,10874,0.07310929970435699,2.42,105,2934,0
1351,1352,10964,0.066868365737218,2.1358333333333,210,3159,0
1352,1353,10984,0.05788512501593701,1.9916666666667,145,3974,0
1353,1354,11055,0.09727414207464803,2.0947222222222,136,4305,0
1354,1355,11233,0.033270317741558,2.1591666666667,126,5012,0
1355,1356,11161,0.0,2.2377777777778,157,4455,0
1356,1357,10966,0.038270957919533,2.2511111111111,105,4108,0
1357,1358,11193,0.08728058888363299,2.4208333333333,114,4339,0
1358,1359,11167,0.10536774813238,2.5241666666667,104,5056,0
1359,1360,11367,0.1233991317089,2.5794444444444,69,5573,0
1360,1361,51251,0.042565915766552,2.5936111111111,75,3366,1
1361,1362,17953,0.23147422367229,2.6830555555556,73,2559,1
1362,1363,170029,0.08983405162538903,2.8188888888889,74,1999,1
1363,1364,10955,0.07464756469365201,2.9513888888888995,126,1993,0
1364,1365,10984,0.099244104918934,3.2830555555556,67,1913,0
1365,1366,10964,0.11535172009194,3.4819444444444,32,1760,0
1366,1367,10980,0.21774881707852,3.5886111111111005,38,1890,0
1367,1368,10852,0.1305066423559,3.4836111111111,34,2469,0
1368,1369,10786,0.10054853030204,3.3955555555556,36,2133,0
1369,1370,10841,0.02468393737575,3.2847222222222,26,3359,0
1370,1371,10762,0.10018007414459,3.2383333333332995,74,3783,0
1371,1372,10419,0.12522619841308,3.2188888888889,85,1809,0
1372,1373,10467,0.11781887197077,2.9483333333333,67,2143,0
1373,1374,10502,0.13417256350298,2.5855555555556,84,2567,0
1374,1375,10519,0.07474686582090599,2.3005555555556003,1630,2176,0
1375,1376,10579,0.13570963056519,2.0855555555556,1435,1929,0
1376,1377,10502,0.076431907457478,1.9027777777778,857,2244,0
1377,1378,10661,0.0,1.9411111111111,31,1810,0
1378,1379,10818,0.1936428046839,2.0444444444444,500,2088,0
1379,1380,10918,0.052826773889684014,2.1363888888889,53,2371,0
1380,1381,10871,0.0,2.22,61,1843,0
1381,1382,10796,0.054466597481213,2.3530555555556,158,2668,0
1382,1383,10774,0.057459020289436,2.545,184,2309,0
1383,1384,10898,0.28750562005936,2.6202777777778,91,1998,0
1384,1385,11442,0.075538554674309,2.6847222222222,60,2480,0
1385,1386,11113,0.08112608570492501,2.6591666666667004,107,2147,0
1386,1387,10888,0.21563803296368,2.7863888888888995,5157,1802,0
1387,1388,10894,0.095725002305685,3.0269444444444003,28,1789,0
1388,1389,10888,0.17516056892320994,3.3227777777778,24,1999,0
1389,1390,10896,0.32902836018586,3.6097222222222,21,2142,0
1390,1391,10800,0.10216065221678,3.6805555555556,12,1904,0
1391,1392,11000,0.19741931250852,3.6075,24,1876,0
1392,1393,10985,0.10149107903671,3.4091666666667004,17,2434,0
1393,1394,11017,0.17479255893624,3.3666666666667004,48,2472,0
1394,1395,10863,0.034385029573777,3.3158333333333,41,1744,0
1395,1396,10875,0.21988771218053,3.1622222222222,1088,2404,0
1396,1397,10987,0.10149107903671,3.1086111111111,68,1971,0
1397,1398,10778,0.10269981175445,2.6552777777778,2575,1713,0
1398,1399,10957,0.11258759940039,2.2730555555556,4688,1765,0
1399,1400,10832,0.13022351806001,2.0591666666667,477,3156,0

+ 0
- 1261
datasets/anomaly_reserve/yahoo_sub_5/SCORE/problem_TEST/dataSplits.csv
File diff suppressed because it is too large
View File


+ 0
- 65
datasets/anomaly_reserve/yahoo_sub_5/SCORE/problem_TEST/problemDoc.json View File

@@ -1,65 +0,0 @@
{
"about": {
"problemID": "yahoo_sub_5_problem",
"problemName": "yahoo_sub_5_problem",
"problemDescription": "Anomaly detection",
"problemVersion": "4.0.0",
"problemSchemaVersion": "4.0.0",
"taskKeywords": [
"classification",
"binary",
"tabular"
]
},
"inputs": {
"data": [
{
"datasetID": "yahoo_sub_5_dataset",
"targets": [
{
"targetIndex": 0,
"resID": "learningData",
"colIndex": 7,
"colName": "ground_truth"
}
]
}
],
"dataSplits": {
"method": "holdOut",
"testSize": 0.2,
"stratified": true,
"numRepeats": 0,
"randomSeed": 42,
"splitsFile": "dataSplits.csv",
"datasetViewMaps": {
"train": [
{
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_TRAIN"
}
],
"test": [
{
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_TEST"
}
],
"score": [
{
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_SCORE"
}
]
}
},
"performanceMetrics": [
{
"metric": "f1Macro"
}
]
},
"expectedOutputs": {
"predictionsFile": "predictions.csv"
}
}

+ 0
- 0
datasets/anomaly_reserve/yahoo_sub_5/SCORE/targets.csv View File


+ 0
- 95
datasets/anomaly_reserve/yahoo_sub_5/TEST/dataset_TEST/datasetDoc.json View File

@@ -1,95 +0,0 @@
{
"about": {
"datasetID": "yahoo_sub_5_dataset_TEST",
"datasetName": "NULL",
"description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
"license": " CC Public Domain Mark 1.0 ",
"source": "OpenML",
"sourceURI": "http://www.openml.org/d/185",
"approximateSize": "",
"datasetSchemaVersion": "4.0.0",
"redacted": false,
"datasetVersion": "4.0.0"
},
"dataResources": [
{
"resID": "learningData",
"resPath": "tables/learningData.csv",
"resType": "table",
"resFormat": {
"text/csv": [
"csv"
]
},
"isCollection": false,
"columns": [
{
"colIndex": 0,
"colName": "d3mIndex",
"colType": "integer",
"role": [
"index"
]
},
{
"colIndex": 1,
"colName": "timestamp",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 2,
"colName": "value_0",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 3,
"colName": "value_1",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 4,
"colName": "value_2",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 5,
"colName": "value_3",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 6,
"colName": "value_4",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 7,
"colName": "ground_truth",
"colType": "integer",
"role": [
"suggestedTarget"
]
}
],
"columnsCount": 8
}
]
}

+ 0
- 141
datasets/anomaly_reserve/yahoo_sub_5/TEST/dataset_TEST/tables/learningData.csv View File

@@ -1,141 +0,0 @@
d3mIndex,timestamp,value_0,value_1,value_2,value_3,value_4,ground_truth
1260,1261,7782,0.034280386319742985,2.5072222222222003,104,3119,0
1261,1262,7829,0.039360296791109,2.5927777777778,82,3590,0
1262,1263,7902,0.0,2.6894444444444,208,3893,0
1263,1264,8039,0.038944065994356014,2.6291666666667,92,3264,0
1264,1265,8350,0.18176011684739,2.6469444444444,53,3963,0
1265,1266,8142,0.18521047165852,2.7461111111111003,65,2757,0
1266,1267,7886,0.13079770999921,2.9363888888889,62,2306,0
1267,1268,7743,0.13310058077443,3.2797222222222,73,2549,0
1268,1269,7707,0.054750658073534006,3.5194444444444,84,2212,0
1269,1270,7726,0.030588852697706,3.8130555555556,90,2286,0
1270,1271,7717,0.12998124134227002,3.7941666666667,80,2979,0
1271,1272,10331,0.09100057249197198,3.6086111111111,90,3158,0
1272,1273,10515,0.19464543002904006,3.3858333333333,84,2645,0
1273,1274,10415,0.22178651521516,3.3336111111111,34,3161,0
1274,1275,10387,0.22983578430825,3.3116666666667003,67,4460,0
1275,1276,10471,0.298229429356,3.2616666666667005,74,2630,0
1276,1277,10385,0.12923377484588,3.0044444444444003,44,2593,0
1277,1278,10439,0.19609416059774,2.6741666666667,64,2625,0
1278,1279,10516,0.040518533819385014,2.3191666666667,70,4834,0
1279,1280,10587,0.07099894663641,2.0597222222222,96,4056,0
1280,1281,10586,0.07584150637714701,2.0547222222222,110,5713,0
1281,1282,10684,0.08180100127782801,2.1511111111111,68,3940,0
1282,1283,10880,0.0,2.2602777777778,90,4414,0
1283,1284,10830,0.0,2.2883333333333,90,5044,0
1284,1285,10794,0.09140162014739303,2.3736111111111,69,3894,0
1285,1286,10843,0.0,2.5869444444444,46,3993,0
1286,1287,10805,0.0,2.6480555555556,74,4404,0
1287,1288,10996,0.0,2.6077777777777995,68,4072,0
1288,1289,11327,0.05363316840061,2.6069444444444,67,4182,0
1289,1290,11090,0.26818151064716,2.6908333333333,51,3351,0
1290,1291,10578,0.21887772653901,2.9019444444444003,39,4183,0
1291,1292,10528,0.32371296573811,3.2711111111111,26,4068,0
1292,1293,10475,0.12565805017257,3.5872222222222,25,8139,0
1293,1294,10664,0.092277247744574,3.6913888888889,32,11000,0
1294,1295,10513,0.077016875742983,3.6313888888889,17,2975,0
1295,1296,9072,0.3714480797312501,3.5605555555556,19,2692,0
1296,1297,9069,0.19332372237792,3.4402777777778,16,2502,0
1297,1298,9089,0.06345811641554701,3.35,28,2510,0
1298,1299,9027,0.2267121559473,3.3469444444444,24,2663,0
1299,1300,8969,0.053072279964629,3.2708333333333,35,3575,0
1300,1301,9073,0.13336345197744,3.2519444444444,49,2586,0
1301,1302,8957,0.1252855094715,2.7311111111111,106,2908,0
1302,1303,9126,0.096211952864224,2.3875,80,3530,0
1303,1304,9122,0.096524467517755,2.0847222222222,90,2776,0
1304,1305,9231,0.08924770147957402,2.0975,169,2962,0
1305,1306,9368,0.11889606284162,2.1763888888889,98,3441,0
1306,1307,9458,0.031429841710104,2.2327777777777995,92,4376,0
1307,1308,9463,0.0,2.2725,91,3857,0
1308,1309,9356,0.036512411627868,2.3202777777778,99,4685,0
1309,1310,9340,0.0,2.5425,90,4585,0
1310,1311,9340,0.0,2.5986111111111,126,3542,0
1311,1312,9276,0.0,2.6319444444444,102,3370,0
1312,1313,9611,0.10106696361212,2.5836111111111,132,3515,0
1313,1314,9532,0.14854949043035,2.675,88,3793,0
1314,1315,9156,0.08612162048398897,2.8522222222222,135,2954,0
1315,1316,9222,0.16494200410492002,3.1302777777778,114,2627,0
1316,1317,9282,0.28637713141253,3.4805555555556,35,2550,0
1317,1318,9573,0.13206535647488,3.5994444444444,24,2480,0
1318,1319,9333,0.27364025607799,3.5847222222222,44,2521,0
1319,1320,9987,0.38382339961227,3.4963888888889,26,2860,0
1320,1321,10133,0.08426242877623301,3.3825,37,3675,0
1321,1322,10010,0.3290413568025901,3.2694444444444,45,2704,0
1322,1323,10028,0.22632868808708,3.2322222222222,42,3121,0
1323,1324,9984,0.17914189971361,3.1936111111111005,47,2603,0
1324,1325,10041,0.30046815361859003,3.0536111111111004,34,3984,0
1325,1326,10072,0.22650915594248,2.7819444444444,56,2537,0
1326,1327,10025,0.0,2.4152777777778,87,3349,0
1327,1328,10116,0.1223093269317,2.1569444444444,74,3958,0
1328,1329,10232,0.1696074188221,2.1125,90,4243,0
1329,1330,10516,0.0,2.1833333333333003,79,4159,0
1330,1331,10449,0.028193633007367,2.205,97,5637,0
1331,1332,10598,0.0,2.1697222222222,90,8142,0
1332,1333,10337,0.0,2.3075,77,5713,0
1333,1334,10469,0.097305232437507,2.4575,101,3668,0
1334,1335,10426,0.11905908868379,2.6077777777777995,74,4307,0
1335,1336,10531,0.11660374103282,2.6275,439,4354,0
1336,1337,10875,0.060474297756584014,2.6144444444444,79,4262,0
1337,1338,10494,0.22568442027805,2.6477777777777995,165,3446,0
1338,1339,10195,0.14077736537045002,2.8594444444444003,139,2677,0
1339,1340,9918,0.1924574892026,3.2675,56,4450,0
1340,1341,9889,0.18922597300629,3.5136111111111004,102,3044,0
1341,1342,9947,0.041593949118095004,3.5725,101,3428,0
1342,1343,9977,0.2502095174271,3.6863888888889,41,2845,0
1343,1344,10835,0.18663972932643,3.5636111111111,94,2781,0
1344,1345,10765,0.07351854082400297,3.4127777777778,116,2743,0
1345,1346,10656,0.081949111399618,3.295,94,4470,0
1346,1347,10485,0.20148511394009,3.2666666666667004,89,2596,0
1347,1348,10681,0.11515101921294,3.1933333333333,141,3249,0
1348,1349,10852,0.07797276382811,3.0688888888889,167,2529,0
1349,1350,10728,0.07244862879413201,2.8102777777778,148,2452,0
1350,1351,10874,0.07310929970435699,2.42,105,2934,0
1351,1352,10964,0.066868365737218,2.1358333333333,210,3159,0
1352,1353,10984,0.05788512501593701,1.9916666666667,145,3974,0
1353,1354,11055,0.09727414207464803,2.0947222222222,136,4305,0
1354,1355,11233,0.033270317741558,2.1591666666667,126,5012,0
1355,1356,11161,0.0,2.2377777777778,157,4455,0
1356,1357,10966,0.038270957919533,2.2511111111111,105,4108,0
1357,1358,11193,0.08728058888363299,2.4208333333333,114,4339,0
1358,1359,11167,0.10536774813238,2.5241666666667,104,5056,0
1359,1360,11367,0.1233991317089,2.5794444444444,69,5573,0
1360,1361,51251,0.042565915766552,2.5936111111111,75,3366,1
1361,1362,17953,0.23147422367229,2.6830555555556,73,2559,1
1362,1363,170029,0.08983405162538903,2.8188888888889,74,1999,1
1363,1364,10955,0.07464756469365201,2.9513888888888995,126,1993,0
1364,1365,10984,0.099244104918934,3.2830555555556,67,1913,0
1365,1366,10964,0.11535172009194,3.4819444444444,32,1760,0
1366,1367,10980,0.21774881707852,3.5886111111111005,38,1890,0
1367,1368,10852,0.1305066423559,3.4836111111111,34,2469,0
1368,1369,10786,0.10054853030204,3.3955555555556,36,2133,0
1369,1370,10841,0.02468393737575,3.2847222222222,26,3359,0
1370,1371,10762,0.10018007414459,3.2383333333332995,74,3783,0
1371,1372,10419,0.12522619841308,3.2188888888889,85,1809,0
1372,1373,10467,0.11781887197077,2.9483333333333,67,2143,0
1373,1374,10502,0.13417256350298,2.5855555555556,84,2567,0
1374,1375,10519,0.07474686582090599,2.3005555555556003,1630,2176,0
1375,1376,10579,0.13570963056519,2.0855555555556,1435,1929,0
1376,1377,10502,0.076431907457478,1.9027777777778,857,2244,0
1377,1378,10661,0.0,1.9411111111111,31,1810,0
1378,1379,10818,0.1936428046839,2.0444444444444,500,2088,0
1379,1380,10918,0.052826773889684014,2.1363888888889,53,2371,0
1380,1381,10871,0.0,2.22,61,1843,0
1381,1382,10796,0.054466597481213,2.3530555555556,158,2668,0
1382,1383,10774,0.057459020289436,2.545,184,2309,0
1383,1384,10898,0.28750562005936,2.6202777777778,91,1998,0
1384,1385,11442,0.075538554674309,2.6847222222222,60,2480,0
1385,1386,11113,0.08112608570492501,2.6591666666667004,107,2147,0
1386,1387,10888,0.21563803296368,2.7863888888888995,5157,1802,0
1387,1388,10894,0.095725002305685,3.0269444444444003,28,1789,0
1388,1389,10888,0.17516056892320994,3.3227777777778,24,1999,0
1389,1390,10896,0.32902836018586,3.6097222222222,21,2142,0
1390,1391,10800,0.10216065221678,3.6805555555556,12,1904,0
1391,1392,11000,0.19741931250852,3.6075,24,1876,0
1392,1393,10985,0.10149107903671,3.4091666666667004,17,2434,0
1393,1394,11017,0.17479255893624,3.3666666666667004,48,2472,0
1394,1395,10863,0.034385029573777,3.3158333333333,41,1744,0
1395,1396,10875,0.21988771218053,3.1622222222222,1088,2404,0
1396,1397,10987,0.10149107903671,3.1086111111111,68,1971,0
1397,1398,10778,0.10269981175445,2.6552777777778,2575,1713,0
1398,1399,10957,0.11258759940039,2.2730555555556,4688,1765,0
1399,1400,10832,0.13022351806001,2.0591666666667,477,3156,0

+ 0
- 1261
datasets/anomaly_reserve/yahoo_sub_5/TEST/problem_TEST/dataSplits.csv
File diff suppressed because it is too large
View File


+ 0
- 65
datasets/anomaly_reserve/yahoo_sub_5/TEST/problem_TEST/problemDoc.json View File

@@ -1,65 +0,0 @@
{
"about": {
"problemID": "yahoo_sub_5_problem",
"problemName": "yahoo_sub_5_problem",
"problemDescription": "Anomaly detection",
"problemVersion": "4.0.0",
"problemSchemaVersion": "4.0.0",
"taskKeywords": [
"classification",
"binary",
"tabular"
]
},
"inputs": {
"data": [
{
"datasetID": "yahoo_sub_5_dataset",
"targets": [
{
"targetIndex": 0,
"resID": "learningData",
"colIndex": 7,
"colName": "ground_truth"
}
]
}
],
"dataSplits": {
"method": "holdOut",
"testSize": 0.2,
"stratified": true,
"numRepeats": 0,
"randomSeed": 42,
"splitsFile": "dataSplits.csv",
"datasetViewMaps": {
"train": [
{
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_TRAIN"
}
],
"test": [
{
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_TEST"
}
],
"score": [
{
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_SCORE"
}
]
}
},
"performanceMetrics": [
{
"metric": "f1Macro"
}
]
},
"expectedOutputs": {
"predictionsFile": "predictions.csv"
}
}

+ 0
- 95
datasets/anomaly_reserve/yahoo_sub_5/TRAIN/dataset_TRAIN/datasetDoc.json View File

@@ -1,95 +0,0 @@
{
"about": {
"datasetID": "yahoo_sub_5_dataset_TRAIN",
"datasetName": "NULL",
"description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ",
"license": " CC Public Domain Mark 1.0 ",
"source": "OpenML",
"sourceURI": "http://www.openml.org/d/185",
"approximateSize": "",
"datasetSchemaVersion": "4.0.0",
"redacted": false,
"datasetVersion": "4.0.0"
},
"dataResources": [
{
"resID": "learningData",
"resPath": "tables/learningData.csv",
"resType": "table",
"resFormat": {
"text/csv": [
"csv"
]
},
"isCollection": false,
"columns": [
{
"colIndex": 0,
"colName": "d3mIndex",
"colType": "integer",
"role": [
"index"
]
},
{
"colIndex": 1,
"colName": "timestamp",
"colType": "integer",
"role": [
"attribute"
]
},
{
"colIndex": 2,
"colName": "value_0",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 3,
"colName": "value_1",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 4,
"colName": "value_2",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 5,
"colName": "value_3",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 6,
"colName": "value_4",
"colType": "real",
"role": [
"attribute"
]
},
{
"colIndex": 7,
"colName": "ground_truth",
"colType": "integer",
"role": [
"suggestedTarget"
]
}
],
"columnsCount": 8
}
]
}

+ 0
- 1261
datasets/anomaly_reserve/yahoo_sub_5/TRAIN/dataset_TRAIN/tables/learningData.csv
File diff suppressed because it is too large
View File


+ 0
- 1261
datasets/anomaly_reserve/yahoo_sub_5/TRAIN/problem_TRAIN/dataSplits.csv
File diff suppressed because it is too large
View File


+ 0
- 65
datasets/anomaly_reserve/yahoo_sub_5/TRAIN/problem_TRAIN/problemDoc.json View File

@@ -1,65 +0,0 @@
{
"about": {
"problemID": "yahoo_sub_5_problem",
"problemName": "yahoo_sub_5_problem",
"problemDescription": "Anomaly detection",
"problemVersion": "4.0.0",
"problemSchemaVersion": "4.0.0",
"taskKeywords": [
"classification",
"binary",
"tabular"
]
},
"inputs": {
"data": [
{
"datasetID": "yahoo_sub_5_dataset",
"targets": [
{
"targetIndex": 0,
"resID": "learningData",
"colIndex": 7,
"colName": "ground_truth"
}
]
}
],
"dataSplits": {
"method": "holdOut",
"testSize": 0.2,
"stratified": true,
"numRepeats": 0,
"randomSeed": 42,
"splitsFile": "dataSplits.csv",
"datasetViewMaps": {
"train": [
{
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_TRAIN"
}
],
"test": [
{
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_TEST"
}
],
"score": [
{
"from": "yahoo_sub_5_dataset",
"to": "yahoo_sub_5_dataset_SCORE"
}
]
}
},
"performanceMetrics": [
{
"metric": "f1Macro"
}
]
},
"expectedOutputs": {
"predictionsFile": "predictions.csv"
}
}

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save