Browse Source

add NAB dataset

Former-commit-id: 144fc3f789 [formerly 51ee914920] [formerly ef80542af0 [formerly 3e0aa0de57]] [formerly 902f3a7279 [formerly c84725dc68] [formerly f2e4714c60 [formerly 507d6b4abb]]] [formerly f8d6b8d7c7 [formerly be31038669] [formerly 1aca4b0620 [formerly e395eab9f6]] [formerly cb9d203e2d [formerly 938f3b5551] [formerly 2fe834d1b8 [formerly 0ceefa541a]]]] [formerly 740419315b [formerly 18e5437ef2] [formerly 26d627a136 [formerly c4e982cf50]] [formerly 9c49820d64 [formerly 03fa79cd28] [formerly 7bf4a741cf [formerly 9999d83ba0]]] [formerly 83a0decfae [formerly e7b1bb09d4] [formerly 028619d32f [formerly 69cd27d9e9]] [formerly 6bbd134338 [formerly da00923e8e] [formerly 7c4f8e6a2b [formerly 0fecb333a6]]]]]
Former-commit-id: a85941b3b9 [formerly 727065a440] [formerly a61c183d8a [formerly 4f17c638c3]] [formerly e32dde3960 [formerly 632c8bbee0] [formerly 09de65e17c [formerly 42cfe67a0d]]] [formerly 4e7a053e69 [formerly 73e57ce3b4] [formerly 5e709a3250 [formerly 38e1eb2854]] [formerly 2a082a48c9 [formerly 6adba8cc00] [formerly 7c4f8e6a2b]]]
Former-commit-id: 88c2e0138b [formerly 89b05b68de] [formerly f063ba3de6 [formerly ccba2c2a90]] [formerly 4267e27b5c [formerly a823fbd485] [formerly 08148f74ca [formerly 90494e66f9]]]
Former-commit-id: e549c88464 [formerly 88a7e05850] [formerly de4e53722a [formerly f7fdd01bc0]]
Former-commit-id: 2a2faafdba [formerly e21cfea3eb]
Former-commit-id: e29f2f324b
master
lhenry15 4 years ago
parent
commit
ecf8b4a8dc
63 changed files with 148983 additions and 4 deletions
  1. +68
    -0
      datasets/NAB/README.md
  2. +20
    -0
      datasets/NAB/add_label.py
  3. +4033
    -0
      datasets/NAB/artificialNoAnomaly/labeled_art_daily_no_noise.csv
  4. +4033
    -0
      datasets/NAB/artificialNoAnomaly/labeled_art_daily_perfect_square_wave.csv
  5. +4033
    -0
      datasets/NAB/artificialNoAnomaly/labeled_art_daily_small_noise.csv
  6. +4033
    -0
      datasets/NAB/artificialNoAnomaly/labeled_art_flatline.csv
  7. +4033
    -0
      datasets/NAB/artificialNoAnomaly/labeled_art_noisy.csv
  8. +4033
    -0
      datasets/NAB/artificialWithAnomaly/labeled_art_daily_flatmiddle.csv
  9. +4033
    -0
      datasets/NAB/artificialWithAnomaly/labeled_art_daily_jumpsdown.csv
  10. +4033
    -0
      datasets/NAB/artificialWithAnomaly/labeled_art_daily_jumpsup.csv
  11. +4033
    -0
      datasets/NAB/artificialWithAnomaly/labeled_art_daily_nojump.csv
  12. +4033
    -0
      datasets/NAB/artificialWithAnomaly/labeled_art_increase_spike_density.csv
  13. +4033
    -0
      datasets/NAB/artificialWithAnomaly/labeled_art_load_balancer_spikes.csv
  14. +232
    -0
      datasets/NAB/combined_labels.json
  15. +4033
    -0
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_24ae8d.csv
  16. +4033
    -0
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_53ea38.csv
  17. +4033
    -0
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_5f5533.csv
  18. +4033
    -0
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_77c1ca.csv
  19. +4033
    -0
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_825cc2.csv
  20. +4033
    -0
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_ac20cd.csv
  21. +4033
    -0
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_c6585a.csv
  22. +4033
    -0
      datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_fe7f93.csv
  23. +4731
    -0
      datasets/NAB/realAWSCloudwatch/labeled_ec2_disk_write_bytes_1ef3de.csv
  24. +4033
    -0
      datasets/NAB/realAWSCloudwatch/labeled_ec2_disk_write_bytes_c0d644.csv
  25. +4033
    -0
      datasets/NAB/realAWSCloudwatch/labeled_ec2_network_in_257a54.csv
  26. +4731
    -0
      datasets/NAB/realAWSCloudwatch/labeled_ec2_network_in_5abac7.csv
  27. +4033
    -0
      datasets/NAB/realAWSCloudwatch/labeled_elb_request_count_8c0756.csv
  28. +4622
    -0
      datasets/NAB/realAWSCloudwatch/labeled_grok_asg_anomaly.csv
  29. +1244
    -0
      datasets/NAB/realAWSCloudwatch/labeled_iio_us-east-1_i-a2eb1cd9_NetworkIn.csv
  30. +4033
    -0
      datasets/NAB/realAWSCloudwatch/labeled_rds_cpu_utilization_cc0c53.csv
  31. +4033
    -0
      datasets/NAB/realAWSCloudwatch/labeled_rds_cpu_utilization_e47b3b.csv
  32. +1625
    -0
      datasets/NAB/realAdExchange/labeled_exchange-2_cpc_results.csv
  33. +1625
    -0
      datasets/NAB/realAdExchange/labeled_exchange-2_cpm_results.csv
  34. +1539
    -0
      datasets/NAB/realAdExchange/labeled_exchange-3_cpc_results.csv
  35. +1539
    -0
      datasets/NAB/realAdExchange/labeled_exchange-3_cpm_results.csv
  36. +1644
    -0
      datasets/NAB/realAdExchange/labeled_exchange-4_cpc_results.csv
  37. +1644
    -0
      datasets/NAB/realAdExchange/labeled_exchange-4_cpm_results.csv
  38. +1
    -0
      datasets/NAB/realKnownCause/labeled_ambient_temperature_system_failure.csv.REMOVED.git-id
  39. +1
    -0
      datasets/NAB/realKnownCause/labeled_cpu_utilization_asg_misconfiguration.csv.REMOVED.git-id
  40. +4033
    -0
      datasets/NAB/realKnownCause/labeled_ec2_request_latency_system_failure.csv
  41. +1
    -0
      datasets/NAB/realKnownCause/labeled_machine_temperature_system_failure.csv.REMOVED.git-id
  42. +1
    -0
      datasets/NAB/realKnownCause/labeled_nyc_taxi.csv.REMOVED.git-id
  43. +1883
    -0
      datasets/NAB/realKnownCause/labeled_rogue_agent_key_hold.csv
  44. +5316
    -0
      datasets/NAB/realKnownCause/labeled_rogue_agent_key_updown.csv
  45. +2501
    -0
      datasets/NAB/realTraffic/labeled_TravelTime_387.csv
  46. +2163
    -0
      datasets/NAB/realTraffic/labeled_TravelTime_451.csv
  47. +2381
    -0
      datasets/NAB/realTraffic/labeled_occupancy_6005.csv
  48. +2501
    -0
      datasets/NAB/realTraffic/labeled_occupancy_t4013.csv
  49. +2501
    -0
      datasets/NAB/realTraffic/labeled_speed_6005.csv
  50. +1128
    -0
      datasets/NAB/realTraffic/labeled_speed_7578.csv
  51. +2496
    -0
      datasets/NAB/realTraffic/labeled_speed_t4013.csv
  52. +1
    -0
      datasets/NAB/realTweets/labeled_Twitter_volume_AAPL.csv.REMOVED.git-id
  53. +1
    -0
      datasets/NAB/realTweets/labeled_Twitter_volume_AMZN.csv.REMOVED.git-id
  54. +1
    -0
      datasets/NAB/realTweets/labeled_Twitter_volume_CRM.csv.REMOVED.git-id
  55. +1
    -0
      datasets/NAB/realTweets/labeled_Twitter_volume_CVS.csv.REMOVED.git-id
  56. +1
    -0
      datasets/NAB/realTweets/labeled_Twitter_volume_FB.csv.REMOVED.git-id
  57. +1
    -0
      datasets/NAB/realTweets/labeled_Twitter_volume_GOOG.csv.REMOVED.git-id
  58. +1
    -0
      datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv.REMOVED.git-id
  59. +1
    -0
      datasets/NAB/realTweets/labeled_Twitter_volume_KO.csv.REMOVED.git-id
  60. +1
    -0
      datasets/NAB/realTweets/labeled_Twitter_volume_PFE.csv.REMOVED.git-id
  61. +1
    -0
      datasets/NAB/realTweets/labeled_Twitter_volume_UPS.csv.REMOVED.git-id
  62. +8
    -2
      examples/run_automl.py
  63. +2
    -2
      tods/tods/search/brute_force_search.py

+ 68
- 0
datasets/NAB/README.md View File

@@ -0,0 +1,68 @@
NAB Data Corpus
---

Data are ordered, timestamped, single-valued metrics. All data files contain anomalies, unless otherwise noted.


### Real data
- realAWSCloudwatch/

AWS server metrics as collected by the AmazonCloudwatch service. Example metrics include CPU Utilization, Network Bytes In, and Disk Read Bytes.

- realAdExchange/
Online advertisement clicking rates, where the metrics are cost-per-click (CPC) and cost per thousand impressions (CPM). One of the files is normal, without anomalies.
- realKnownCause/

This is data for which we know the anomaly causes; no hand labeling.
- ambient_temperature_system_failure.csv: The ambient temperature in an office
setting.
- cpu_utilization_asg_misconfiguration.csv: From Amazon Web Services (AWS)
monitoring CPU usage – i.e. average CPU usage across a given cluster. When
usage is high, AWS spins up a new machine, and uses fewer machines when usage
is low.
- ec2_request_latency_system_failure.csv: CPU usage data from a server in
Amazon's East Coast datacenter. The dataset ends with complete system failure
resulting from a documented failure of AWS API servers. There's an interesting
story behind this data in the [Numenta
blog](http://numenta.com/blog/anomaly-of-the-week.html).
- machine_temperature_system_failure.csv: Temperature sensor data of an
internal component of a large, industrial machine. The first anomaly is a
planned shutdown of the machine. The second anomaly is difficult to detect and
directly led to the third anomaly, a catastrophic failure of the machine.
- nyc_taxi.csv: Number of NYC taxi passengers, where the five anomalies occur
during the NYC marathon, Thanksgiving, Christmas, New Year's Day, and a snow
storm. The raw data is from the [NYC Taxi and Limousine Commission](http://www.nyc.gov/html/tlc/html/about/trip_record_data.shtml).
The data file included here consists of aggregating the total number of
taxi passengers into 30 minute buckets.
- rogue_agent_key_hold.csv: Timing the key holds for several users of a
computer, where the anomalies represent a change in the user.
- rogue_agent_key_updown.csv: Timing the key strokes for several users of a
computer, where the anomalies represent a change in the user.

- realTraffic/

Real time traffic data from the Twin Cities Metro area in Minnesota, collected
by the
[Minnesota Department of Transportation](http://www.dot.state.mn.us/tmc/trafficinfo/developers.html).
Metrics include occupancy, speed, and travel time from specific
sensors.

- realTweets/

A collection of Twitter mentions of large publicly-traded companies
such as Google and IBM. The metric value represents the number of mentions
for a given ticker symbol every 5 minutes.


### Artificial data

- artificialNoAnomaly/

Artificially-generated data without any anomalies.

- artificialWithAnomaly/

Artificially-generated data with varying types of anomalies.

+ 20
- 0
datasets/NAB/add_label.py View File

@@ -0,0 +1,20 @@

import pandas as pd
import json


def label_dataframe(df, anomaly_timestamps):
    """Add a string ``label`` column marking anomalous rows.

    Args:
        df: DataFrame with a ``timestamp`` column, as read from a NAB CSV.
        anomaly_timestamps: Iterable of timestamp values that are anomalies.

    Returns:
        The same DataFrame (modified in place) with a ``label`` column whose
        value is ``'1'`` where the row's timestamp is listed as an anomaly
        and ``'0'`` otherwise.
    """
    # Build the lookup once; the original rebuilt a list for every row.
    anomalies = set(anomaly_timestamps)
    df['label'] = ['1' if ts in anomalies else '0' for ts in df['timestamp']]
    return df


def add_labels(label_path='combined_labels.json'):
    """Write a ``labeled_<name>.csv`` next to every CSV listed in the label file.

    Args:
        label_path: Path to a JSON file mapping ``"<dir>/<file>.csv"`` keys to
            lists of anomaly timestamps. The keys are also used as the CSV
            paths to read, so they are resolved against the working directory.
    """
    # Context manager so the handle is closed even if parsing fails.
    with open(label_path, 'r') as label_file:
        label_info = json.load(label_file)

    for key, anomaly_timestamps in label_info.items():
        df = label_dataframe(pd.read_csv(key), anomaly_timestamps)
        parts = key.split('/')
        fpath, fname = parts[0], parts[1]
        # The index column is still written (pandas default) so the layout of
        # the generated files is unchanged.
        df.to_csv(fpath + "/labeled_" + fname)


if __name__ == '__main__':
    add_labels()


+ 4033
- 0
datasets/NAB/artificialNoAnomaly/labeled_art_daily_no_noise.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/artificialNoAnomaly/labeled_art_daily_perfect_square_wave.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/artificialNoAnomaly/labeled_art_daily_small_noise.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/artificialNoAnomaly/labeled_art_flatline.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/artificialNoAnomaly/labeled_art_noisy.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/artificialWithAnomaly/labeled_art_daily_flatmiddle.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/artificialWithAnomaly/labeled_art_daily_jumpsdown.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/artificialWithAnomaly/labeled_art_daily_jumpsup.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/artificialWithAnomaly/labeled_art_daily_nojump.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/artificialWithAnomaly/labeled_art_increase_spike_density.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/artificialWithAnomaly/labeled_art_load_balancer_spikes.csv
File diff suppressed because it is too large
View File


+ 232
- 0
datasets/NAB/combined_labels.json View File

@@ -0,0 +1,232 @@
{
"artificialNoAnomaly/art_daily_no_noise.csv": [],
"artificialNoAnomaly/art_daily_perfect_square_wave.csv": [],
"artificialNoAnomaly/art_daily_small_noise.csv": [],
"artificialNoAnomaly/art_flatline.csv": [],
"artificialNoAnomaly/art_noisy.csv": [],
"artificialWithAnomaly/art_daily_flatmiddle.csv": [
"2014-04-11 00:00:00"
],
"artificialWithAnomaly/art_daily_jumpsdown.csv": [
"2014-04-11 09:00:00"
],
"artificialWithAnomaly/art_daily_jumpsup.csv": [
"2014-04-11 09:00:00"
],
"artificialWithAnomaly/art_daily_nojump.csv": [
"2014-04-11 09:00:00"
],
"artificialWithAnomaly/art_increase_spike_density.csv": [
"2014-04-07 23:10:00"
],
"artificialWithAnomaly/art_load_balancer_spikes.csv": [
"2014-04-11 04:35:00"
],
"realAWSCloudwatch/ec2_cpu_utilization_24ae8d.csv": [
"2014-02-26 22:05:00",
"2014-02-27 17:15:00"
],
"realAWSCloudwatch/ec2_cpu_utilization_53ea38.csv": [
"2014-02-19 19:10:00",
"2014-02-23 20:05:00"
],
"realAWSCloudwatch/ec2_cpu_utilization_5f5533.csv": [
"2014-02-19 00:22:00",
"2014-02-24 18:37:00"
],
"realAWSCloudwatch/ec2_cpu_utilization_77c1ca.csv": [
"2014-04-09 10:15:00"
],
"realAWSCloudwatch/ec2_cpu_utilization_825cc2.csv": [
"2014-04-15 15:44:00",
"2014-04-16 03:34:00"
],
"realAWSCloudwatch/ec2_cpu_utilization_ac20cd.csv": [
"2014-04-15 00:49:00"
],
"realAWSCloudwatch/ec2_cpu_utilization_c6585a.csv": [],
"realAWSCloudwatch/ec2_cpu_utilization_fe7f93.csv": [
"2014-02-17 06:12:00",
"2014-02-22 00:02:00",
"2014-02-23 15:17:00"
],
"realAWSCloudwatch/ec2_disk_write_bytes_1ef3de.csv": [
"2014-03-10 21:09:00"
],
"realAWSCloudwatch/ec2_disk_write_bytes_c0d644.csv": [
"2014-04-09 01:30:00",
"2014-04-10 14:35:00",
"2014-04-13 03:00:00"
],
"realAWSCloudwatch/ec2_network_in_257a54.csv": [
"2014-04-15 16:44:00"
],
"realAWSCloudwatch/ec2_network_in_5abac7.csv": [
"2014-03-10 18:56:00",
"2014-03-12 21:01:00"
],
"realAWSCloudwatch/elb_request_count_8c0756.csv": [
"2014-04-12 17:24:00",
"2014-04-22 19:34:00"
],
"realAWSCloudwatch/grok_asg_anomaly.csv": [
"2014-01-20 08:30:00",
"2014-01-21 10:45:00",
"2014-01-29 00:45:00"
],
"realAWSCloudwatch/iio_us-east-1_i-a2eb1cd9_NetworkIn.csv": [
"2013-10-10 09:35:00",
"2013-10-10 20:40:00"
],
"realAWSCloudwatch/rds_cpu_utilization_cc0c53.csv": [
"2014-02-25 07:15:00",
"2014-02-27 00:50:00"
],
"realAWSCloudwatch/rds_cpu_utilization_e47b3b.csv": [
"2014-04-13 06:52:00",
"2014-04-18 23:27:00"
],
"realAdExchange/exchange-2_cpc_results.csv": [
"2011-07-14 13:00:01"
],
"realAdExchange/exchange-2_cpm_results.csv": [
"2011-07-26 06:00:01",
"2011-08-10 17:00:01"
],
"realAdExchange/exchange-3_cpc_results.csv": [
"2011-07-14 10:15:01",
"2011-07-20 10:15:01",
"2011-08-13 10:15:01"
],
"realAdExchange/exchange-3_cpm_results.csv": [
"2011-08-19 18:15:01"
],
"realAdExchange/exchange-4_cpc_results.csv": [
"2011-07-16 09:15:01",
"2011-08-02 12:15:01",
"2011-08-23 08:15:01"
],
"realAdExchange/exchange-4_cpm_results.csv": [
"2011-07-16 09:15:01",
"2011-08-01 07:15:01",
"2011-08-23 08:15:01",
"2011-08-28 13:15:01"
],
"realKnownCause/ambient_temperature_system_failure.csv": [
"2013-12-22 20:00:00",
"2014-04-13 09:00:00"
],
"realKnownCause/cpu_utilization_asg_misconfiguration.csv": [
"2014-07-12 02:04:00",
"2014-07-14 21:44:00"
],
"realKnownCause/ec2_request_latency_system_failure.csv": [
"2014-03-14 09:06:00",
"2014-03-18 22:41:00",
"2014-03-21 03:01:00"
],
"realKnownCause/machine_temperature_system_failure.csv": [
"2013-12-11 06:00:00",
"2013-12-16 17:25:00",
"2014-01-28 13:55:00",
"2014-02-08 14:30:00"
],
"realKnownCause/nyc_taxi.csv": [
"2014-11-01 19:00:00",
"2014-11-27 15:30:00",
"2014-12-25 15:00:00",
"2015-01-01 01:00:00",
"2015-01-27 00:00:00"
],
"realKnownCause/rogue_agent_key_hold.csv": [
"2014-07-15 08:30:00",
"2014-07-17 09:50:00"
],
"realKnownCause/rogue_agent_key_updown.csv": [
"2014-07-15 04:00:00",
"2014-07-17 08:50:00"
],
"realTraffic/TravelTime_387.csv": [
"2015-07-30 12:29:00",
"2015-08-18 16:26:00",
"2015-09-01 05:34:00"
],
"realTraffic/TravelTime_451.csv": [
"2015-08-11 12:07:00"
],
"realTraffic/occupancy_6005.csv": [
"2015-09-15 06:55:00"
],
"realTraffic/occupancy_t4013.csv": [
"2015-09-16 08:09:00",
"2015-09-17 07:55:00"
],
"realTraffic/speed_6005.csv": [
"2015-09-17 07:00:00"
],
"realTraffic/speed_7578.csv": [
"2015-09-11 16:44:00",
"2015-09-15 14:34:00",
"2015-09-16 14:14:00",
"2015-09-16 17:10:00"
],
"realTraffic/speed_t4013.csv": [
"2015-09-16 08:04:00",
"2015-09-17 08:15:00"
],
"realTweets/Twitter_volume_AAPL.csv": [
"2015-03-03 21:07:53",
"2015-03-09 17:32:53",
"2015-03-16 02:57:53",
"2015-03-31 03:27:53"
],
"realTweets/Twitter_volume_AMZN.csv": [
"2015-03-05 19:47:53",
"2015-03-11 20:57:53",
"2015-04-01 21:57:53",
"2015-04-08 04:52:53"
],
"realTweets/Twitter_volume_CRM.csv": [
"2015-03-09 19:07:53",
"2015-03-19 23:07:53",
"2015-03-26 19:07:53"
],
"realTweets/Twitter_volume_CVS.csv": [
"2015-03-04 16:02:53",
"2015-03-05 19:57:53",
"2015-03-26 14:07:53",
"2015-04-14 22:37:53"
],
"realTweets/Twitter_volume_FB.csv": [
"2015-03-16 07:07:53",
"2015-04-03 17:47:53"
],
"realTweets/Twitter_volume_GOOG.csv": [
"2015-03-13 20:22:53",
"2015-03-14 16:27:53",
"2015-03-22 22:52:53",
"2015-04-01 05:27:53"
],
"realTweets/Twitter_volume_IBM.csv": [
"2015-03-23 22:27:53",
"2015-04-20 20:07:53"
],
"realTweets/Twitter_volume_KO.csv": [
"2015-03-20 13:12:53",
"2015-04-08 23:42:53",
"2015-04-14 14:52:53"
],
"realTweets/Twitter_volume_PFE.csv": [
"2015-03-02 21:22:53",
"2015-03-04 10:32:53",
"2015-03-13 19:57:53",
"2015-04-07 23:42:53"
],
"realTweets/Twitter_volume_UPS.csv": [
"2015-03-03 00:27:53",
"2015-03-04 11:07:53",
"2015-03-05 15:22:53",
"2015-03-24 18:17:53",
"2015-03-29 16:27:53"
]
}

+ 4033
- 0
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_24ae8d.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_53ea38.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_5f5533.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_77c1ca.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_825cc2.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_ac20cd.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_c6585a.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/realAWSCloudwatch/labeled_ec2_cpu_utilization_fe7f93.csv
File diff suppressed because it is too large
View File


+ 4731
- 0
datasets/NAB/realAWSCloudwatch/labeled_ec2_disk_write_bytes_1ef3de.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/realAWSCloudwatch/labeled_ec2_disk_write_bytes_c0d644.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/realAWSCloudwatch/labeled_ec2_network_in_257a54.csv
File diff suppressed because it is too large
View File


+ 4731
- 0
datasets/NAB/realAWSCloudwatch/labeled_ec2_network_in_5abac7.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/realAWSCloudwatch/labeled_elb_request_count_8c0756.csv
File diff suppressed because it is too large
View File


+ 4622
- 0
datasets/NAB/realAWSCloudwatch/labeled_grok_asg_anomaly.csv
File diff suppressed because it is too large
View File


+ 1244
- 0
datasets/NAB/realAWSCloudwatch/labeled_iio_us-east-1_i-a2eb1cd9_NetworkIn.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/realAWSCloudwatch/labeled_rds_cpu_utilization_cc0c53.csv
File diff suppressed because it is too large
View File


+ 4033
- 0
datasets/NAB/realAWSCloudwatch/labeled_rds_cpu_utilization_e47b3b.csv
File diff suppressed because it is too large
View File


+ 1625
- 0
datasets/NAB/realAdExchange/labeled_exchange-2_cpc_results.csv
File diff suppressed because it is too large
View File


+ 1625
- 0
datasets/NAB/realAdExchange/labeled_exchange-2_cpm_results.csv
File diff suppressed because it is too large
View File


+ 1539
- 0
datasets/NAB/realAdExchange/labeled_exchange-3_cpc_results.csv
File diff suppressed because it is too large
View File


+ 1539
- 0
datasets/NAB/realAdExchange/labeled_exchange-3_cpm_results.csv
File diff suppressed because it is too large
View File


+ 1644
- 0
datasets/NAB/realAdExchange/labeled_exchange-4_cpc_results.csv
File diff suppressed because it is too large
View File


+ 1644
- 0
datasets/NAB/realAdExchange/labeled_exchange-4_cpm_results.csv
File diff suppressed because it is too large
View File


+ 1
- 0
datasets/NAB/realKnownCause/labeled_ambient_temperature_system_failure.csv.REMOVED.git-id View File

@@ -0,0 +1 @@
49b3ed29808987b19c9a17d47838d7fd21a64290

+ 1
- 0
datasets/NAB/realKnownCause/labeled_cpu_utilization_asg_misconfiguration.csv.REMOVED.git-id View File

@@ -0,0 +1 @@
35256ed33e1a2c8b0c4abfbf924bb6f009c932a3

+ 4033
- 0
datasets/NAB/realKnownCause/labeled_ec2_request_latency_system_failure.csv
File diff suppressed because it is too large
View File


+ 1
- 0
datasets/NAB/realKnownCause/labeled_machine_temperature_system_failure.csv.REMOVED.git-id View File

@@ -0,0 +1 @@
26eb88e5a373bbda063203bf28b818eaba168f29

+ 1
- 0
datasets/NAB/realKnownCause/labeled_nyc_taxi.csv.REMOVED.git-id View File

@@ -0,0 +1 @@
eeb114c5c4471f4089ac206800eaa2330119dc04

+ 1883
- 0
datasets/NAB/realKnownCause/labeled_rogue_agent_key_hold.csv
File diff suppressed because it is too large
View File


+ 5316
- 0
datasets/NAB/realKnownCause/labeled_rogue_agent_key_updown.csv
File diff suppressed because it is too large
View File


+ 2501
- 0
datasets/NAB/realTraffic/labeled_TravelTime_387.csv
File diff suppressed because it is too large
View File


+ 2163
- 0
datasets/NAB/realTraffic/labeled_TravelTime_451.csv
File diff suppressed because it is too large
View File


+ 2381
- 0
datasets/NAB/realTraffic/labeled_occupancy_6005.csv
File diff suppressed because it is too large
View File


+ 2501
- 0
datasets/NAB/realTraffic/labeled_occupancy_t4013.csv
File diff suppressed because it is too large
View File


+ 2501
- 0
datasets/NAB/realTraffic/labeled_speed_6005.csv
File diff suppressed because it is too large
View File


+ 1128
- 0
datasets/NAB/realTraffic/labeled_speed_7578.csv
File diff suppressed because it is too large
View File


+ 2496
- 0
datasets/NAB/realTraffic/labeled_speed_t4013.csv
File diff suppressed because it is too large
View File


+ 1
- 0
datasets/NAB/realTweets/labeled_Twitter_volume_AAPL.csv.REMOVED.git-id View File

@@ -0,0 +1 @@
2d91a1145560d6f2ba78653857e4c365b849af48

+ 1
- 0
datasets/NAB/realTweets/labeled_Twitter_volume_AMZN.csv.REMOVED.git-id View File

@@ -0,0 +1 @@
6bd82e25bc77b7c2d239e35fe3292418b7f7f2f6

+ 1
- 0
datasets/NAB/realTweets/labeled_Twitter_volume_CRM.csv.REMOVED.git-id View File

@@ -0,0 +1 @@
10197fd0d3ae336cc3af7b73c2b8172925878a81

+ 1
- 0
datasets/NAB/realTweets/labeled_Twitter_volume_CVS.csv.REMOVED.git-id View File

@@ -0,0 +1 @@
a2bbcaeffe42bb34694e005021c137a501f42fa4

+ 1
- 0
datasets/NAB/realTweets/labeled_Twitter_volume_FB.csv.REMOVED.git-id View File

@@ -0,0 +1 @@
207e1632a8e355fb2622f090d19aa0bbc8b46f3d

+ 1
- 0
datasets/NAB/realTweets/labeled_Twitter_volume_GOOG.csv.REMOVED.git-id View File

@@ -0,0 +1 @@
b1ceb02a500f874bb83fff57c0343800b5f8d598

+ 1
- 0
datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv.REMOVED.git-id View File

@@ -0,0 +1 @@
ba0a753fc1e7b9031debc24bbe2ba4f8659a9992

+ 1
- 0
datasets/NAB/realTweets/labeled_Twitter_volume_KO.csv.REMOVED.git-id View File

@@ -0,0 +1 @@
1641472acea894a40c5ec95ad7cc329719d6cfc7

+ 1
- 0
datasets/NAB/realTweets/labeled_Twitter_volume_PFE.csv.REMOVED.git-id View File

@@ -0,0 +1 @@
46cf6c9d1c2b44f537641a6401824da570ad8986

+ 1
- 0
datasets/NAB/realTweets/labeled_Twitter_volume_UPS.csv.REMOVED.git-id View File

@@ -0,0 +1 @@
e0ea87c58b01bd07d9763dc49288a9ce17e20a0a

+ 8
- 2
examples/run_automl.py View File

@@ -6,14 +6,20 @@ from tods.utils import generate_dataset_problem
from tods.search import BruteForceSearch

# Some information
table_path = 'datasets/yahoo_sub_5.csv' # The path of the dataset
target_index = 6 # what column is the target
table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset
target_index = 3 # what column is the target

#table_path = 'datasets/yahoo_sub_5.csv'
#target_index = 6 # what column is the target
#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset
time_limit = 30 # How many seconds you wanna search
#metric = 'F1' # F1 on label 1
metric = 'F1_MACRO' # F1 on both label 0 and 1

# Read data and generate dataset and problem
df = pd.read_csv(table_path)
print(df[4])
exit()
dataset, problem_description = generate_dataset_problem(df, target_index=target_index, metric=metric)

# Start backend


+ 2
- 2
tods/tods/search/brute_force_search.py View File

@@ -75,9 +75,9 @@ class BruteForceSearch(PipelineSearchBase):

primitive_python_paths = {
'data_processing': [
'd3m.primitives.tods.data_processing.time_interval_transform',
#'d3m.primitives.tods.data_processing.time_interval_transform',
#'d3m.primitives.tods.data_processing.categorical_to_binary',
#'d3m.primitives.tods.data_processing.column_filter',
'd3m.primitives.tods.data_processing.column_filter',
#'d3m.primitives.tods.data_processing.timestamp_validation',
#'d3m.primitives.tods.data_processing.duplication_validation',
#'d3m.primitives.tods.data_processing.continuity_validation',


Loading…
Cancel
Save