@@ -1,68 +0,0 @@ | |||||
NAB Data Corpus | |||||
--- | |||||
Data are ordered, timestamped, single-valued metrics. All data files contain anomalies, unless otherwise noted. | |||||
### Real data | |||||
- realAWSCloudwatch/ | |||||
AWS server metrics as collected by the Amazon CloudWatch service. Example metrics include CPU Utilization, Network Bytes In, and Disk Read Bytes.
- realAdExchange/ | |||||
Online advertisement clicking rates, where the metrics are cost-per-click (CPC) and cost per thousand impressions (CPM). One of the files is normal, without anomalies. | |||||
- realKnownCause/ | |||||
This is data for which we know the anomaly causes; no hand labeling. | |||||
- ambient_temperature_system_failure.csv: The ambient temperature in an office | |||||
setting. | |||||
- cpu_utilization_asg_misconfiguration.csv: From Amazon Web Services (AWS) | |||||
monitoring CPU usage – i.e. average CPU usage across a given cluster. When | |||||
usage is high, AWS spins up a new machine, and uses fewer machines when usage | |||||
is low. | |||||
- ec2_request_latency_system_failure.csv: CPU usage data from a server in | |||||
Amazon's East Coast datacenter. The dataset ends with complete system failure | |||||
resulting from a documented failure of AWS API servers. There's an interesting | |||||
story behind this data in the [Numenta | |||||
blog](http://numenta.com/blog/anomaly-of-the-week.html). | |||||
- machine_temperature_system_failure.csv: Temperature sensor data of an | |||||
internal component of a large, industrial machine. The first anomaly is a
planned shutdown of the machine. The second anomaly is difficult to detect and | |||||
directly led to the third anomaly, a catastrophic failure of the machine. | |||||
- nyc_taxi.csv: Number of NYC taxi passengers, where the five anomalies occur | |||||
during the NYC marathon, Thanksgiving, Christmas, New Year's Day, and a snow
storm. The raw data is from the [NYC Taxi and Limousine Commission](http://www.nyc.gov/html/tlc/html/about/trip_record_data.shtml). | |||||
The data file included here consists of aggregating the total number of | |||||
taxi passengers into 30 minute buckets. | |||||
- rogue_agent_key_hold.csv: Timing the key holds for several users of a | |||||
computer, where the anomalies represent a change in the user. | |||||
- rogue_agent_key_updown.csv: Timing the key strokes for several users of a | |||||
computer, where the anomalies represent a change in the user. | |||||
- realTraffic/ | |||||
Real-time traffic data from the Twin Cities Metro area in Minnesota, collected
by the | |||||
[Minnesota Department of Transportation](http://www.dot.state.mn.us/tmc/trafficinfo/developers.html). | |||||
Metrics include occupancy, speed, and travel time from specific
sensors. | |||||
- realTweets/ | |||||
A collection of Twitter mentions of large publicly-traded companies | |||||
such as Google and IBM. The metric value represents the number of mentions | |||||
for a given ticker symbol every 5 minutes. | |||||
### Artificial data | |||||
- artificialNoAnomaly/ | |||||
Artificially-generated data without any anomalies. | |||||
- artificialWithAnomaly/ | |||||
Artificially-generated data with varying types of anomalies. |
@@ -1,232 +0,0 @@ | |||||
{ | |||||
"artificialNoAnomaly/art_daily_no_noise.csv": [], | |||||
"artificialNoAnomaly/art_daily_perfect_square_wave.csv": [], | |||||
"artificialNoAnomaly/art_daily_small_noise.csv": [], | |||||
"artificialNoAnomaly/art_flatline.csv": [], | |||||
"artificialNoAnomaly/art_noisy.csv": [], | |||||
"artificialWithAnomaly/art_daily_flatmiddle.csv": [ | |||||
"2014-04-11 00:00:00" | |||||
], | |||||
"artificialWithAnomaly/art_daily_jumpsdown.csv": [ | |||||
"2014-04-11 09:00:00" | |||||
], | |||||
"artificialWithAnomaly/art_daily_jumpsup.csv": [ | |||||
"2014-04-11 09:00:00" | |||||
], | |||||
"artificialWithAnomaly/art_daily_nojump.csv": [ | |||||
"2014-04-11 09:00:00" | |||||
], | |||||
"artificialWithAnomaly/art_increase_spike_density.csv": [ | |||||
"2014-04-07 23:10:00" | |||||
], | |||||
"artificialWithAnomaly/art_load_balancer_spikes.csv": [ | |||||
"2014-04-11 04:35:00" | |||||
], | |||||
"realAWSCloudwatch/ec2_cpu_utilization_24ae8d.csv": [ | |||||
"2014-02-26 22:05:00", | |||||
"2014-02-27 17:15:00" | |||||
], | |||||
"realAWSCloudwatch/ec2_cpu_utilization_53ea38.csv": [ | |||||
"2014-02-19 19:10:00", | |||||
"2014-02-23 20:05:00" | |||||
], | |||||
"realAWSCloudwatch/ec2_cpu_utilization_5f5533.csv": [ | |||||
"2014-02-19 00:22:00", | |||||
"2014-02-24 18:37:00" | |||||
], | |||||
"realAWSCloudwatch/ec2_cpu_utilization_77c1ca.csv": [ | |||||
"2014-04-09 10:15:00" | |||||
], | |||||
"realAWSCloudwatch/ec2_cpu_utilization_825cc2.csv": [ | |||||
"2014-04-15 15:44:00", | |||||
"2014-04-16 03:34:00" | |||||
], | |||||
"realAWSCloudwatch/ec2_cpu_utilization_ac20cd.csv": [ | |||||
"2014-04-15 00:49:00" | |||||
], | |||||
"realAWSCloudwatch/ec2_cpu_utilization_c6585a.csv": [], | |||||
"realAWSCloudwatch/ec2_cpu_utilization_fe7f93.csv": [ | |||||
"2014-02-17 06:12:00", | |||||
"2014-02-22 00:02:00", | |||||
"2014-02-23 15:17:00" | |||||
], | |||||
"realAWSCloudwatch/ec2_disk_write_bytes_1ef3de.csv": [ | |||||
"2014-03-10 21:09:00" | |||||
], | |||||
"realAWSCloudwatch/ec2_disk_write_bytes_c0d644.csv": [ | |||||
"2014-04-09 01:30:00", | |||||
"2014-04-10 14:35:00", | |||||
"2014-04-13 03:00:00" | |||||
], | |||||
"realAWSCloudwatch/ec2_network_in_257a54.csv": [ | |||||
"2014-04-15 16:44:00" | |||||
], | |||||
"realAWSCloudwatch/ec2_network_in_5abac7.csv": [ | |||||
"2014-03-10 18:56:00", | |||||
"2014-03-12 21:01:00" | |||||
], | |||||
"realAWSCloudwatch/elb_request_count_8c0756.csv": [ | |||||
"2014-04-12 17:24:00", | |||||
"2014-04-22 19:34:00" | |||||
], | |||||
"realAWSCloudwatch/grok_asg_anomaly.csv": [ | |||||
"2014-01-20 08:30:00", | |||||
"2014-01-21 10:45:00", | |||||
"2014-01-29 00:45:00" | |||||
], | |||||
"realAWSCloudwatch/iio_us-east-1_i-a2eb1cd9_NetworkIn.csv": [ | |||||
"2013-10-10 09:35:00", | |||||
"2013-10-10 20:40:00" | |||||
], | |||||
"realAWSCloudwatch/rds_cpu_utilization_cc0c53.csv": [ | |||||
"2014-02-25 07:15:00", | |||||
"2014-02-27 00:50:00" | |||||
], | |||||
"realAWSCloudwatch/rds_cpu_utilization_e47b3b.csv": [ | |||||
"2014-04-13 06:52:00", | |||||
"2014-04-18 23:27:00" | |||||
], | |||||
"realAdExchange/exchange-2_cpc_results.csv": [ | |||||
"2011-07-14 13:00:01" | |||||
], | |||||
"realAdExchange/exchange-2_cpm_results.csv": [ | |||||
"2011-07-26 06:00:01", | |||||
"2011-08-10 17:00:01" | |||||
], | |||||
"realAdExchange/exchange-3_cpc_results.csv": [ | |||||
"2011-07-14 10:15:01", | |||||
"2011-07-20 10:15:01", | |||||
"2011-08-13 10:15:01" | |||||
], | |||||
"realAdExchange/exchange-3_cpm_results.csv": [ | |||||
"2011-08-19 18:15:01" | |||||
], | |||||
"realAdExchange/exchange-4_cpc_results.csv": [ | |||||
"2011-07-16 09:15:01", | |||||
"2011-08-02 12:15:01", | |||||
"2011-08-23 08:15:01" | |||||
], | |||||
"realAdExchange/exchange-4_cpm_results.csv": [ | |||||
"2011-07-16 09:15:01", | |||||
"2011-08-01 07:15:01", | |||||
"2011-08-23 08:15:01", | |||||
"2011-08-28 13:15:01" | |||||
], | |||||
"realKnownCause/ambient_temperature_system_failure.csv": [ | |||||
"2013-12-22 20:00:00", | |||||
"2014-04-13 09:00:00" | |||||
], | |||||
"realKnownCause/cpu_utilization_asg_misconfiguration.csv": [ | |||||
"2014-07-12 02:04:00", | |||||
"2014-07-14 21:44:00" | |||||
], | |||||
"realKnownCause/ec2_request_latency_system_failure.csv": [ | |||||
"2014-03-14 09:06:00", | |||||
"2014-03-18 22:41:00", | |||||
"2014-03-21 03:01:00" | |||||
], | |||||
"realKnownCause/machine_temperature_system_failure.csv": [ | |||||
"2013-12-11 06:00:00", | |||||
"2013-12-16 17:25:00", | |||||
"2014-01-28 13:55:00", | |||||
"2014-02-08 14:30:00" | |||||
], | |||||
"realKnownCause/nyc_taxi.csv": [ | |||||
"2014-11-01 19:00:00", | |||||
"2014-11-27 15:30:00", | |||||
"2014-12-25 15:00:00", | |||||
"2015-01-01 01:00:00", | |||||
"2015-01-27 00:00:00" | |||||
], | |||||
"realKnownCause/rogue_agent_key_hold.csv": [ | |||||
"2014-07-15 08:30:00", | |||||
"2014-07-17 09:50:00" | |||||
], | |||||
"realKnownCause/rogue_agent_key_updown.csv": [ | |||||
"2014-07-15 04:00:00", | |||||
"2014-07-17 08:50:00" | |||||
], | |||||
"realTraffic/TravelTime_387.csv": [ | |||||
"2015-07-30 12:29:00", | |||||
"2015-08-18 16:26:00", | |||||
"2015-09-01 05:34:00" | |||||
], | |||||
"realTraffic/TravelTime_451.csv": [ | |||||
"2015-08-11 12:07:00" | |||||
], | |||||
"realTraffic/occupancy_6005.csv": [ | |||||
"2015-09-15 06:55:00" | |||||
], | |||||
"realTraffic/occupancy_t4013.csv": [ | |||||
"2015-09-16 08:09:00", | |||||
"2015-09-17 07:55:00" | |||||
], | |||||
"realTraffic/speed_6005.csv": [ | |||||
"2015-09-17 07:00:00" | |||||
], | |||||
"realTraffic/speed_7578.csv": [ | |||||
"2015-09-11 16:44:00", | |||||
"2015-09-15 14:34:00", | |||||
"2015-09-16 14:14:00", | |||||
"2015-09-16 17:10:00" | |||||
], | |||||
"realTraffic/speed_t4013.csv": [ | |||||
"2015-09-16 08:04:00", | |||||
"2015-09-17 08:15:00" | |||||
], | |||||
"realTweets/Twitter_volume_AAPL.csv": [ | |||||
"2015-03-03 21:07:53", | |||||
"2015-03-09 17:32:53", | |||||
"2015-03-16 02:57:53", | |||||
"2015-03-31 03:27:53" | |||||
], | |||||
"realTweets/Twitter_volume_AMZN.csv": [ | |||||
"2015-03-05 19:47:53", | |||||
"2015-03-11 20:57:53", | |||||
"2015-04-01 21:57:53", | |||||
"2015-04-08 04:52:53" | |||||
], | |||||
"realTweets/Twitter_volume_CRM.csv": [ | |||||
"2015-03-09 19:07:53", | |||||
"2015-03-19 23:07:53", | |||||
"2015-03-26 19:07:53" | |||||
], | |||||
"realTweets/Twitter_volume_CVS.csv": [ | |||||
"2015-03-04 16:02:53", | |||||
"2015-03-05 19:57:53", | |||||
"2015-03-26 14:07:53", | |||||
"2015-04-14 22:37:53" | |||||
], | |||||
"realTweets/Twitter_volume_FB.csv": [ | |||||
"2015-03-16 07:07:53", | |||||
"2015-04-03 17:47:53" | |||||
], | |||||
"realTweets/Twitter_volume_GOOG.csv": [ | |||||
"2015-03-13 20:22:53", | |||||
"2015-03-14 16:27:53", | |||||
"2015-03-22 22:52:53", | |||||
"2015-04-01 05:27:53" | |||||
], | |||||
"realTweets/Twitter_volume_IBM.csv": [ | |||||
"2015-03-23 22:27:53", | |||||
"2015-04-20 20:07:53" | |||||
], | |||||
"realTweets/Twitter_volume_KO.csv": [ | |||||
"2015-03-20 13:12:53", | |||||
"2015-04-08 23:42:53", | |||||
"2015-04-14 14:52:53" | |||||
], | |||||
"realTweets/Twitter_volume_PFE.csv": [ | |||||
"2015-03-02 21:22:53", | |||||
"2015-03-04 10:32:53", | |||||
"2015-03-13 19:57:53", | |||||
"2015-04-07 23:42:53" | |||||
], | |||||
"realTweets/Twitter_volume_UPS.csv": [ | |||||
"2015-03-03 00:27:53", | |||||
"2015-03-04 11:07:53", | |||||
"2015-03-05 15:22:53", | |||||
"2015-03-24 18:17:53", | |||||
"2015-03-29 16:27:53" | |||||
] | |||||
} |
@@ -1 +0,0 @@ | |||||
948611b07519538ef036e0ec1c948f6bf97009cf |
@@ -1 +0,0 @@ | |||||
428229640a5466e68014f74649a24f00abb1150b |
@@ -1 +0,0 @@ | |||||
68dd1084ed091fb9affe45b4e0894250c6c62c07 |
@@ -1 +0,0 @@ | |||||
7bebf0fe077dda56f789d644090faf1d2484913c |
@@ -1 +0,0 @@ | |||||
59fdf3c2b8d171704e3de1e10d8ccfca72c8ab9a |
@@ -1 +0,0 @@ | |||||
d20453833fc13c681f0b5f5a830f3aba52b774cd |
@@ -1 +0,0 @@ | |||||
ed60bba6f53c779335874c39966b7d5e4309e2c3 |
@@ -1 +0,0 @@ | |||||
f01b654d9a6a6ebc7efc65da240f83680de2131d |
@@ -1 +0,0 @@ | |||||
8e0088d97641d6ab39b808fe03ac0a7ec9ea99b9 |
@@ -1 +0,0 @@ | |||||
d72fffb08da82bb70ecc379bb1fa56316efda557 |
@@ -1 +0,0 @@ | |||||
4c2f8543201c0a66e44815dee128d9044a41c382 |
@@ -1 +0,0 @@ | |||||
25a0dd3110986418d379a887cc575f9fdc45a6da |
@@ -1 +0,0 @@ | |||||
44db328c252a8156434142a37ef65765869e7548 |
@@ -1 +0,0 @@ | |||||
bea5d1c052730eaba76b84ff5df854477cdfa80b |
@@ -1,63 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"datasetID": "kpi_dataset_TEST", | |||||
"datasetName": "NULL", | |||||
"description": "KPI time series for anomaly detection; each row has a 'timestamp', a metric 'value' and a binary 'ground_truth' anomaly label",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
"license": " CC Public Domain Mark 1.0 ", | |||||
"source": "OpenML", | |||||
"sourceURI": "http://www.openml.org/d/185", | |||||
"approximateSize": "", | |||||
"datasetSchemaVersion": "4.0.0", | |||||
"redacted": false, | |||||
"datasetVersion": "4.0.0" | |||||
}, | |||||
"dataResources": [ | |||||
{ | |||||
"resID": "learningData", | |||||
"resPath": "tables/learningData.csv", | |||||
"resType": "table", | |||||
"resFormat": { | |||||
"text/csv": [ | |||||
"csv" | |||||
] | |||||
}, | |||||
"isCollection": false, | |||||
"columns": [ | |||||
{ | |||||
"colIndex": 0, | |||||
"colName": "d3mIndex", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"index" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 1, | |||||
"colName": "timestamp", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 2, | |||||
"colName": "value", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 3, | |||||
"colName": "ground_truth", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"suggestedTarget" | |||||
] | |||||
} | |||||
], | |||||
"columnsCount": 4 | |||||
} | |||||
] | |||||
} |
@@ -1,65 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"problemID": "kpi_problem", | |||||
"problemName": "kpi_problem", | |||||
"problemDescription": "Anomaly detection", | |||||
"problemVersion": "4.0.0", | |||||
"problemSchemaVersion": "4.0.0", | |||||
"taskKeywords": [ | |||||
"classification", | |||||
"binary", | |||||
"tabular" | |||||
] | |||||
}, | |||||
"inputs": { | |||||
"data": [ | |||||
{ | |||||
"datasetID": "kpi_dataset", | |||||
"targets": [ | |||||
{ | |||||
"targetIndex": 0, | |||||
"resID": "learningData", | |||||
"colIndex": 3, | |||||
"colName": "ground_truth" | |||||
} | |||||
] | |||||
} | |||||
], | |||||
"dataSplits": { | |||||
"method": "holdOut", | |||||
"testSize": 0.2, | |||||
"stratified": true, | |||||
"numRepeats": 0, | |||||
"randomSeed": 42, | |||||
"splitsFile": "dataSplits.csv", | |||||
"datasetViewMaps": { | |||||
"train": [ | |||||
{ | |||||
"from": "kpi_dataset", | |||||
"to": "kpi_dataset_TRAIN" | |||||
} | |||||
], | |||||
"test": [ | |||||
{ | |||||
"from": "kpi_dataset", | |||||
"to": "kpi_dataset_TEST" | |||||
} | |||||
], | |||||
"score": [ | |||||
{ | |||||
"from": "kpi_dataset", | |||||
"to": "kpi_dataset_SCORE" | |||||
} | |||||
] | |||||
} | |||||
}, | |||||
"performanceMetrics": [ | |||||
{ | |||||
"metric": "f1Macro" | |||||
} | |||||
] | |||||
}, | |||||
"expectedOutputs": { | |||||
"predictionsFile": "predictions.csv" | |||||
} | |||||
} |
@@ -1,63 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"datasetID": "kpi_dataset_TEST", | |||||
"datasetName": "NULL", | |||||
"description": "KPI time series for anomaly detection; each row has a 'timestamp', a metric 'value' and a binary 'ground_truth' anomaly label",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
"license": " CC Public Domain Mark 1.0 ", | |||||
"source": "OpenML", | |||||
"sourceURI": "http://www.openml.org/d/185", | |||||
"approximateSize": "", | |||||
"datasetSchemaVersion": "4.0.0", | |||||
"redacted": false, | |||||
"datasetVersion": "4.0.0" | |||||
}, | |||||
"dataResources": [ | |||||
{ | |||||
"resID": "learningData", | |||||
"resPath": "tables/learningData.csv", | |||||
"resType": "table", | |||||
"resFormat": { | |||||
"text/csv": [ | |||||
"csv" | |||||
] | |||||
}, | |||||
"isCollection": false, | |||||
"columns": [ | |||||
{ | |||||
"colIndex": 0, | |||||
"colName": "d3mIndex", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"index" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 1, | |||||
"colName": "timestamp", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 2, | |||||
"colName": "value", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 3, | |||||
"colName": "ground_truth", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"suggestedTarget" | |||||
] | |||||
} | |||||
], | |||||
"columnsCount": 4 | |||||
} | |||||
] | |||||
} |
@@ -1,65 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"problemID": "kpi_problem", | |||||
"problemName": "kpi_problem", | |||||
"problemDescription": "Anomaly detection", | |||||
"problemVersion": "4.0.0", | |||||
"problemSchemaVersion": "4.0.0", | |||||
"taskKeywords": [ | |||||
"classification", | |||||
"binary", | |||||
"tabular" | |||||
] | |||||
}, | |||||
"inputs": { | |||||
"data": [ | |||||
{ | |||||
"datasetID": "kpi_dataset", | |||||
"targets": [ | |||||
{ | |||||
"targetIndex": 0, | |||||
"resID": "learningData", | |||||
"colIndex": 3, | |||||
"colName": "ground_truth" | |||||
} | |||||
] | |||||
} | |||||
], | |||||
"dataSplits": { | |||||
"method": "holdOut", | |||||
"testSize": 0.2, | |||||
"stratified": true, | |||||
"numRepeats": 0, | |||||
"randomSeed": 42, | |||||
"splitsFile": "dataSplits.csv", | |||||
"datasetViewMaps": { | |||||
"train": [ | |||||
{ | |||||
"from": "kpi_dataset", | |||||
"to": "kpi_dataset_TRAIN" | |||||
} | |||||
], | |||||
"test": [ | |||||
{ | |||||
"from": "kpi_dataset", | |||||
"to": "kpi_dataset_TEST" | |||||
} | |||||
], | |||||
"score": [ | |||||
{ | |||||
"from": "kpi_dataset", | |||||
"to": "kpi_dataset_SCORE" | |||||
} | |||||
] | |||||
} | |||||
}, | |||||
"performanceMetrics": [ | |||||
{ | |||||
"metric": "f1Macro" | |||||
} | |||||
] | |||||
}, | |||||
"expectedOutputs": { | |||||
"predictionsFile": "predictions.csv" | |||||
} | |||||
} |
@@ -1,63 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"datasetID": "kpi_dataset_TRAIN", | |||||
"datasetName": "NULL", | |||||
"description": "KPI time series for anomaly detection; each row has a 'timestamp', a metric 'value' and a binary 'ground_truth' anomaly label",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
"license": " CC Public Domain Mark 1.0 ", | |||||
"source": "OpenML", | |||||
"sourceURI": "http://www.openml.org/d/185", | |||||
"approximateSize": "", | |||||
"datasetSchemaVersion": "4.0.0", | |||||
"redacted": false, | |||||
"datasetVersion": "4.0.0" | |||||
}, | |||||
"dataResources": [ | |||||
{ | |||||
"resID": "learningData", | |||||
"resPath": "tables/learningData.csv", | |||||
"resType": "table", | |||||
"resFormat": { | |||||
"text/csv": [ | |||||
"csv" | |||||
] | |||||
}, | |||||
"isCollection": false, | |||||
"columns": [ | |||||
{ | |||||
"colIndex": 0, | |||||
"colName": "d3mIndex", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"index" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 1, | |||||
"colName": "timestamp", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 2, | |||||
"colName": "value", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 3, | |||||
"colName": "ground_truth", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"suggestedTarget" | |||||
] | |||||
} | |||||
], | |||||
"columnsCount": 4 | |||||
} | |||||
] | |||||
} |
@@ -1 +0,0 @@ | |||||
44db328c252a8156434142a37ef65765869e7548 |
@@ -1,65 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"problemID": "kpi_problem", | |||||
"problemName": "kpi_problem", | |||||
"problemDescription": "Anomaly detection", | |||||
"problemVersion": "4.0.0", | |||||
"problemSchemaVersion": "4.0.0", | |||||
"taskKeywords": [ | |||||
"classification", | |||||
"binary", | |||||
"tabular" | |||||
] | |||||
}, | |||||
"inputs": { | |||||
"data": [ | |||||
{ | |||||
"datasetID": "kpi_dataset", | |||||
"targets": [ | |||||
{ | |||||
"targetIndex": 0, | |||||
"resID": "learningData", | |||||
"colIndex": 3, | |||||
"colName": "ground_truth" | |||||
} | |||||
] | |||||
} | |||||
], | |||||
"dataSplits": { | |||||
"method": "holdOut", | |||||
"testSize": 0.2, | |||||
"stratified": true, | |||||
"numRepeats": 0, | |||||
"randomSeed": 42, | |||||
"splitsFile": "dataSplits.csv", | |||||
"datasetViewMaps": { | |||||
"train": [ | |||||
{ | |||||
"from": "kpi_dataset", | |||||
"to": "kpi_dataset_TRAIN" | |||||
} | |||||
], | |||||
"test": [ | |||||
{ | |||||
"from": "kpi_dataset", | |||||
"to": "kpi_dataset_TEST" | |||||
} | |||||
], | |||||
"score": [ | |||||
{ | |||||
"from": "kpi_dataset", | |||||
"to": "kpi_dataset_SCORE" | |||||
} | |||||
] | |||||
} | |||||
}, | |||||
"performanceMetrics": [ | |||||
{ | |||||
"metric": "f1Macro" | |||||
} | |||||
] | |||||
}, | |||||
"expectedOutputs": { | |||||
"predictionsFile": "predictions.csv" | |||||
} | |||||
} |
@@ -1,63 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"datasetID": "kpi_dataset", | |||||
"datasetName": "kpi", | |||||
"description": "KPI time series for anomaly detection; each row has a 'timestamp', a metric 'value' and a binary 'ground_truth' anomaly label",
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
"license": " CC Public Domain Mark 1.0 ", | |||||
"source": "OpenML", | |||||
"sourceURI": "http://www.openml.org/d/185", | |||||
"approximateSize": "", | |||||
"datasetSchemaVersion": "4.0.0", | |||||
"redacted": false, | |||||
"datasetVersion": "4.0.0" | |||||
}, | |||||
"dataResources": [ | |||||
{ | |||||
"resID": "learningData", | |||||
"resPath": "tables/learningData.csv", | |||||
"resType": "table", | |||||
"resFormat": { | |||||
"text/csv": [ | |||||
"csv" | |||||
] | |||||
}, | |||||
"isCollection": false, | |||||
"columns": [ | |||||
{ | |||||
"colIndex": 0, | |||||
"colName": "d3mIndex", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"index" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 1, | |||||
"colName": "timestamp", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 2, | |||||
"colName": "value", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 3, | |||||
"colName": "ground_truth", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"suggestedTarget" | |||||
] | |||||
} | |||||
], | |||||
"columnsCount": 4 | |||||
} | |||||
] | |||||
} |
@@ -1 +0,0 @@ | |||||
d80846dc46c173472f646a52005a1fb3670ccd09 |
@@ -1 +0,0 @@ | |||||
44db328c252a8156434142a37ef65765869e7548 |
@@ -1,65 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"problemID": "kpi_problem", | |||||
"problemName": "kpi_problem", | |||||
"problemDescription": "Anomaly detection", | |||||
"problemVersion": "4.0.0", | |||||
"problemSchemaVersion": "4.0.0", | |||||
"taskKeywords": [ | |||||
"classification", | |||||
"binary", | |||||
"tabular" | |||||
] | |||||
}, | |||||
"inputs": { | |||||
"data": [ | |||||
{ | |||||
"datasetID": "kpi_dataset", | |||||
"targets": [ | |||||
{ | |||||
"targetIndex": 0, | |||||
"resID": "learningData", | |||||
"colIndex": 3, | |||||
"colName": "ground_truth" | |||||
} | |||||
] | |||||
} | |||||
], | |||||
"dataSplits": { | |||||
"method": "holdOut", | |||||
"testSize": 0.2, | |||||
"stratified": true, | |||||
"numRepeats": 0, | |||||
"randomSeed": 42, | |||||
"splitsFile": "dataSplits.csv", | |||||
"datasetViewMaps": { | |||||
"train": [ | |||||
{ | |||||
"from": "kpi_dataset", | |||||
"to": "kpi_dataset_TRAIN" | |||||
} | |||||
], | |||||
"test": [ | |||||
{ | |||||
"from": "kpi_dataset", | |||||
"to": "kpi_dataset_TEST" | |||||
} | |||||
], | |||||
"score": [ | |||||
{ | |||||
"from": "kpi_dataset", | |||||
"to": "kpi_dataset_SCORE" | |||||
} | |||||
] | |||||
} | |||||
}, | |||||
"performanceMetrics": [ | |||||
{ | |||||
"metric": "f1Macro" | |||||
} | |||||
] | |||||
}, | |||||
"expectedOutputs": { | |||||
"predictionsFile": "predictions.csv" | |||||
} | |||||
} |
@@ -1 +0,0 @@ | |||||
bea5d1c052730eaba76b84ff5df854477cdfa80b |
@@ -1,183 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"datasetID": "template", | |||||
"datasetName": "baseball", | |||||
"description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'", | |||||
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
"license": " CC Public Domain Mark 1.0 ", | |||||
"source": "OpenML", | |||||
"sourceURI": "http://www.openml.org/d/185", | |||||
"approximateSize": "", | |||||
"datasetSchemaVersion": "4.0.0", | |||||
"redacted": false, | |||||
"datasetVersion": "4.0.0" | |||||
}, | |||||
"dataResources": [ | |||||
{ | |||||
"resID": "learningData", | |||||
"resPath": "tables/learningData.csv", | |||||
"resType": "table", | |||||
"resFormat": { | |||||
"text/csv": [ | |||||
"csv" | |||||
] | |||||
}, | |||||
"isCollection": false, | |||||
"columns": [ | |||||
{ | |||||
"colIndex": 0, | |||||
"colName": "d3mIndex", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"index" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 1, | |||||
"colName": "Player", | |||||
"colType": "categorical", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 2, | |||||
"colName": "Number_seasons", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 3, | |||||
"colName": "Games_played", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 4, | |||||
"colName": "At_bats", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 5, | |||||
"colName": "Runs", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 6, | |||||
"colName": "Hits", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 7, | |||||
"colName": "Doubles", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 8, | |||||
"colName": "Triples", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 9, | |||||
"colName": "Home_runs", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 10, | |||||
"colName": "RBIs", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 11, | |||||
"colName": "Walks", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 12, | |||||
"colName": "Strikeouts", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 13, | |||||
"colName": "Batting_average", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 14, | |||||
"colName": "On_base_pct", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 15, | |||||
"colName": "Slugging_pct", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 16, | |||||
"colName": "Fielding_ave", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 17, | |||||
"colName": "Position", | |||||
"colType": "categorical", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 18, | |||||
"colName": "Hall_of_Fame", | |||||
"colType": "categorical", | |||||
"role": [ | |||||
"suggestedTarget" | |||||
] | |||||
} | |||||
], | |||||
"columnsCount": 19 | |||||
} | |||||
] | |||||
} |
@@ -1,65 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"problemID": "template", | |||||
"problemName": "baseball_problem", | |||||
"problemDescription": "**Author**: Jeffrey S. Simonoff \n**Source**: [AnalCatData](http://www.stern.nyu.edu/~jsimonof/AnalCatData) - 2003 \n**Please cite**: Jeffrey S. Simonoff, Analyzing Categorical Data, Springer-Verlag, New York, 2003 \n \nDatabase of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave' \n\nNotes: \n* Quotes, Single-Quotes and Backslashes were removed, Blanks replaced with Underscores\n* Player is an identifier that should be ignored when modelling the data", | |||||
"problemVersion": "4.0.0", | |||||
"problemSchemaVersion": "4.0.0", | |||||
"taskKeywords": [ | |||||
"classification", | |||||
"multiClass", | |||||
"tabular" | |||||
] | |||||
}, | |||||
"inputs": { | |||||
"data": [ | |||||
{ | |||||
"datasetID": "185_baseball_dataset", | |||||
"targets": [ | |||||
{ | |||||
"targetIndex": 0, | |||||
"resID": "learningData", | |||||
"colIndex": 18, | |||||
"colName": "Hall_of_Fame" | |||||
} | |||||
] | |||||
} | |||||
], | |||||
"dataSplits": { | |||||
"method": "holdOut", | |||||
"testSize": 0.2, | |||||
"stratified": true, | |||||
"numRepeats": 0, | |||||
"randomSeed": 42, | |||||
"splitsFile": "dataSplits.csv", | |||||
"datasetViewMaps": { | |||||
"train": [ | |||||
{ | |||||
"from": "185_baseball_dataset", | |||||
"to": "185_baseball_dataset_TRAIN" | |||||
} | |||||
], | |||||
"test": [ | |||||
{ | |||||
"from": "185_baseball_dataset", | |||||
"to": "185_baseball_dataset_TEST" | |||||
} | |||||
], | |||||
"score": [ | |||||
{ | |||||
"from": "185_baseball_dataset", | |||||
"to": "185_baseball_dataset_SCORE" | |||||
} | |||||
] | |||||
} | |||||
}, | |||||
"performanceMetrics": [ | |||||
{ | |||||
"metric": "f1Macro" | |||||
} | |||||
] | |||||
}, | |||||
"expectedOutputs": { | |||||
"predictionsFile": "predictions.csv" | |||||
} | |||||
} |
@@ -1,160 +0,0 @@ | |||||
# TODO: Wrap it as a class and connect it to GUI | |||||
# A script to transform anomaly data to d3m format | |||||
import pandas as pd | |||||
import numpy as np | |||||
import os | |||||
import json | |||||
##############################
# Settings for the dataset to be transformed.
# Designed for time series data.
name = 'kpi'                     # prefix used for generated folder names and dataset/problem IDs
src_path = './raw_data/kpi.csv'  # raw CSV that is converted
label_name = 'label'             # source column copied to 'ground_truth'
timestamp_name = 'timestamp'     # source column copied to 'timestamp'
value_names = ['value']          # source columns copied unchanged between timestamp and label
ratio = 0.8  # Ratio of training data, the rest is for testing
###############################
dst_root = './' + name
dirs = ['./', 'SCORE', 'TEST', 'TRAIN']
# Sub-folder suffix used inside each directory; None marks the root, which
# holds the full (unsplit) dataset and problem. SCORE reuses the TEST suffix.
maps = {'./': None, 'SCORE': 'TEST', 'TEST': 'TEST', 'TRAIN': 'TRAIN'}

# Create the corresponding directories
for d in dirs:
    if maps[d] is not None:
        dataset_name = 'dataset_' + maps[d]
        problem_name = 'problem_' + maps[d]
    else:
        dataset_name = name + '_dataset'
        problem_name = name + '_problem'
    tables_dir = os.path.join(dst_root, d, dataset_name, 'tables')
    if not os.path.exists(tables_dir):
        os.makedirs(tables_dir)
    problem_dir = os.path.join(dst_root, d, problem_name)
    if not os.path.exists(problem_dir):
        os.makedirs(problem_dir)

# Process data: build the output table with the column order
# d3mIndex, timestamp, <value columns...>, ground_truth.
raw_df = pd.read_csv(src_path)
df = pd.DataFrame()
df['d3mIndex'] = raw_df.index
df['timestamp'] = raw_df[timestamp_name]
for value_name in value_names:
    df[value_name] = raw_df[value_name]
df['ground_truth'] = raw_df[label_name]
cols = df.columns.tolist()

# Save all the data
df.to_csv(os.path.join(dst_root, name + '_dataset', 'tables', 'learningData.csv'), index=False)

# Save training and testing data (sequential split: the first `ratio` of the
# rows is TRAIN, the remainder is TEST; SCORE gets a copy of the TEST rows).
split_at = int(df.shape[0] * ratio)
train_df, test_df = df[:split_at], df[split_at:]
train_df.to_csv(os.path.join(dst_root, 'TRAIN', 'dataset_TRAIN', 'tables', 'learningData.csv'), index=False)
test_df.to_csv(os.path.join(dst_root, 'TEST', 'dataset_TEST', 'tables', 'learningData.csv'), index=False)
test_df.to_csv(os.path.join(dst_root, 'SCORE', 'dataset_TEST', 'tables', 'learningData.csv'), index=False)

# Data splits: one row per data row, tagged TRAIN or TEST.
row_0 = train_df.shape[0]
# Fix: the TEST part must be sized from test_df. Using train_df here made the
# table 2 * len(train) rows long instead of len(train) + len(test).
row_1 = test_df.shape[0]
splits_df = pd.DataFrame(
    {
        'd3mIndex': df['d3mIndex'],  # same index values as learningData.csv
        'type': ['TRAIN'] * row_0 + ['TEST'] * row_1,
        'repeat': 0,
        'fold': 0,
    },
    columns=['d3mIndex', 'type', 'repeat', 'fold'],
)

# Save data splits for all data
# Fix: write the split table itself; the original wrote the training data
# (train_df) into dataSplits.csv.
splits_df.to_csv(os.path.join(dst_root, name + '_problem', 'dataSplits.csv'), index=False)

# Save training and testing splits
train_splits, test_splits = splits_df[:row_0], splits_df[row_0:]
train_splits.to_csv(os.path.join(dst_root, 'TRAIN', 'problem_TRAIN', 'dataSplits.csv'), index=False)
test_splits.to_csv(os.path.join(dst_root, 'TEST', 'problem_TEST', 'dataSplits.csv'), index=False)
test_splits.to_csv(os.path.join(dst_root, 'SCORE', 'problem_TEST', 'dataSplits.csv'), index=False)

# Dataset JSON files
# Load template
# NOTE(review): only 'columns', 'columnsCount', 'datasetID' and 'datasetName'
# are overridden below; every other field (description, citation, ...) is
# written out exactly as it appears in the template.
with open('template/datasetDoc.json') as json_file:
    data = json.load(json_file)

# Column metadata is assigned purely by position.
columns = []
last_col = len(cols) - 1
for i, col_name in enumerate(cols):
    c = {}
    c['colIndex'] = i
    c['colName'] = col_name
    if i == 0:
        # d3mIndex
        c['colType'] = 'integer'
        c['role'] = ['index']
    elif i == 1:
        # timestamp -- NOTE(review): declared as integer regardless of the
        # actual dtype of the source column; confirm for non-numeric timestamps.
        c['colType'] = 'integer'
        c['role'] = ['attribute']
    elif i == last_col:
        # ground_truth
        c['colType'] = 'integer'
        c['role'] = ['suggestedTarget']
    else:
        # value columns
        c['colType'] = 'real'
        c['role'] = ['attribute']
    columns.append(c)
data['dataResources'][0]['columns'] = columns
data['dataResources'][0]['columnsCount'] = len(cols)

# Full dataset
data['about']['datasetID'] = name + '_dataset'
data['about']['datasetName'] = name
with open(os.path.join(dst_root, name + '_dataset', 'datasetDoc.json'), 'w') as outfile:
    json.dump(data, outfile, indent=4)

# TRAIN view
data['about']['datasetID'] = name + '_dataset_TRAIN'
data['about']['datasetName'] = "NULL"
with open(os.path.join(dst_root, 'TRAIN', 'dataset_TRAIN', 'datasetDoc.json'), 'w') as outfile:
    json.dump(data, outfile, indent=4)

# TEST view
data['about']['datasetID'] = name + '_dataset_TEST'
data['about']['datasetName'] = 'NULL'
with open(os.path.join(dst_root, 'TEST', 'dataset_TEST', 'datasetDoc.json'), 'w') as outfile:
    json.dump(data, outfile, indent=4)

# SCORE view
# NOTE(review): this doc is written with the *_dataset_TEST ID, while the
# problem doc below maps the 'score' view to name + '_dataset_SCORE' --
# confirm which ID is intended.
data['about']['datasetID'] = name + '_dataset_TEST'
data['about']['datasetName'] = 'NULL'
with open(os.path.join(dst_root, 'SCORE', 'dataset_TEST', 'datasetDoc.json'), 'w') as outfile:
    json.dump(data, outfile, indent=4)

# Problem JSON files
# Load template
with open('template/problemDoc.json') as json_file:
    data = json.load(json_file)

data['about']['problemID'] = name + '_problem'
data['about']['problemName'] = name + '_problem'
data['about']['problemDescription'] = 'Anomaly detection'
data['about']['taskKeywords'] = ['classification', 'binary', 'tabular']

# The target is the last column of the generated table (ground_truth).
data['inputs']['data'][0]['datasetID'] = name + '_dataset'
data['inputs']['data'][0]['targets'][0]['colIndex'] = len(cols) - 1
data['inputs']['data'][0]['targets'][0]['colName'] = cols[-1]

view_maps = data['inputs']['dataSplits']['datasetViewMaps']
view_maps['train'][0]['from'] = name + '_dataset'
view_maps['test'][0]['from'] = name + '_dataset'
view_maps['score'][0]['from'] = name + '_dataset'
view_maps['train'][0]['to'] = name + '_dataset_TRAIN'
view_maps['test'][0]['to'] = name + '_dataset_TEST'
view_maps['score'][0]['to'] = name + '_dataset_SCORE'

# The same problem document is written to all four locations.
with open(os.path.join(dst_root, name + '_problem', 'problemDoc.json'), 'w') as outfile:
    json.dump(data, outfile, indent=4)
with open(os.path.join(dst_root, 'TRAIN', 'problem_TRAIN', 'problemDoc.json'), 'w') as outfile:
    json.dump(data, outfile, indent=4)
with open(os.path.join(dst_root, 'TEST', 'problem_TEST', 'problemDoc.json'), 'w') as outfile:
    json.dump(data, outfile, indent=4)
with open(os.path.join(dst_root, 'SCORE', 'problem_TEST', 'problemDoc.json'), 'w') as outfile:
    json.dump(data, outfile, indent=4)

# Make an empty targets.csv
with open(os.path.join(dst_root, 'SCORE', 'targets.csv'), 'w') as outfile:
    outfile.write('')
@@ -1,95 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"datasetID": "yahoo_sub_5_dataset_TEST", | |||||
"datasetName": "NULL", | |||||
"description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'", | |||||
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
"license": " CC Public Domain Mark 1.0 ", | |||||
"source": "OpenML", | |||||
"sourceURI": "http://www.openml.org/d/185", | |||||
"approximateSize": "", | |||||
"datasetSchemaVersion": "4.0.0", | |||||
"redacted": false, | |||||
"datasetVersion": "4.0.0" | |||||
}, | |||||
"dataResources": [ | |||||
{ | |||||
"resID": "learningData", | |||||
"resPath": "tables/learningData.csv", | |||||
"resType": "table", | |||||
"resFormat": { | |||||
"text/csv": [ | |||||
"csv" | |||||
] | |||||
}, | |||||
"isCollection": false, | |||||
"columns": [ | |||||
{ | |||||
"colIndex": 0, | |||||
"colName": "d3mIndex", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"index" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 1, | |||||
"colName": "timestamp", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 2, | |||||
"colName": "value_0", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 3, | |||||
"colName": "value_1", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 4, | |||||
"colName": "value_2", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 5, | |||||
"colName": "value_3", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 6, | |||||
"colName": "value_4", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 7, | |||||
"colName": "ground_truth", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"suggestedTarget" | |||||
] | |||||
} | |||||
], | |||||
"columnsCount": 8 | |||||
} | |||||
] | |||||
} |
@@ -1,141 +0,0 @@ | |||||
d3mIndex,timestamp,value_0,value_1,value_2,value_3,value_4,ground_truth | |||||
1260,1261,7782,0.034280386319742985,2.5072222222222003,104,3119,0 | |||||
1261,1262,7829,0.039360296791109,2.5927777777778,82,3590,0 | |||||
1262,1263,7902,0.0,2.6894444444444,208,3893,0 | |||||
1263,1264,8039,0.038944065994356014,2.6291666666667,92,3264,0 | |||||
1264,1265,8350,0.18176011684739,2.6469444444444,53,3963,0 | |||||
1265,1266,8142,0.18521047165852,2.7461111111111003,65,2757,0 | |||||
1266,1267,7886,0.13079770999921,2.9363888888889,62,2306,0 | |||||
1267,1268,7743,0.13310058077443,3.2797222222222,73,2549,0 | |||||
1268,1269,7707,0.054750658073534006,3.5194444444444,84,2212,0 | |||||
1269,1270,7726,0.030588852697706,3.8130555555556,90,2286,0 | |||||
1270,1271,7717,0.12998124134227002,3.7941666666667,80,2979,0 | |||||
1271,1272,10331,0.09100057249197198,3.6086111111111,90,3158,0 | |||||
1272,1273,10515,0.19464543002904006,3.3858333333333,84,2645,0 | |||||
1273,1274,10415,0.22178651521516,3.3336111111111,34,3161,0 | |||||
1274,1275,10387,0.22983578430825,3.3116666666667003,67,4460,0 | |||||
1275,1276,10471,0.298229429356,3.2616666666667005,74,2630,0 | |||||
1276,1277,10385,0.12923377484588,3.0044444444444003,44,2593,0 | |||||
1277,1278,10439,0.19609416059774,2.6741666666667,64,2625,0 | |||||
1278,1279,10516,0.040518533819385014,2.3191666666667,70,4834,0 | |||||
1279,1280,10587,0.07099894663641,2.0597222222222,96,4056,0 | |||||
1280,1281,10586,0.07584150637714701,2.0547222222222,110,5713,0 | |||||
1281,1282,10684,0.08180100127782801,2.1511111111111,68,3940,0 | |||||
1282,1283,10880,0.0,2.2602777777778,90,4414,0 | |||||
1283,1284,10830,0.0,2.2883333333333,90,5044,0 | |||||
1284,1285,10794,0.09140162014739303,2.3736111111111,69,3894,0 | |||||
1285,1286,10843,0.0,2.5869444444444,46,3993,0 | |||||
1286,1287,10805,0.0,2.6480555555556,74,4404,0 | |||||
1287,1288,10996,0.0,2.6077777777777995,68,4072,0 | |||||
1288,1289,11327,0.05363316840061,2.6069444444444,67,4182,0 | |||||
1289,1290,11090,0.26818151064716,2.6908333333333,51,3351,0 | |||||
1290,1291,10578,0.21887772653901,2.9019444444444003,39,4183,0 | |||||
1291,1292,10528,0.32371296573811,3.2711111111111,26,4068,0 | |||||
1292,1293,10475,0.12565805017257,3.5872222222222,25,8139,0 | |||||
1293,1294,10664,0.092277247744574,3.6913888888889,32,11000,0 | |||||
1294,1295,10513,0.077016875742983,3.6313888888889,17,2975,0 | |||||
1295,1296,9072,0.3714480797312501,3.5605555555556,19,2692,0 | |||||
1296,1297,9069,0.19332372237792,3.4402777777778,16,2502,0 | |||||
1297,1298,9089,0.06345811641554701,3.35,28,2510,0 | |||||
1298,1299,9027,0.2267121559473,3.3469444444444,24,2663,0 | |||||
1299,1300,8969,0.053072279964629,3.2708333333333,35,3575,0 | |||||
1300,1301,9073,0.13336345197744,3.2519444444444,49,2586,0 | |||||
1301,1302,8957,0.1252855094715,2.7311111111111,106,2908,0 | |||||
1302,1303,9126,0.096211952864224,2.3875,80,3530,0 | |||||
1303,1304,9122,0.096524467517755,2.0847222222222,90,2776,0 | |||||
1304,1305,9231,0.08924770147957402,2.0975,169,2962,0 | |||||
1305,1306,9368,0.11889606284162,2.1763888888889,98,3441,0 | |||||
1306,1307,9458,0.031429841710104,2.2327777777777995,92,4376,0 | |||||
1307,1308,9463,0.0,2.2725,91,3857,0 | |||||
1308,1309,9356,0.036512411627868,2.3202777777778,99,4685,0 | |||||
1309,1310,9340,0.0,2.5425,90,4585,0 | |||||
1310,1311,9340,0.0,2.5986111111111,126,3542,0 | |||||
1311,1312,9276,0.0,2.6319444444444,102,3370,0 | |||||
1312,1313,9611,0.10106696361212,2.5836111111111,132,3515,0 | |||||
1313,1314,9532,0.14854949043035,2.675,88,3793,0 | |||||
1314,1315,9156,0.08612162048398897,2.8522222222222,135,2954,0 | |||||
1315,1316,9222,0.16494200410492002,3.1302777777778,114,2627,0 | |||||
1316,1317,9282,0.28637713141253,3.4805555555556,35,2550,0 | |||||
1317,1318,9573,0.13206535647488,3.5994444444444,24,2480,0 | |||||
1318,1319,9333,0.27364025607799,3.5847222222222,44,2521,0 | |||||
1319,1320,9987,0.38382339961227,3.4963888888889,26,2860,0 | |||||
1320,1321,10133,0.08426242877623301,3.3825,37,3675,0 | |||||
1321,1322,10010,0.3290413568025901,3.2694444444444,45,2704,0 | |||||
1322,1323,10028,0.22632868808708,3.2322222222222,42,3121,0 | |||||
1323,1324,9984,0.17914189971361,3.1936111111111005,47,2603,0 | |||||
1324,1325,10041,0.30046815361859003,3.0536111111111004,34,3984,0 | |||||
1325,1326,10072,0.22650915594248,2.7819444444444,56,2537,0 | |||||
1326,1327,10025,0.0,2.4152777777778,87,3349,0 | |||||
1327,1328,10116,0.1223093269317,2.1569444444444,74,3958,0 | |||||
1328,1329,10232,0.1696074188221,2.1125,90,4243,0 | |||||
1329,1330,10516,0.0,2.1833333333333003,79,4159,0 | |||||
1330,1331,10449,0.028193633007367,2.205,97,5637,0 | |||||
1331,1332,10598,0.0,2.1697222222222,90,8142,0 | |||||
1332,1333,10337,0.0,2.3075,77,5713,0 | |||||
1333,1334,10469,0.097305232437507,2.4575,101,3668,0 | |||||
1334,1335,10426,0.11905908868379,2.6077777777777995,74,4307,0 | |||||
1335,1336,10531,0.11660374103282,2.6275,439,4354,0 | |||||
1336,1337,10875,0.060474297756584014,2.6144444444444,79,4262,0 | |||||
1337,1338,10494,0.22568442027805,2.6477777777777995,165,3446,0 | |||||
1338,1339,10195,0.14077736537045002,2.8594444444444003,139,2677,0 | |||||
1339,1340,9918,0.1924574892026,3.2675,56,4450,0 | |||||
1340,1341,9889,0.18922597300629,3.5136111111111004,102,3044,0 | |||||
1341,1342,9947,0.041593949118095004,3.5725,101,3428,0 | |||||
1342,1343,9977,0.2502095174271,3.6863888888889,41,2845,0 | |||||
1343,1344,10835,0.18663972932643,3.5636111111111,94,2781,0 | |||||
1344,1345,10765,0.07351854082400297,3.4127777777778,116,2743,0 | |||||
1345,1346,10656,0.081949111399618,3.295,94,4470,0 | |||||
1346,1347,10485,0.20148511394009,3.2666666666667004,89,2596,0 | |||||
1347,1348,10681,0.11515101921294,3.1933333333333,141,3249,0 | |||||
1348,1349,10852,0.07797276382811,3.0688888888889,167,2529,0 | |||||
1349,1350,10728,0.07244862879413201,2.8102777777778,148,2452,0 | |||||
1350,1351,10874,0.07310929970435699,2.42,105,2934,0 | |||||
1351,1352,10964,0.066868365737218,2.1358333333333,210,3159,0 | |||||
1352,1353,10984,0.05788512501593701,1.9916666666667,145,3974,0 | |||||
1353,1354,11055,0.09727414207464803,2.0947222222222,136,4305,0 | |||||
1354,1355,11233,0.033270317741558,2.1591666666667,126,5012,0 | |||||
1355,1356,11161,0.0,2.2377777777778,157,4455,0 | |||||
1356,1357,10966,0.038270957919533,2.2511111111111,105,4108,0 | |||||
1357,1358,11193,0.08728058888363299,2.4208333333333,114,4339,0 | |||||
1358,1359,11167,0.10536774813238,2.5241666666667,104,5056,0 | |||||
1359,1360,11367,0.1233991317089,2.5794444444444,69,5573,0 | |||||
1360,1361,51251,0.042565915766552,2.5936111111111,75,3366,1 | |||||
1361,1362,17953,0.23147422367229,2.6830555555556,73,2559,1 | |||||
1362,1363,170029,0.08983405162538903,2.8188888888889,74,1999,1 | |||||
1363,1364,10955,0.07464756469365201,2.9513888888888995,126,1993,0 | |||||
1364,1365,10984,0.099244104918934,3.2830555555556,67,1913,0 | |||||
1365,1366,10964,0.11535172009194,3.4819444444444,32,1760,0 | |||||
1366,1367,10980,0.21774881707852,3.5886111111111005,38,1890,0 | |||||
1367,1368,10852,0.1305066423559,3.4836111111111,34,2469,0 | |||||
1368,1369,10786,0.10054853030204,3.3955555555556,36,2133,0 | |||||
1369,1370,10841,0.02468393737575,3.2847222222222,26,3359,0 | |||||
1370,1371,10762,0.10018007414459,3.2383333333332995,74,3783,0 | |||||
1371,1372,10419,0.12522619841308,3.2188888888889,85,1809,0 | |||||
1372,1373,10467,0.11781887197077,2.9483333333333,67,2143,0 | |||||
1373,1374,10502,0.13417256350298,2.5855555555556,84,2567,0 | |||||
1374,1375,10519,0.07474686582090599,2.3005555555556003,1630,2176,0 | |||||
1375,1376,10579,0.13570963056519,2.0855555555556,1435,1929,0 | |||||
1376,1377,10502,0.076431907457478,1.9027777777778,857,2244,0 | |||||
1377,1378,10661,0.0,1.9411111111111,31,1810,0 | |||||
1378,1379,10818,0.1936428046839,2.0444444444444,500,2088,0 | |||||
1379,1380,10918,0.052826773889684014,2.1363888888889,53,2371,0 | |||||
1380,1381,10871,0.0,2.22,61,1843,0 | |||||
1381,1382,10796,0.054466597481213,2.3530555555556,158,2668,0 | |||||
1382,1383,10774,0.057459020289436,2.545,184,2309,0 | |||||
1383,1384,10898,0.28750562005936,2.6202777777778,91,1998,0 | |||||
1384,1385,11442,0.075538554674309,2.6847222222222,60,2480,0 | |||||
1385,1386,11113,0.08112608570492501,2.6591666666667004,107,2147,0 | |||||
1386,1387,10888,0.21563803296368,2.7863888888888995,5157,1802,0 | |||||
1387,1388,10894,0.095725002305685,3.0269444444444003,28,1789,0 | |||||
1388,1389,10888,0.17516056892320994,3.3227777777778,24,1999,0 | |||||
1389,1390,10896,0.32902836018586,3.6097222222222,21,2142,0 | |||||
1390,1391,10800,0.10216065221678,3.6805555555556,12,1904,0 | |||||
1391,1392,11000,0.19741931250852,3.6075,24,1876,0 | |||||
1392,1393,10985,0.10149107903671,3.4091666666667004,17,2434,0 | |||||
1393,1394,11017,0.17479255893624,3.3666666666667004,48,2472,0 | |||||
1394,1395,10863,0.034385029573777,3.3158333333333,41,1744,0 | |||||
1395,1396,10875,0.21988771218053,3.1622222222222,1088,2404,0 | |||||
1396,1397,10987,0.10149107903671,3.1086111111111,68,1971,0 | |||||
1397,1398,10778,0.10269981175445,2.6552777777778,2575,1713,0 | |||||
1398,1399,10957,0.11258759940039,2.2730555555556,4688,1765,0 | |||||
1399,1400,10832,0.13022351806001,2.0591666666667,477,3156,0 |
@@ -1,65 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"problemID": "yahoo_sub_5_problem", | |||||
"problemName": "yahoo_sub_5_problem", | |||||
"problemDescription": "Anomaly detection", | |||||
"problemVersion": "4.0.0", | |||||
"problemSchemaVersion": "4.0.0", | |||||
"taskKeywords": [ | |||||
"classification", | |||||
"binary", | |||||
"tabular" | |||||
] | |||||
}, | |||||
"inputs": { | |||||
"data": [ | |||||
{ | |||||
"datasetID": "yahoo_sub_5_dataset", | |||||
"targets": [ | |||||
{ | |||||
"targetIndex": 0, | |||||
"resID": "learningData", | |||||
"colIndex": 7, | |||||
"colName": "ground_truth" | |||||
} | |||||
] | |||||
} | |||||
], | |||||
"dataSplits": { | |||||
"method": "holdOut", | |||||
"testSize": 0.2, | |||||
"stratified": true, | |||||
"numRepeats": 0, | |||||
"randomSeed": 42, | |||||
"splitsFile": "dataSplits.csv", | |||||
"datasetViewMaps": { | |||||
"train": [ | |||||
{ | |||||
"from": "yahoo_sub_5_dataset", | |||||
"to": "yahoo_sub_5_dataset_TRAIN" | |||||
} | |||||
], | |||||
"test": [ | |||||
{ | |||||
"from": "yahoo_sub_5_dataset", | |||||
"to": "yahoo_sub_5_dataset_TEST" | |||||
} | |||||
], | |||||
"score": [ | |||||
{ | |||||
"from": "yahoo_sub_5_dataset", | |||||
"to": "yahoo_sub_5_dataset_SCORE" | |||||
} | |||||
] | |||||
} | |||||
}, | |||||
"performanceMetrics": [ | |||||
{ | |||||
"metric": "f1Macro" | |||||
} | |||||
] | |||||
}, | |||||
"expectedOutputs": { | |||||
"predictionsFile": "predictions.csv" | |||||
} | |||||
} |
@@ -1,95 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"datasetID": "yahoo_sub_5_dataset_TEST", | |||||
"datasetName": "NULL", | |||||
"description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'", | |||||
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
"license": " CC Public Domain Mark 1.0 ", | |||||
"source": "OpenML", | |||||
"sourceURI": "http://www.openml.org/d/185", | |||||
"approximateSize": "", | |||||
"datasetSchemaVersion": "4.0.0", | |||||
"redacted": false, | |||||
"datasetVersion": "4.0.0" | |||||
}, | |||||
"dataResources": [ | |||||
{ | |||||
"resID": "learningData", | |||||
"resPath": "tables/learningData.csv", | |||||
"resType": "table", | |||||
"resFormat": { | |||||
"text/csv": [ | |||||
"csv" | |||||
] | |||||
}, | |||||
"isCollection": false, | |||||
"columns": [ | |||||
{ | |||||
"colIndex": 0, | |||||
"colName": "d3mIndex", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"index" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 1, | |||||
"colName": "timestamp", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 2, | |||||
"colName": "value_0", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 3, | |||||
"colName": "value_1", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 4, | |||||
"colName": "value_2", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 5, | |||||
"colName": "value_3", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 6, | |||||
"colName": "value_4", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 7, | |||||
"colName": "ground_truth", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"suggestedTarget" | |||||
] | |||||
} | |||||
], | |||||
"columnsCount": 8 | |||||
} | |||||
] | |||||
} |
@@ -1,141 +0,0 @@ | |||||
d3mIndex,timestamp,value_0,value_1,value_2,value_3,value_4,ground_truth | |||||
1260,1261,7782,0.034280386319742985,2.5072222222222003,104,3119,0 | |||||
1261,1262,7829,0.039360296791109,2.5927777777778,82,3590,0 | |||||
1262,1263,7902,0.0,2.6894444444444,208,3893,0 | |||||
1263,1264,8039,0.038944065994356014,2.6291666666667,92,3264,0 | |||||
1264,1265,8350,0.18176011684739,2.6469444444444,53,3963,0 | |||||
1265,1266,8142,0.18521047165852,2.7461111111111003,65,2757,0 | |||||
1266,1267,7886,0.13079770999921,2.9363888888889,62,2306,0 | |||||
1267,1268,7743,0.13310058077443,3.2797222222222,73,2549,0 | |||||
1268,1269,7707,0.054750658073534006,3.5194444444444,84,2212,0 | |||||
1269,1270,7726,0.030588852697706,3.8130555555556,90,2286,0 | |||||
1270,1271,7717,0.12998124134227002,3.7941666666667,80,2979,0 | |||||
1271,1272,10331,0.09100057249197198,3.6086111111111,90,3158,0 | |||||
1272,1273,10515,0.19464543002904006,3.3858333333333,84,2645,0 | |||||
1273,1274,10415,0.22178651521516,3.3336111111111,34,3161,0 | |||||
1274,1275,10387,0.22983578430825,3.3116666666667003,67,4460,0 | |||||
1275,1276,10471,0.298229429356,3.2616666666667005,74,2630,0 | |||||
1276,1277,10385,0.12923377484588,3.0044444444444003,44,2593,0 | |||||
1277,1278,10439,0.19609416059774,2.6741666666667,64,2625,0 | |||||
1278,1279,10516,0.040518533819385014,2.3191666666667,70,4834,0 | |||||
1279,1280,10587,0.07099894663641,2.0597222222222,96,4056,0 | |||||
1280,1281,10586,0.07584150637714701,2.0547222222222,110,5713,0 | |||||
1281,1282,10684,0.08180100127782801,2.1511111111111,68,3940,0 | |||||
1282,1283,10880,0.0,2.2602777777778,90,4414,0 | |||||
1283,1284,10830,0.0,2.2883333333333,90,5044,0 | |||||
1284,1285,10794,0.09140162014739303,2.3736111111111,69,3894,0 | |||||
1285,1286,10843,0.0,2.5869444444444,46,3993,0 | |||||
1286,1287,10805,0.0,2.6480555555556,74,4404,0 | |||||
1287,1288,10996,0.0,2.6077777777777995,68,4072,0 | |||||
1288,1289,11327,0.05363316840061,2.6069444444444,67,4182,0 | |||||
1289,1290,11090,0.26818151064716,2.6908333333333,51,3351,0 | |||||
1290,1291,10578,0.21887772653901,2.9019444444444003,39,4183,0 | |||||
1291,1292,10528,0.32371296573811,3.2711111111111,26,4068,0 | |||||
1292,1293,10475,0.12565805017257,3.5872222222222,25,8139,0 | |||||
1293,1294,10664,0.092277247744574,3.6913888888889,32,11000,0 | |||||
1294,1295,10513,0.077016875742983,3.6313888888889,17,2975,0 | |||||
1295,1296,9072,0.3714480797312501,3.5605555555556,19,2692,0 | |||||
1296,1297,9069,0.19332372237792,3.4402777777778,16,2502,0 | |||||
1297,1298,9089,0.06345811641554701,3.35,28,2510,0 | |||||
1298,1299,9027,0.2267121559473,3.3469444444444,24,2663,0 | |||||
1299,1300,8969,0.053072279964629,3.2708333333333,35,3575,0 | |||||
1300,1301,9073,0.13336345197744,3.2519444444444,49,2586,0 | |||||
1301,1302,8957,0.1252855094715,2.7311111111111,106,2908,0 | |||||
1302,1303,9126,0.096211952864224,2.3875,80,3530,0 | |||||
1303,1304,9122,0.096524467517755,2.0847222222222,90,2776,0 | |||||
1304,1305,9231,0.08924770147957402,2.0975,169,2962,0 | |||||
1305,1306,9368,0.11889606284162,2.1763888888889,98,3441,0 | |||||
1306,1307,9458,0.031429841710104,2.2327777777777995,92,4376,0 | |||||
1307,1308,9463,0.0,2.2725,91,3857,0 | |||||
1308,1309,9356,0.036512411627868,2.3202777777778,99,4685,0 | |||||
1309,1310,9340,0.0,2.5425,90,4585,0 | |||||
1310,1311,9340,0.0,2.5986111111111,126,3542,0 | |||||
1311,1312,9276,0.0,2.6319444444444,102,3370,0 | |||||
1312,1313,9611,0.10106696361212,2.5836111111111,132,3515,0 | |||||
1313,1314,9532,0.14854949043035,2.675,88,3793,0 | |||||
1314,1315,9156,0.08612162048398897,2.8522222222222,135,2954,0 | |||||
1315,1316,9222,0.16494200410492002,3.1302777777778,114,2627,0 | |||||
1316,1317,9282,0.28637713141253,3.4805555555556,35,2550,0 | |||||
1317,1318,9573,0.13206535647488,3.5994444444444,24,2480,0 | |||||
1318,1319,9333,0.27364025607799,3.5847222222222,44,2521,0 | |||||
1319,1320,9987,0.38382339961227,3.4963888888889,26,2860,0 | |||||
1320,1321,10133,0.08426242877623301,3.3825,37,3675,0 | |||||
1321,1322,10010,0.3290413568025901,3.2694444444444,45,2704,0 | |||||
1322,1323,10028,0.22632868808708,3.2322222222222,42,3121,0 | |||||
1323,1324,9984,0.17914189971361,3.1936111111111005,47,2603,0 | |||||
1324,1325,10041,0.30046815361859003,3.0536111111111004,34,3984,0 | |||||
1325,1326,10072,0.22650915594248,2.7819444444444,56,2537,0 | |||||
1326,1327,10025,0.0,2.4152777777778,87,3349,0 | |||||
1327,1328,10116,0.1223093269317,2.1569444444444,74,3958,0 | |||||
1328,1329,10232,0.1696074188221,2.1125,90,4243,0 | |||||
1329,1330,10516,0.0,2.1833333333333003,79,4159,0 | |||||
1330,1331,10449,0.028193633007367,2.205,97,5637,0 | |||||
1331,1332,10598,0.0,2.1697222222222,90,8142,0 | |||||
1332,1333,10337,0.0,2.3075,77,5713,0 | |||||
1333,1334,10469,0.097305232437507,2.4575,101,3668,0 | |||||
1334,1335,10426,0.11905908868379,2.6077777777777995,74,4307,0 | |||||
1335,1336,10531,0.11660374103282,2.6275,439,4354,0 | |||||
1336,1337,10875,0.060474297756584014,2.6144444444444,79,4262,0 | |||||
1337,1338,10494,0.22568442027805,2.6477777777777995,165,3446,0 | |||||
1338,1339,10195,0.14077736537045002,2.8594444444444003,139,2677,0 | |||||
1339,1340,9918,0.1924574892026,3.2675,56,4450,0 | |||||
1340,1341,9889,0.18922597300629,3.5136111111111004,102,3044,0 | |||||
1341,1342,9947,0.041593949118095004,3.5725,101,3428,0 | |||||
1342,1343,9977,0.2502095174271,3.6863888888889,41,2845,0 | |||||
1343,1344,10835,0.18663972932643,3.5636111111111,94,2781,0 | |||||
1344,1345,10765,0.07351854082400297,3.4127777777778,116,2743,0 | |||||
1345,1346,10656,0.081949111399618,3.295,94,4470,0 | |||||
1346,1347,10485,0.20148511394009,3.2666666666667004,89,2596,0 | |||||
1347,1348,10681,0.11515101921294,3.1933333333333,141,3249,0 | |||||
1348,1349,10852,0.07797276382811,3.0688888888889,167,2529,0 | |||||
1349,1350,10728,0.07244862879413201,2.8102777777778,148,2452,0 | |||||
1350,1351,10874,0.07310929970435699,2.42,105,2934,0 | |||||
1351,1352,10964,0.066868365737218,2.1358333333333,210,3159,0 | |||||
1352,1353,10984,0.05788512501593701,1.9916666666667,145,3974,0 | |||||
1353,1354,11055,0.09727414207464803,2.0947222222222,136,4305,0 | |||||
1354,1355,11233,0.033270317741558,2.1591666666667,126,5012,0 | |||||
1355,1356,11161,0.0,2.2377777777778,157,4455,0 | |||||
1356,1357,10966,0.038270957919533,2.2511111111111,105,4108,0 | |||||
1357,1358,11193,0.08728058888363299,2.4208333333333,114,4339,0 | |||||
1358,1359,11167,0.10536774813238,2.5241666666667,104,5056,0 | |||||
1359,1360,11367,0.1233991317089,2.5794444444444,69,5573,0 | |||||
1360,1361,51251,0.042565915766552,2.5936111111111,75,3366,1 | |||||
1361,1362,17953,0.23147422367229,2.6830555555556,73,2559,1 | |||||
1362,1363,170029,0.08983405162538903,2.8188888888889,74,1999,1 | |||||
1363,1364,10955,0.07464756469365201,2.9513888888888995,126,1993,0 | |||||
1364,1365,10984,0.099244104918934,3.2830555555556,67,1913,0 | |||||
1365,1366,10964,0.11535172009194,3.4819444444444,32,1760,0 | |||||
1366,1367,10980,0.21774881707852,3.5886111111111005,38,1890,0 | |||||
1367,1368,10852,0.1305066423559,3.4836111111111,34,2469,0 | |||||
1368,1369,10786,0.10054853030204,3.3955555555556,36,2133,0 | |||||
1369,1370,10841,0.02468393737575,3.2847222222222,26,3359,0 | |||||
1370,1371,10762,0.10018007414459,3.2383333333332995,74,3783,0 | |||||
1371,1372,10419,0.12522619841308,3.2188888888889,85,1809,0 | |||||
1372,1373,10467,0.11781887197077,2.9483333333333,67,2143,0 | |||||
1373,1374,10502,0.13417256350298,2.5855555555556,84,2567,0 | |||||
1374,1375,10519,0.07474686582090599,2.3005555555556003,1630,2176,0 | |||||
1375,1376,10579,0.13570963056519,2.0855555555556,1435,1929,0 | |||||
1376,1377,10502,0.076431907457478,1.9027777777778,857,2244,0 | |||||
1377,1378,10661,0.0,1.9411111111111,31,1810,0 | |||||
1378,1379,10818,0.1936428046839,2.0444444444444,500,2088,0 | |||||
1379,1380,10918,0.052826773889684014,2.1363888888889,53,2371,0 | |||||
1380,1381,10871,0.0,2.22,61,1843,0 | |||||
1381,1382,10796,0.054466597481213,2.3530555555556,158,2668,0 | |||||
1382,1383,10774,0.057459020289436,2.545,184,2309,0 | |||||
1383,1384,10898,0.28750562005936,2.6202777777778,91,1998,0 | |||||
1384,1385,11442,0.075538554674309,2.6847222222222,60,2480,0 | |||||
1385,1386,11113,0.08112608570492501,2.6591666666667004,107,2147,0 | |||||
1386,1387,10888,0.21563803296368,2.7863888888888995,5157,1802,0 | |||||
1387,1388,10894,0.095725002305685,3.0269444444444003,28,1789,0 | |||||
1388,1389,10888,0.17516056892320994,3.3227777777778,24,1999,0 | |||||
1389,1390,10896,0.32902836018586,3.6097222222222,21,2142,0 | |||||
1390,1391,10800,0.10216065221678,3.6805555555556,12,1904,0 | |||||
1391,1392,11000,0.19741931250852,3.6075,24,1876,0 | |||||
1392,1393,10985,0.10149107903671,3.4091666666667004,17,2434,0 | |||||
1393,1394,11017,0.17479255893624,3.3666666666667004,48,2472,0 | |||||
1394,1395,10863,0.034385029573777,3.3158333333333,41,1744,0 | |||||
1395,1396,10875,0.21988771218053,3.1622222222222,1088,2404,0 | |||||
1396,1397,10987,0.10149107903671,3.1086111111111,68,1971,0 | |||||
1397,1398,10778,0.10269981175445,2.6552777777778,2575,1713,0 | |||||
1398,1399,10957,0.11258759940039,2.2730555555556,4688,1765,0 | |||||
1399,1400,10832,0.13022351806001,2.0591666666667,477,3156,0 |
@@ -1,65 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"problemID": "yahoo_sub_5_problem", | |||||
"problemName": "yahoo_sub_5_problem", | |||||
"problemDescription": "Anomaly detection", | |||||
"problemVersion": "4.0.0", | |||||
"problemSchemaVersion": "4.0.0", | |||||
"taskKeywords": [ | |||||
"classification", | |||||
"binary", | |||||
"tabular" | |||||
] | |||||
}, | |||||
"inputs": { | |||||
"data": [ | |||||
{ | |||||
"datasetID": "yahoo_sub_5_dataset", | |||||
"targets": [ | |||||
{ | |||||
"targetIndex": 0, | |||||
"resID": "learningData", | |||||
"colIndex": 7, | |||||
"colName": "ground_truth" | |||||
} | |||||
] | |||||
} | |||||
], | |||||
"dataSplits": { | |||||
"method": "holdOut", | |||||
"testSize": 0.2, | |||||
"stratified": true, | |||||
"numRepeats": 0, | |||||
"randomSeed": 42, | |||||
"splitsFile": "dataSplits.csv", | |||||
"datasetViewMaps": { | |||||
"train": [ | |||||
{ | |||||
"from": "yahoo_sub_5_dataset", | |||||
"to": "yahoo_sub_5_dataset_TRAIN" | |||||
} | |||||
], | |||||
"test": [ | |||||
{ | |||||
"from": "yahoo_sub_5_dataset", | |||||
"to": "yahoo_sub_5_dataset_TEST" | |||||
} | |||||
], | |||||
"score": [ | |||||
{ | |||||
"from": "yahoo_sub_5_dataset", | |||||
"to": "yahoo_sub_5_dataset_SCORE" | |||||
} | |||||
] | |||||
} | |||||
}, | |||||
"performanceMetrics": [ | |||||
{ | |||||
"metric": "f1Macro" | |||||
} | |||||
] | |||||
}, | |||||
"expectedOutputs": { | |||||
"predictionsFile": "predictions.csv" | |||||
} | |||||
} |
@@ -1,95 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"datasetID": "yahoo_sub_5_dataset_TRAIN", | |||||
"datasetName": "NULL", | |||||
"description": "Training view of the yahoo_sub_5 anomaly detection dataset: a table indexed by 'd3mIndex' with an integer 'timestamp', five real-valued metrics ('value_0', 'value_1', 'value_2', 'value_3', 'value_4') and an integer 'ground_truth' anomaly label", | |||||
"citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
"license": " CC Public Domain Mark 1.0 ", | |||||
"source": "OpenML", | |||||
"sourceURI": "http://www.openml.org/d/185", | |||||
"approximateSize": "", | |||||
"datasetSchemaVersion": "4.0.0", | |||||
"redacted": false, | |||||
"datasetVersion": "4.0.0" | |||||
}, | |||||
"dataResources": [ | |||||
{ | |||||
"resID": "learningData", | |||||
"resPath": "tables/learningData.csv", | |||||
"resType": "table", | |||||
"resFormat": { | |||||
"text/csv": [ | |||||
"csv" | |||||
] | |||||
}, | |||||
"isCollection": false, | |||||
"columns": [ | |||||
{ | |||||
"colIndex": 0, | |||||
"colName": "d3mIndex", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"index" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 1, | |||||
"colName": "timestamp", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 2, | |||||
"colName": "value_0", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 3, | |||||
"colName": "value_1", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 4, | |||||
"colName": "value_2", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 5, | |||||
"colName": "value_3", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 6, | |||||
"colName": "value_4", | |||||
"colType": "real", | |||||
"role": [ | |||||
"attribute" | |||||
] | |||||
}, | |||||
{ | |||||
"colIndex": 7, | |||||
"colName": "ground_truth", | |||||
"colType": "integer", | |||||
"role": [ | |||||
"suggestedTarget" | |||||
] | |||||
} | |||||
], | |||||
"columnsCount": 8 | |||||
} | |||||
] | |||||
} |
@@ -1,65 +0,0 @@ | |||||
{ | |||||
"about": { | |||||
"problemID": "yahoo_sub_5_problem", | |||||
"problemName": "yahoo_sub_5_problem", | |||||
"problemDescription": "Anomaly detection", | |||||
"problemVersion": "4.0.0", | |||||
"problemSchemaVersion": "4.0.0", | |||||
"taskKeywords": [ | |||||
"classification", | |||||
"binary", | |||||
"tabular" | |||||
] | |||||
}, | |||||
"inputs": { | |||||
"data": [ | |||||
{ | |||||
"datasetID": "yahoo_sub_5_dataset", | |||||
"targets": [ | |||||
{ | |||||
"targetIndex": 0, | |||||
"resID": "learningData", | |||||
"colIndex": 7, | |||||
"colName": "ground_truth" | |||||
} | |||||
] | |||||
} | |||||
], | |||||
"dataSplits": { | |||||
"method": "holdOut", | |||||
"testSize": 0.2, | |||||
"stratified": true, | |||||
"numRepeats": 0, | |||||
"randomSeed": 42, | |||||
"splitsFile": "dataSplits.csv", | |||||
"datasetViewMaps": { | |||||
"train": [ | |||||
{ | |||||
"from": "yahoo_sub_5_dataset", | |||||
"to": "yahoo_sub_5_dataset_TRAIN" | |||||
} | |||||
], | |||||
"test": [ | |||||
{ | |||||
"from": "yahoo_sub_5_dataset", | |||||
"to": "yahoo_sub_5_dataset_TEST" | |||||
} | |||||
], | |||||
"score": [ | |||||
{ | |||||
"from": "yahoo_sub_5_dataset", | |||||
"to": "yahoo_sub_5_dataset_SCORE" | |||||
} | |||||
] | |||||
} | |||||
}, | |||||
"performanceMetrics": [ | |||||
{ | |||||
"metric": "f1Macro" | |||||
} | |||||
] | |||||
}, | |||||
"expectedOutputs": { | |||||
"predictionsFile": "predictions.csv" | |||||
} | |||||
} |