@@ -1,12 +1,13 @@ | |||
# Automated Time-series Outlier Detection System
This is a time-series outlier detection system with automated machine learning.
# Time-series Outlier Detection System
TODS is a full-stack automated machine learning system for outlier detection on multivariate time-series data. TODS provides exhaustive modules for building machine learning-based outlier detection systems, including data processing, time-series processing, feature analysis (extraction), detection algorithms, and a reinforcement module. The functionalities provided by these modules include data preprocessing for general purposes, time-series data smoothing/transformation, extracting features from time/frequency domains, various detection algorithms, and involving human expertise to calibrate the system. Three common outlier detection scenarios on time-series data can be performed: point-wise detection (time points as outliers), pattern-wise detection (subsequences as outliers), and system-wise detection (sets of time series as outliers), and a wide range of corresponding algorithms are provided in TODS. This package is developed by [DATA Lab @ Texas A&M University](https://people.engr.tamu.edu/xiahu/index.html).
TODS is featured for: | |||
* **Full-Stack Machine Learning System**, which supports exhaustive components from preprocessing, feature extraction, and detection algorithms, and also a human-in-the-loop interface.
* **Wide Range of Algorithms**, including all of the point-wise detection algorithms supported by [PyOD](https://github.com/yzhao062/pyod), state-of-the-art pattern-wise (collective) detection algorithms such as [DeepLog](https://www.cs.utah.edu/~lifeifei/papers/deeplog.pdf) and [Telemanom](https://arxiv.org/pdf/1802.04431.pdf), and also various ensemble algorithms for performing system-wise detection.
* **Automated Machine Learning**, which aims to provide a knowledge-free process that constructs an optimal pipeline for the given data by automatically searching for the best combination from all of the existing modules.
## Axolotl | |||
Run a pre-defined pipeline:
``` | |||
python examples/build_AutoEncoder_pipeline.py | |||
python examples/run_predefined_pipeline.py | |||
``` | |||
## Installation | |||
@@ -44,100 +45,66 @@ cd .. | |||
Some dependencies may be missing from the list above; please install them manually if you run into errors.
# Dataset | |||
Datasets are located in `datasets/anomaly`. `raw_data` contains the raw time-series data. `transform.py` is a script that transforms the raw data into D3M format. `template` includes some templates for generating D3M data. If you run `transform.py`, the script will load the raw `kpi` data and create a folder named `kpi` in D3M format.
The generated csv file will have the following columns: `d3mIndex`, `timestamp`, `value`, `ground_truth`. In the example kpi dataset there is only one value; other datasets may have multiple values. The goal of the pipeline is to predict `ground_truth` based on `timestamp` and the value(s).
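For illustration, a file in this layout might begin like the following (the values below are made up):
```
d3mIndex,timestamp,value,ground_truth
0,1476460800,0.0126,0
1,1476460860,0.0108,0
2,1476460920,0.0174,1
```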
# Examples | |||
Examples are available in [/examples](examples/). For basic usage, you can evaluate a pipeline on a given dataset. Here, we provide an example that loads our default pipeline and evaluates it on a subset of the Yahoo dataset.
```python | |||
import pandas as pd | |||
There is a nice script to check whether the dataset is in the right format. Run | |||
``` | |||
python3 datasets/validate.py datasets/anomaly/kpi/ | |||
``` | |||
The expected output is as follows: | |||
``` | |||
Validating problem '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/SCORE/problem_TEST/problemDoc.json'. | |||
Validating dataset '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/SCORE/dataset_TEST/datasetDoc.json'. | |||
Validating problem '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/kpi_problem/problemDoc.json'. | |||
Validating problem '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/TEST/problem_TEST/problemDoc.json'. | |||
Validating dataset '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/TEST/dataset_TEST/datasetDoc.json'. | |||
Validating dataset '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/kpi_dataset/datasetDoc.json'. | |||
Validating dataset '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/TRAIN/dataset_TRAIN/datasetDoc.json'. | |||
Validating problem '/home/grads/d/daochen/tods/tods/datasets/anomaly/kpi/TRAIN/problem_TRAIN/problemDoc.json'. | |||
Validating all datasets and problems. | |||
There are no errors. | |||
``` | |||
You can also create other datasets with `transform.py`, but for now we focus on this example dataset since the other datasets follow the same format.
from tods import schemas as schemas_utils | |||
from tods.utils import generate_dataset_problem, evaluate_pipeline | |||
# Example | |||
In D3M, our goal is to provide a **solution** to a **problem** on a **dataset**. Here, a solution is a pipeline that consists of data processing, classifiers, etc.
table_path = 'datasets/yahoo_sub_5.csv' | |||
target_index = 6 # which column is the target
#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset
time_limit = 30 # how many seconds to search
#metric = 'F1' # F1 on label 1
metric = 'F1_MACRO' # F1 on both labels 0 and 1
Run the example to build the first pipeline with
``` | |||
python3 examples/build_iforest_pipline.py | |||
``` | |||
Note that we have not implemented iForest yet; this one is actually a Random Forest. This will generate a file `pipeline.yml`, which describes a pipeline. We can run the pipeline on the example data in this repo as follows:
``` | |||
python3 -m d3m runtime fit-produce -p pipeline.yml -r datasets/anomaly/kpi/TRAIN/problem_TRAIN/problemDoc.json -i datasets/anomaly/kpi/TRAIN/dataset_TRAIN/datasetDoc.json -t datasets/anomaly/kpi/TEST/dataset_TEST/datasetDoc.json -o results.csv -O pipeline_run.yml | |||
``` | |||
Another example on a subset of the sequences of the Yahoo dataset is as follows:
``` | |||
python3 -m d3m runtime fit-produce -p pipeline.yml -r datasets/anomaly/yahoo_sub_5/TRAIN/problem_TRAIN/problemDoc.json -i datasets/anomaly/yahoo_sub_5/TRAIN/dataset_TRAIN/datasetDoc.json -t datasets/anomaly/yahoo_sub_5/TEST/dataset_TEST/datasetDoc.json -o results.csv -O pipeline_run.yml | |||
``` | |||
The above commands will generate two files, `results.csv` and `pipeline_run.yml`.
# Read data and generate dataset and problem | |||
df = pd.read_csv(table_path) | |||
dataset, problem_description = generate_dataset_problem(df, target_index=target_index, metric=metric) | |||
# How to add a new primitive | |||
# Load the default pipeline | |||
pipeline = schemas_utils.load_default_pipeline() | |||
For new primitives, put them in `anomaly-primitives/anomaly_primitives/`. There is an example for Isolation Forest (however, it is essentially a Random Forest even though the name is IsolationForest; more effort is needed to change it to a real Isolation Forest).
In addition to adding the new file, you need to register the primitive in `anomaly-primitives/setup.py` and rerun pip install.
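For reference, d3m discovers primitives through the `d3m.primitives` entry-point group, so a hypothetical registration in `setup.py` might look like the sketch below (the package, module, and class names are placeholders, not the exact ones in this repo):
```python
from setuptools import setup

setup(
    name='anomaly-primitives',
    version='0.1.0',
    packages=['anomaly_primitives'],
    entry_points={
        # The left-hand side must match the primitive's python_path
        # without the leading 'd3m.primitives.' prefix.
        'd3m.primitives': [
            'anomaly_detection.isolation_forest.Algorithm = anomaly_primitives.SKIsolationForest:SKIsolationForest',
        ],
    },
)
```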
Use the following command to check whether your new primitives are registered: | |||
``` | |||
python3 -m d3m index search | |||
``` | |||
Test the new primitives: | |||
``` | |||
python3 examples/build_iforest_pipline.py | |||
# Run the pipeline | |||
pipeline_result = evaluate_pipeline(problem_description, dataset, pipeline) | |||
``` | |||
We also provide AutoML support to help you automatically find a good pipeline for your data.
```python | |||
import pandas as pd | |||
# Template for meta-data in primitives | |||
* `__author__`: `DATA Lab at Texas A&M University` | |||
* `name`: Just a name; describe your primitive in a few words.
* `python_path`: This path should have **5** segments. The first two segments should be `d3m.primitives`. The third segment should be `anomaly_detection`, `data_preprocessing` or `feature_construction` (it should match `primitive_family`). The fourth segment should be your algorithm name, e.g., `isolation_forest`. Note that this name should also be added to [this file](d3m/d3m/metadata/primitive_names.py). The last segment should be one of `Preprocessing`, `Feature`, `Algorithm` (for now).
* `source`: `name` should be `DATA Lab at Texas A&M University`, `contact` should be `mailto:khlai037@tamu.edu`, `uris` should have `https://gitlab.com/lhenry15/tods.git` and the path to your .py file.
* `algorithms_types`: Name the algorithm type yourself and add it to [this file](d3m/d3m/metadata/schemas/v0/definitions.json#L1957). **Then reinstall d3m.** Fill this field with `metadata_base.PrimitiveAlgorithmType.YOUR_NAME`.
* `primitive_family`: For preprocessing primitives, use `metadata_base.PrimitiveFamily.DATA_PREPROCESSING`. For feature analysis primitives, use `metadata_base.PrimitiveFamily.FEATURE_CONSTRUCTION`. For anomaly detection primitives, use `metadata_base.PrimitiveFamily.ANOMALY_DETECTION`. | |||
* `id`: Randomly generate one with `import uuid; uuid.uuid4()` | |||
* `hyperparameters_to_tune`: Specify what hyperparameters can be tuned in your primitive | |||
* `version`: `0.0.1` | |||
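Putting the fields above together, a metadata block for a hypothetical primitive might look roughly like the sketch below (the id, name, and algorithm-type values are placeholders; follow the template above for the real values):
```python
from d3m.metadata import base as metadata_base

metadata = metadata_base.PrimitiveMetadata({
    # Placeholder: generate your own id once with uuid.uuid4().
    'id': '00000000-0000-0000-0000-000000000000',
    'version': '0.0.1',
    'name': 'Isolation Forest anomaly detection',
    # 5 segments: d3m.primitives.<primitive_family>.<algorithm_name>.<suffix>
    'python_path': 'd3m.primitives.anomaly_detection.isolation_forest.Algorithm',
    'source': {
        'name': 'DATA Lab at Texas A&M University',
        'contact': 'mailto:khlai037@tamu.edu',
        'uris': ['https://gitlab.com/lhenry15/tods.git'],
    },
    # Placeholder: the algorithm type you added to definitions.json.
    'algorithm_types': [metadata_base.PrimitiveAlgorithmType.YOUR_NAME],
    'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION,
})
```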
Notes: | |||
from axolotl.backend.simple import SimpleRunner | |||
1. `installation` is not required; we have removed it.
from tods.utils import generate_dataset_problem | |||
from tods.search import BruteForceSearch | |||
2. Try to reinstall everything if it does not work. | |||
# Some information | |||
#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_GOOG.csv' # The path of the dataset | |||
#target_index = 2 # what column is the target | |||
3. An example of fake Isolation Forest is [here](anomaly-primitives/anomaly_primitives/SKIsolationForest.py#L294) | |||
table_path = 'datasets/yahoo_sub_5.csv' | |||
target_index = 6 # which column is the target
#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset
time_limit = 30 # how many seconds to search
#metric = 'F1' # F1 on label 1
metric = 'F1_MACRO' # F1 on both labels 0 and 1
# Read data and generate dataset and problem | |||
df = pd.read_csv(table_path) | |||
dataset, problem_description = generate_dataset_problem(df, target_index=target_index, metric=metric) | |||
## Resources of D3M | |||
# Start backend | |||
backend = SimpleRunner(random_seed=0) | |||
If you still have questions, you may refer to the following resources. | |||
# Start search algorithm | |||
search = BruteForceSearch(problem_description=problem_description, backend=backend) | |||
Dataset format [https://gitlab.com/datadrivendiscovery/data-supply](https://gitlab.com/datadrivendiscovery/data-supply) | |||
# Find the best pipeline | |||
best_runtime, best_pipeline_result = search.search_fit(input_data=[dataset], time_limit=time_limit) | |||
best_pipeline = best_runtime.pipeline | |||
best_output = best_pipeline_result.output | |||
Instructions for creating primitives [https://docs.datadrivendiscovery.org/v2020.1.9/interfaces.html](https://docs.datadrivendiscovery.org/v2020.1.9/interfaces.html) | |||
We use a stable version of d3m core package at [https://gitlab.com/datadrivendiscovery/d3m/-/tree/v2020.1.9](https://gitlab.com/datadrivendiscovery/d3m/-/tree/v2020.1.9). | |||
The documentation is at [https://docs.datadrivendiscovery.org/](https://docs.datadrivendiscovery.org/). | |||
The core package documentation is at [https://docs.datadrivendiscovery.org/v2020.1.9/index.html](https://docs.datadrivendiscovery.org/v2020.1.9/index.html) | |||
The common-primitives package is v0.8.0, at [https://gitlab.com/datadrivendiscovery/common-primitives/-/tree/v0.8.0/common_primitives](https://gitlab.com/datadrivendiscovery/common-primitives/-/tree/v0.8.0/common_primitives)
The sklearn-wrap uses dist branch [https://gitlab.com/datadrivendiscovery/sklearn-wrap/-/tree/dist](https://gitlab.com/datadrivendiscovery/sklearn-wrap/-/tree/dist) | |||
There are other primitives developed by many universities, but they are not used in this repo. See [https://gitlab.com/datadrivendiscovery/primitives](https://gitlab.com/datadrivendiscovery/primitives)
# Evaluate the best pipeline | |||
best_scores = search.evaluate(best_pipeline).scores | |||
``` |
@@ -151,7 +151,7 @@ class PipelineSearchBase: | |||
logging.error('No solution found')
pipeline_result = PipelineResult(fitted_pipeline_id='') | |||
pipeline_result.error = RuntimeError("No solution found") | |||
return _, pipeline_result | |||
return None, pipeline_result | |||
return self.fit(best_pipeline.pipeline, input_data, expose_values) | |||
@@ -0,0 +1,68 @@ | |||
NAB Data Corpus | |||
--- | |||
Data are ordered, timestamped, single-valued metrics. All data files contain anomalies, unless otherwise noted. | |||
### Real data | |||
- realAWSCloudwatch/ | |||
AWS server metrics as collected by the AmazonCloudwatch service. Example metrics include CPU Utilization, Network Bytes In, and Disk Read Bytes. | |||
- realAdExchange/ | |||
Online advertisement clicking rates, where the metrics are cost-per-click (CPC) and cost per thousand impressions (CPM). One of the files is normal, without anomalies. | |||
- realKnownCause/ | |||
This is data for which we know the anomaly causes; no hand labeling. | |||
- ambient_temperature_system_failure.csv: The ambient temperature in an office | |||
setting. | |||
- cpu_utilization_asg_misconfiguration.csv: From Amazon Web Services (AWS) | |||
monitoring CPU usage – i.e. average CPU usage across a given cluster. When | |||
usage is high, AWS spins up a new machine, and uses fewer machines when usage | |||
is low. | |||
- ec2_request_latency_system_failure.csv: CPU usage data from a server in | |||
Amazon's East Coast datacenter. The dataset ends with complete system failure | |||
resulting from a documented failure of AWS API servers. There's an interesting | |||
story behind this data in the [Numenta | |||
blog](http://numenta.com/blog/anomaly-of-the-week.html). | |||
- machine_temperature_system_failure.csv: Temperature sensor data of an | |||
internal component of a large, industrial machine. The first anomaly is a
planned shutdown of the machine. The second anomaly is difficult to detect and | |||
directly led to the third anomaly, a catastrophic failure of the machine. | |||
- nyc_taxi.csv: Number of NYC taxi passengers, where the five anomalies occur | |||
during the NYC marathon, Thanksgiving, Christmas, New Year's Day, and a snow
storm. The raw data is from the [NYC Taxi and Limousine Commission](http://www.nyc.gov/html/tlc/html/about/trip_record_data.shtml). | |||
The data file included here aggregates the total number of taxi passengers
into 30-minute buckets (a rough sketch of this aggregation appears after this list).
- rogue_agent_key_hold.csv: Timing the key holds for several users of a | |||
computer, where the anomalies represent a change in the user. | |||
- rogue_agent_key_updown.csv: Timing the key strokes for several users of a | |||
computer, where the anomalies represent a change in the user. | |||
- realTraffic/ | |||
Real-time traffic data from the Twin Cities Metro area in Minnesota, collected
by the | |||
[Minnesota Department of Transportation](http://www.dot.state.mn.us/tmc/trafficinfo/developers.html). | |||
The metrics include occupancy, speed, and travel time from specific
sensors. | |||
- realTweets/ | |||
A collection of Twitter mentions of large publicly-traded companies | |||
such as Google and IBM. The metric value represents the number of mentions | |||
for a given ticker symbol every 5 minutes. | |||
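As referenced above for nyc_taxi.csv, a rough sketch of that kind of 30-minute aggregation with pandas is shown below; the raw file name and column names are assumptions for illustration, not the actual TLC schema:
```python
import pandas as pd

# Hypothetical raw trip records, one row per pickup (column names assumed).
trips = pd.read_csv('raw_taxi_trips.csv', parse_dates=['pickup_datetime'])

# Sum passenger counts into 30-minute buckets, similar to nyc_taxi.csv.
counts = (
    trips.set_index('pickup_datetime')['passenger_count']
         .resample('30min')
         .sum()
         .reset_index()
         .rename(columns={'pickup_datetime': 'timestamp', 'passenger_count': 'value'})
)
counts.to_csv('nyc_taxi.csv', index=False)
```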
### Artificial data | |||
- artificialNoAnomaly/ | |||
Artificially-generated data without any anomalies. | |||
- artificialWithAnomaly/ | |||
Artificially-generated data with varying types of anomalies. |
@@ -0,0 +1,28 @@ | |||
import datetime
import json
import os

import pandas as pd

# Load the mapping from each NAB csv file to its labeled anomaly timestamps.
with open('combined_labels.json', 'r') as label_file:
    label_info = json.load(label_file)

for key in label_info:
    df = pd.read_csv(key)
    fpath, fname = key.split('/')[0], key.split('/')[1]
    label = []
    unix_timestamp = []
    for _, row in df.iterrows():
        # Mark a row as an anomaly ('1') if its timestamp is listed for this file.
        if row['timestamp'] in label_info[key]:
            label.append('1')
        else:
            label.append('0')
        # Convert the human-readable timestamp into a Unix timestamp.
        timestamp = datetime.datetime.strptime(row['timestamp'], '%Y-%m-%d %H:%M:%S').timestamp()
        unix_timestamp.append(timestamp)
    df['label'] = label
    df['timestamp'] = unix_timestamp
    df.to_csv(fpath + "/labeled_" + fname, index=False)
    #os.remove(key)
@@ -0,0 +1,232 @@ | |||
{ | |||
"artificialNoAnomaly/art_daily_no_noise.csv": [], | |||
"artificialNoAnomaly/art_daily_perfect_square_wave.csv": [], | |||
"artificialNoAnomaly/art_daily_small_noise.csv": [], | |||
"artificialNoAnomaly/art_flatline.csv": [], | |||
"artificialNoAnomaly/art_noisy.csv": [], | |||
"artificialWithAnomaly/art_daily_flatmiddle.csv": [ | |||
"2014-04-11 00:00:00" | |||
], | |||
"artificialWithAnomaly/art_daily_jumpsdown.csv": [ | |||
"2014-04-11 09:00:00" | |||
], | |||
"artificialWithAnomaly/art_daily_jumpsup.csv": [ | |||
"2014-04-11 09:00:00" | |||
], | |||
"artificialWithAnomaly/art_daily_nojump.csv": [ | |||
"2014-04-11 09:00:00" | |||
], | |||
"artificialWithAnomaly/art_increase_spike_density.csv": [ | |||
"2014-04-07 23:10:00" | |||
], | |||
"artificialWithAnomaly/art_load_balancer_spikes.csv": [ | |||
"2014-04-11 04:35:00" | |||
], | |||
"realAWSCloudwatch/ec2_cpu_utilization_24ae8d.csv": [ | |||
"2014-02-26 22:05:00", | |||
"2014-02-27 17:15:00" | |||
], | |||
"realAWSCloudwatch/ec2_cpu_utilization_53ea38.csv": [ | |||
"2014-02-19 19:10:00", | |||
"2014-02-23 20:05:00" | |||
], | |||
"realAWSCloudwatch/ec2_cpu_utilization_5f5533.csv": [ | |||
"2014-02-19 00:22:00", | |||
"2014-02-24 18:37:00" | |||
], | |||
"realAWSCloudwatch/ec2_cpu_utilization_77c1ca.csv": [ | |||
"2014-04-09 10:15:00" | |||
], | |||
"realAWSCloudwatch/ec2_cpu_utilization_825cc2.csv": [ | |||
"2014-04-15 15:44:00", | |||
"2014-04-16 03:34:00" | |||
], | |||
"realAWSCloudwatch/ec2_cpu_utilization_ac20cd.csv": [ | |||
"2014-04-15 00:49:00" | |||
], | |||
"realAWSCloudwatch/ec2_cpu_utilization_c6585a.csv": [], | |||
"realAWSCloudwatch/ec2_cpu_utilization_fe7f93.csv": [ | |||
"2014-02-17 06:12:00", | |||
"2014-02-22 00:02:00", | |||
"2014-02-23 15:17:00" | |||
], | |||
"realAWSCloudwatch/ec2_disk_write_bytes_1ef3de.csv": [ | |||
"2014-03-10 21:09:00" | |||
], | |||
"realAWSCloudwatch/ec2_disk_write_bytes_c0d644.csv": [ | |||
"2014-04-09 01:30:00", | |||
"2014-04-10 14:35:00", | |||
"2014-04-13 03:00:00" | |||
], | |||
"realAWSCloudwatch/ec2_network_in_257a54.csv": [ | |||
"2014-04-15 16:44:00" | |||
], | |||
"realAWSCloudwatch/ec2_network_in_5abac7.csv": [ | |||
"2014-03-10 18:56:00", | |||
"2014-03-12 21:01:00" | |||
], | |||
"realAWSCloudwatch/elb_request_count_8c0756.csv": [ | |||
"2014-04-12 17:24:00", | |||
"2014-04-22 19:34:00" | |||
], | |||
"realAWSCloudwatch/grok_asg_anomaly.csv": [ | |||
"2014-01-20 08:30:00", | |||
"2014-01-21 10:45:00", | |||
"2014-01-29 00:45:00" | |||
], | |||
"realAWSCloudwatch/iio_us-east-1_i-a2eb1cd9_NetworkIn.csv": [ | |||
"2013-10-10 09:35:00", | |||
"2013-10-10 20:40:00" | |||
], | |||
"realAWSCloudwatch/rds_cpu_utilization_cc0c53.csv": [ | |||
"2014-02-25 07:15:00", | |||
"2014-02-27 00:50:00" | |||
], | |||
"realAWSCloudwatch/rds_cpu_utilization_e47b3b.csv": [ | |||
"2014-04-13 06:52:00", | |||
"2014-04-18 23:27:00" | |||
], | |||
"realAdExchange/exchange-2_cpc_results.csv": [ | |||
"2011-07-14 13:00:01" | |||
], | |||
"realAdExchange/exchange-2_cpm_results.csv": [ | |||
"2011-07-26 06:00:01", | |||
"2011-08-10 17:00:01" | |||
], | |||
"realAdExchange/exchange-3_cpc_results.csv": [ | |||
"2011-07-14 10:15:01", | |||
"2011-07-20 10:15:01", | |||
"2011-08-13 10:15:01" | |||
], | |||
"realAdExchange/exchange-3_cpm_results.csv": [ | |||
"2011-08-19 18:15:01" | |||
], | |||
"realAdExchange/exchange-4_cpc_results.csv": [ | |||
"2011-07-16 09:15:01", | |||
"2011-08-02 12:15:01", | |||
"2011-08-23 08:15:01" | |||
], | |||
"realAdExchange/exchange-4_cpm_results.csv": [ | |||
"2011-07-16 09:15:01", | |||
"2011-08-01 07:15:01", | |||
"2011-08-23 08:15:01", | |||
"2011-08-28 13:15:01" | |||
], | |||
"realKnownCause/ambient_temperature_system_failure.csv": [ | |||
"2013-12-22 20:00:00", | |||
"2014-04-13 09:00:00" | |||
], | |||
"realKnownCause/cpu_utilization_asg_misconfiguration.csv": [ | |||
"2014-07-12 02:04:00", | |||
"2014-07-14 21:44:00" | |||
], | |||
"realKnownCause/ec2_request_latency_system_failure.csv": [ | |||
"2014-03-14 09:06:00", | |||
"2014-03-18 22:41:00", | |||
"2014-03-21 03:01:00" | |||
], | |||
"realKnownCause/machine_temperature_system_failure.csv": [ | |||
"2013-12-11 06:00:00", | |||
"2013-12-16 17:25:00", | |||
"2014-01-28 13:55:00", | |||
"2014-02-08 14:30:00" | |||
], | |||
"realKnownCause/nyc_taxi.csv": [ | |||
"2014-11-01 19:00:00", | |||
"2014-11-27 15:30:00", | |||
"2014-12-25 15:00:00", | |||
"2015-01-01 01:00:00", | |||
"2015-01-27 00:00:00" | |||
], | |||
"realKnownCause/rogue_agent_key_hold.csv": [ | |||
"2014-07-15 08:30:00", | |||
"2014-07-17 09:50:00" | |||
], | |||
"realKnownCause/rogue_agent_key_updown.csv": [ | |||
"2014-07-15 04:00:00", | |||
"2014-07-17 08:50:00" | |||
], | |||
"realTraffic/TravelTime_387.csv": [ | |||
"2015-07-30 12:29:00", | |||
"2015-08-18 16:26:00", | |||
"2015-09-01 05:34:00" | |||
], | |||
"realTraffic/TravelTime_451.csv": [ | |||
"2015-08-11 12:07:00" | |||
], | |||
"realTraffic/occupancy_6005.csv": [ | |||
"2015-09-15 06:55:00" | |||
], | |||
"realTraffic/occupancy_t4013.csv": [ | |||
"2015-09-16 08:09:00", | |||
"2015-09-17 07:55:00" | |||
], | |||
"realTraffic/speed_6005.csv": [ | |||
"2015-09-17 07:00:00" | |||
], | |||
"realTraffic/speed_7578.csv": [ | |||
"2015-09-11 16:44:00", | |||
"2015-09-15 14:34:00", | |||
"2015-09-16 14:14:00", | |||
"2015-09-16 17:10:00" | |||
], | |||
"realTraffic/speed_t4013.csv": [ | |||
"2015-09-16 08:04:00", | |||
"2015-09-17 08:15:00" | |||
], | |||
"realTweets/Twitter_volume_AAPL.csv": [ | |||
"2015-03-03 21:07:53", | |||
"2015-03-09 17:32:53", | |||
"2015-03-16 02:57:53", | |||
"2015-03-31 03:27:53" | |||
], | |||
"realTweets/Twitter_volume_AMZN.csv": [ | |||
"2015-03-05 19:47:53", | |||
"2015-03-11 20:57:53", | |||
"2015-04-01 21:57:53", | |||
"2015-04-08 04:52:53" | |||
], | |||
"realTweets/Twitter_volume_CRM.csv": [ | |||
"2015-03-09 19:07:53", | |||
"2015-03-19 23:07:53", | |||
"2015-03-26 19:07:53" | |||
], | |||
"realTweets/Twitter_volume_CVS.csv": [ | |||
"2015-03-04 16:02:53", | |||
"2015-03-05 19:57:53", | |||
"2015-03-26 14:07:53", | |||
"2015-04-14 22:37:53" | |||
], | |||
"realTweets/Twitter_volume_FB.csv": [ | |||
"2015-03-16 07:07:53", | |||
"2015-04-03 17:47:53" | |||
], | |||
"realTweets/Twitter_volume_GOOG.csv": [ | |||
"2015-03-13 20:22:53", | |||
"2015-03-14 16:27:53", | |||
"2015-03-22 22:52:53", | |||
"2015-04-01 05:27:53" | |||
], | |||
"realTweets/Twitter_volume_IBM.csv": [ | |||
"2015-03-23 22:27:53", | |||
"2015-04-20 20:07:53" | |||
], | |||
"realTweets/Twitter_volume_KO.csv": [ | |||
"2015-03-20 13:12:53", | |||
"2015-04-08 23:42:53", | |||
"2015-04-14 14:52:53" | |||
], | |||
"realTweets/Twitter_volume_PFE.csv": [ | |||
"2015-03-02 21:22:53", | |||
"2015-03-04 10:32:53", | |||
"2015-03-13 19:57:53", | |||
"2015-04-07 23:42:53" | |||
], | |||
"realTweets/Twitter_volume_UPS.csv": [ | |||
"2015-03-03 00:27:53", | |||
"2015-03-04 11:07:53", | |||
"2015-03-05 15:22:53", | |||
"2015-03-24 18:17:53", | |||
"2015-03-29 16:27:53" | |||
] | |||
} |
@@ -0,0 +1 @@ | |||
948611b07519538ef036e0ec1c948f6bf97009cf |
@@ -0,0 +1 @@ | |||
428229640a5466e68014f74649a24f00abb1150b |
@@ -0,0 +1 @@ | |||
68dd1084ed091fb9affe45b4e0894250c6c62c07 |
@@ -0,0 +1 @@ | |||
7bebf0fe077dda56f789d644090faf1d2484913c |
@@ -0,0 +1 @@ | |||
59fdf3c2b8d171704e3de1e10d8ccfca72c8ab9a |
@@ -0,0 +1 @@ | |||
d20453833fc13c681f0b5f5a830f3aba52b774cd |
@@ -0,0 +1 @@ | |||
ed60bba6f53c779335874c39966b7d5e4309e2c3 |
@@ -0,0 +1 @@ | |||
f01b654d9a6a6ebc7efc65da240f83680de2131d |
@@ -0,0 +1 @@ | |||
8e0088d97641d6ab39b808fe03ac0a7ec9ea99b9 |
@@ -0,0 +1 @@ | |||
d72fffb08da82bb70ecc379bb1fa56316efda557 |
@@ -0,0 +1 @@ | |||
4c2f8543201c0a66e44815dee128d9044a41c382 |
@@ -0,0 +1 @@ | |||
25a0dd3110986418d379a887cc575f9fdc45a6da |
@@ -0,0 +1,20 @@ | |||
# Minimal makefile for Sphinx documentation | |||
# | |||
# You can set these variables from the command line, and also | |||
# from the environment for the first two. | |||
SPHINXOPTS ?= | |||
SPHINXBUILD ?= sphinx-build | |||
SOURCEDIR = source | |||
BUILDDIR = build | |||
# Put it first so that "make" without argument is like "make help". | |||
help: | |||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) | |||
.PHONY: help Makefile | |||
# Catch-all target: route all unknown targets to Sphinx using the new | |||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). | |||
%: Makefile | |||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) |
@@ -0,0 +1,35 @@ | |||
@ECHO OFF | |||
pushd %~dp0 | |||
REM Command file for Sphinx documentation | |||
if "%SPHINXBUILD%" == "" ( | |||
set SPHINXBUILD=sphinx-build | |||
) | |||
set SOURCEDIR=source | |||
set BUILDDIR=build | |||
if "%1" == "" goto help | |||
%SPHINXBUILD% >NUL 2>NUL | |||
if errorlevel 9009 ( | |||
echo. | |||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx | |||
echo.installed, then set the SPHINXBUILD environment variable to point | |||
echo.to the full path of the 'sphinx-build' executable. Alternatively you | |||
echo.may add the Sphinx directory to PATH. | |||
echo. | |||
echo.If you don't have Sphinx installed, grab it from | |||
echo.http://sphinx-doc.org/ | |||
exit /b 1 | |||
) | |||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% | |||
goto end | |||
:help | |||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% | |||
:end | |||
popd |
@@ -0,0 +1,85 @@ | |||
# Configuration file for the Sphinx documentation builder. | |||
# | |||
# This file only contains a selection of the most common options. For a full | |||
# list see the documentation: | |||
# https://www.sphinx-doc.org/en/master/usage/configuration.html | |||
# -- Path setup -------------------------------------------------------------- | |||
# If extensions (or modules to document with autodoc) are in another directory, | |||
# add these directories to sys.path here. If the directory is relative to the | |||
# documentation root, use os.path.abspath to make it absolute, like shown here. | |||
# | |||
import os | |||
import sys | |||
sys.path.append(os.path.abspath('../../tods')) | |||
sys.path.append(os.path.abspath('../../')) | |||
# -- Auto-doc Skip -------------------- | |||
def skip_member(app, what, name, obj, skip, opts): | |||
# we can document otherwise excluded entities here by returning False | |||
# or skip otherwise included entities by returning True | |||
if name == "__author" or name == "metadata": | |||
return True | |||
return None | |||
def setup(app): | |||
app.connect('autodoc-skip-member', skip_member) | |||
# -- Project information ----------------------------------------------------- | |||
project = 'TODS' | |||
copyright = '2020, DataLab@Texas A&M University' | |||
author = 'DataLab@Texas A&M University' | |||
# The full version, including alpha/beta/rc tags | |||
release = '0.0.1' | |||
# -- General configuration --------------------------------------------------- | |||
# Add any Sphinx extension module names here, as strings. They can be | |||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom | |||
# ones. | |||
extensions = [ | |||
'sphinx.ext.autodoc', | |||
'sphinx.ext.doctest', | |||
'sphinx.ext.intersphinx', | |||
'sphinx.ext.todo', | |||
'sphinx.ext.coverage', | |||
'sphinx.ext.mathjax', | |||
'sphinx.ext.githubpages', | |||
'sphinx.ext.napoleon', | |||
] | |||
# Add any paths that contain templates here, relative to this directory. | |||
templates_path = ['_templates'] | |||
source_suffix = '.rst' | |||
# The master toctree document. | |||
master_doc = 'doctree' | |||
# List of patterns, relative to source directory, that match files and | |||
# directories to ignore when looking for source files. | |||
# This pattern also affects html_static_path and html_extra_path. | |||
exclude_patterns = [] | |||
# The name of the Pygments (syntax highlighting) style to use. | |||
pygments_style = None | |||
# -- Options for HTML output ------------------------------------------------- | |||
# The theme to use for HTML and HTML Help pages. See the documentation for | |||
# a list of builtin themes. | |||
# | |||
html_theme = 'sphinx_rtd_theme' | |||
# Add any paths that contain custom static files (such as style sheets) here, | |||
# relative to this directory. They are copied after the builtin static files, | |||
# so a file named "default.css" will overwrite the builtin "default.css". | |||
html_static_path = ['_static'] | |||
html_sidebars = { | |||
'**': ['fulltoc.html', 'sourcelink.html', 'searchbox.html'] | |||
} | |||
@@ -0,0 +1,31 @@ | |||
.. rlcard documentation master file, created by | |||
sphinx-quickstart on Thu Sep 5 18:45:31 2019. | |||
You can adapt this file completely to your liking, but it should at least | |||
contain the root `toctree` directive. | |||
.. toctree:: | |||
:glob: | |||
:caption: Documentation: | |||
overview | |||
getting_started | |||
.. toctree:: | |||
:glob: | |||
:caption: API Documents: | |||
tods.data_processing | |||
tods.timeseries_processing | |||
tods.feature_analysis | |||
tods.detection_algorithm | |||
tods.reinforcement | |||
Indices and tables | |||
================== | |||
* :ref:`genindex` | |||
* :ref:`modindex` | |||
* :ref:`search` |
@@ -0,0 +1,595 @@ | |||
Getting Started | |||
=============== | |||
In this document, we provide some toy examples for getting started. All | |||
the examples in this document and even more examples are available in | |||
`examples/ <https://github.com/datamllab/rlcard/tree/master/examples>`__. | |||
Playing with Random Agents | |||
-------------------------- | |||
We have set up a random agent that can play randomly on each | |||
environment. An example of applying a random agent on Blackjack is as | |||
follows:
.. code:: python | |||
import rlcard | |||
from rlcard.agents import RandomAgent | |||
from rlcard.utils import set_global_seed | |||
# Make environment | |||
env = rlcard.make('blackjack', config={'seed': 0}) | |||
episode_num = 2 | |||
# Set a global seed | |||
set_global_seed(0) | |||
# Set up agents | |||
agent_0 = RandomAgent(action_num=env.action_num) | |||
env.set_agents([agent_0]) | |||
for episode in range(episode_num): | |||
# Generate data from the environment | |||
trajectories, _ = env.run(is_training=False) | |||
# Print out the trajectories | |||
print('\nEpisode {}'.format(episode)) | |||
for ts in trajectories[0]: | |||
print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.format(ts[0], ts[1], ts[2], ts[3], ts[4])) | |||
The expected output should look something like the following:
:: | |||
Episode 0 | |||
State: {'obs': array([20, 3]), 'legal_actions': [0, 1]}, Action: 0, Reward: 0, Next State: {'obs': array([15, 3]), 'legal_actions': [0, 1]}, Done: False | |||
State: {'obs': array([15, 3]), 'legal_actions': [0, 1]}, Action: 1, Reward: -1, Next State: {'obs': array([15, 20]), 'legal_actions': [0, 1]}, Done: True | |||
Episode 1 | |||
State: {'obs': array([15, 5]), 'legal_actions': [0, 1]}, Action: 1, Reward: 1, Next State: {'obs': array([15, 23]), 'legal_actions': [0, 1]}, Done: True | |||
Note that the states and actions are wrapped by ``env`` in Blackjack. In | |||
this example, the ``[20, 3]`` suggests the current player obtains score | |||
20 while the card that faces up in the dealer’s hand has score 3. Action | |||
0 means “hit” while action 1 means “stand”. Reward 1 suggests the player | |||
wins while reward -1 suggests the dealer wins. Reward 0 suggests a tie. | |||
The above data can be directly fed into a RL algorithm for training. | |||
Deep-Q Learning on Blackjack | |||
---------------------------- | |||
The second example is to use Deep-Q learning to train an agent on | |||
Blackjack. We aim to use this example to show how reinforcement learning | |||
algorithms can be developed and applied in our toolkit. We design a | |||
``run`` function which plays one complete game and provides the data for | |||
training RL agents. The example is shown below: | |||
.. code:: python | |||
import tensorflow as tf | |||
import os | |||
import rlcard | |||
from rlcard.agents import DQNAgent | |||
from rlcard.utils import set_global_seed, tournament | |||
from rlcard.utils import Logger | |||
# Make environment | |||
env = rlcard.make('blackjack', config={'seed': 0}) | |||
eval_env = rlcard.make('blackjack', config={'seed': 0}) | |||
# Set the iteration numbers and how frequently we evaluate/save the plot
evaluate_every = 100 | |||
evaluate_num = 10000 | |||
episode_num = 100000 | |||
# The initial memory size
memory_init_size = 100 | |||
# Train the agent every X steps | |||
train_every = 1 | |||
# The paths for saving the logs and learning curves | |||
log_dir = './experiments/blackjack_dqn_result/' | |||
# Set a global seed | |||
set_global_seed(0) | |||
with tf.Session() as sess: | |||
# Initialize a global step | |||
global_step = tf.Variable(0, name='global_step', trainable=False) | |||
# Set up the agents | |||
agent = DQNAgent(sess, | |||
scope='dqn', | |||
action_num=env.action_num, | |||
replay_memory_init_size=memory_init_size, | |||
train_every=train_every, | |||
state_shape=env.state_shape, | |||
mlp_layers=[10,10]) | |||
env.set_agents([agent]) | |||
eval_env.set_agents([agent]) | |||
# Initialize global variables | |||
sess.run(tf.global_variables_initializer()) | |||
# Init a Logger to plot the learning curve | |||
logger = Logger(log_dir) | |||
for episode in range(episode_num): | |||
# Generate data from the environment | |||
trajectories, _ = env.run(is_training=True) | |||
# Feed transitions into agent memory, and train the agent | |||
for ts in trajectories[0]: | |||
agent.feed(ts) | |||
# Evaluate the performance. Play with random agents. | |||
if episode % evaluate_every == 0: | |||
logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0]) | |||
# Close files in the logger | |||
logger.close_files() | |||
# Plot the learning curve | |||
logger.plot('DQN') | |||
# Save model | |||
save_dir = 'models/blackjack_dqn' | |||
if not os.path.exists(save_dir): | |||
os.makedirs(save_dir) | |||
saver = tf.train.Saver() | |||
saver.save(sess, os.path.join(save_dir, 'model')) | |||
The expected output is something like the following:
:: | |||
---------------------------------------- | |||
timestep | 1 | |||
reward | -0.7342 | |||
---------------------------------------- | |||
INFO - Agent dqn, step 100, rl-loss: 1.0042707920074463 | |||
INFO - Copied model parameters to target network. | |||
INFO - Agent dqn, step 136, rl-loss: 0.7888197302818298 | |||
---------------------------------------- | |||
timestep | 136 | |||
reward | -0.1406 | |||
---------------------------------------- | |||
INFO - Agent dqn, step 278, rl-loss: 0.6946825981140137 | |||
---------------------------------------- | |||
timestep | 278 | |||
reward | -0.1523 | |||
---------------------------------------- | |||
INFO - Agent dqn, step 412, rl-loss: 0.62268990278244025 | |||
---------------------------------------- | |||
timestep | 412 | |||
reward | -0.088 | |||
---------------------------------------- | |||
INFO - Agent dqn, step 544, rl-loss: 0.69050502777099616 | |||
---------------------------------------- | |||
timestep | 544 | |||
reward | -0.08 | |||
---------------------------------------- | |||
INFO - Agent dqn, step 681, rl-loss: 0.61789089441299444 | |||
---------------------------------------- | |||
timestep | 681 | |||
reward | -0.0793 | |||
---------------------------------------- | |||
In Blackjack, the player will get a payoff at the end of the game: 1 if | |||
the player wins, -1 if the player loses, and 0 if it is a tie. The | |||
performance is measured by the average payoff the player obtains by | |||
playing 10000 episodes. The above example shows that the agent achieves | |||
better and better performance during training. The logs and learning | |||
curves are saved in ``./experiments/blackjack_dqn_result/``. | |||
Running Multiple Processes | |||
-------------------------- | |||
The environments can be run with multiple processes to accelerate the | |||
training. Below is an example to train DQN on Blackjack with multiple | |||
processes. | |||
.. code:: python | |||
''' An example of learning a Deep-Q Agent on Blackjack with multiple processes | |||
Note that we must use if __name__ == '__main__' for multiprocessing | |||
''' | |||
import tensorflow as tf | |||
import os | |||
import rlcard | |||
from rlcard.agents import DQNAgent | |||
from rlcard.utils import set_global_seed, tournament | |||
from rlcard.utils import Logger | |||
def main(): | |||
# Make environment | |||
env = rlcard.make('blackjack', config={'seed': 0, 'env_num': 4}) | |||
eval_env = rlcard.make('blackjack', config={'seed': 0, 'env_num': 4}) | |||
# Set the iteration numbers and how frequently we evaluate performance
evaluate_every = 100 | |||
evaluate_num = 10000 | |||
iteration_num = 100000 | |||
# The initial memory size
memory_init_size = 100 | |||
# Train the agent every X steps | |||
train_every = 1 | |||
# The paths for saving the logs and learning curves | |||
log_dir = './experiments/blackjack_dqn_result/' | |||
# Set a global seed | |||
set_global_seed(0) | |||
with tf.Session() as sess: | |||
# Initialize a global step | |||
global_step = tf.Variable(0, name='global_step', trainable=False) | |||
# Set up the agents | |||
agent = DQNAgent(sess, | |||
scope='dqn', | |||
action_num=env.action_num, | |||
replay_memory_init_size=memory_init_size, | |||
train_every=train_every, | |||
state_shape=env.state_shape, | |||
mlp_layers=[10,10]) | |||
env.set_agents([agent]) | |||
eval_env.set_agents([agent]) | |||
# Initialize global variables | |||
sess.run(tf.global_variables_initializer()) | |||
# Initialize a Logger to plot the learning curve | |||
logger = Logger(log_dir) | |||
for iteration in range(iteration_num): | |||
# Generate data from the environment | |||
trajectories, _ = env.run(is_training=True) | |||
# Feed transitions into agent memory, and train the agent | |||
for ts in trajectories[0]: | |||
agent.feed(ts) | |||
# Evaluate the performance. Play with random agents. | |||
if iteration % evaluate_every == 0: | |||
logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0]) | |||
# Close files in the logger | |||
logger.close_files() | |||
# Plot the learning curve | |||
logger.plot('DQN') | |||
# Save model | |||
save_dir = 'models/blackjack_dqn' | |||
if not os.path.exists(save_dir): | |||
os.makedirs(save_dir) | |||
saver = tf.train.Saver() | |||
saver.save(sess, os.path.join(save_dir, 'model')) | |||
if __name__ == '__main__': | |||
main() | |||
Example output is as follows:
:: | |||
---------------------------------------- | |||
timestep | 17 | |||
reward | -0.7378 | |||
---------------------------------------- | |||
INFO - Copied model parameters to target network. | |||
INFO - Agent dqn, step 1100, rl-loss: 0.40940183401107797 | |||
INFO - Copied model parameters to target network. | |||
INFO - Agent dqn, step 2100, rl-loss: 0.44971221685409546 | |||
INFO - Copied model parameters to target network. | |||
INFO - Agent dqn, step 2225, rl-loss: 0.65466868877410897 | |||
---------------------------------------- | |||
timestep | 2225 | |||
reward | -0.0658 | |||
---------------------------------------- | |||
INFO - Agent dqn, step 3100, rl-loss: 0.48663979768753053 | |||
INFO - Copied model parameters to target network. | |||
INFO - Agent dqn, step 4100, rl-loss: 0.71293979883193974 | |||
INFO - Copied model parameters to target network. | |||
INFO - Agent dqn, step 4440, rl-loss: 0.55871248245239263 | |||
---------------------------------------- | |||
timestep | 4440 | |||
reward | -0.0736 | |||
---------------------------------------- | |||
Training CFR on Leduc Hold’em | |||
----------------------------- | |||
To show how we can use ``step`` and ``step_back`` to traverse the game | |||
tree, we provide an example of solving Leduc Hold’em with CFR: | |||
.. code:: python | |||
import numpy as np | |||
import rlcard | |||
from rlcard.agents import CFRAgent | |||
from rlcard import models | |||
from rlcard.utils import set_global_seed, tournament | |||
from rlcard.utils import Logger | |||
# Make environment and enable human mode | |||
env = rlcard.make('leduc-holdem', config={'seed': 0, 'allow_step_back':True}) | |||
eval_env = rlcard.make('leduc-holdem', config={'seed': 0}) | |||
# Set the iteration numbers and how frequently we evaluate/save the plot
evaluate_every = 100 | |||
save_plot_every = 1000 | |||
evaluate_num = 10000 | |||
episode_num = 10000 | |||
# The paths for saving the logs and learning curves | |||
log_dir = './experiments/leduc_holdem_cfr_result/' | |||
# Set a global seed | |||
set_global_seed(0) | |||
# Initialize CFR Agent
agent = CFRAgent(env) | |||
agent.load() # If we have saved model, we first load the model | |||
# Evaluate CFR against pre-trained NFSP | |||
eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]]) | |||
# Init a Logger to plot the learning curve | |||
logger = Logger(log_dir) | |||
for episode in range(episode_num): | |||
agent.train() | |||
print('\rIteration {}'.format(episode), end='') | |||
# Evaluate the performance. Play with NFSP agents. | |||
if episode % evaluate_every == 0: | |||
agent.save() # Save model | |||
logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0]) | |||
# Close files in the logger | |||
logger.close_files() | |||
# Plot the learning curve | |||
logger.plot('CFR') | |||
In the above example, the performance is measured by playing against a | |||
pre-trained NFSP model. The expected output is shown below:
:: | |||
Iteration 0 | |||
---------------------------------------- | |||
timestep | 192 | |||
reward | -1.3662 | |||
---------------------------------------- | |||
Iteration 100 | |||
---------------------------------------- | |||
timestep | 19392 | |||
reward | 0.9462 | |||
---------------------------------------- | |||
Iteration 200 | |||
---------------------------------------- | |||
timestep | 38592 | |||
reward | 0.8591 | |||
---------------------------------------- | |||
Iteration 300 | |||
---------------------------------------- | |||
timestep | 57792 | |||
reward | 0.7861 | |||
---------------------------------------- | |||
Iteration 400 | |||
---------------------------------------- | |||
timestep | 76992 | |||
reward | 0.7752 | |||
---------------------------------------- | |||
Iteration 500 | |||
---------------------------------------- | |||
timestep | 96192 | |||
reward | 0.7215 | |||
---------------------------------------- | |||
We observe that CFR achieves better performance than NFSP. However, CFR
requires traversal of the game tree, which is infeasible in large | |||
environments. | |||
Having Fun with Pretrained Leduc Model | |||
-------------------------------------- | |||
We have designed simple human interfaces to play against the pretrained | |||
model. Leduc Hold’em is a simplified version of Texas Hold’em. Rules can | |||
be found `here <games.md#leduc-holdem>`__. An example of playing against the
Leduc Hold'em CFR model is shown below:
.. code:: python | |||
import rlcard | |||
from rlcard import models | |||
from rlcard.agents import LeducholdemHumanAgent as HumanAgent | |||
from rlcard.utils import print_card | |||
# Make environment | |||
# Set 'record_action' to True because we need it to print results | |||
env = rlcard.make('leduc-holdem', config={'record_action': True}) | |||
human_agent = HumanAgent(env.action_num) | |||
cfr_agent = models.load('leduc-holdem-cfr').agents[0] | |||
env.set_agents([human_agent, cfr_agent]) | |||
print(">> Leduc Hold'em pre-trained model") | |||
while (True): | |||
print(">> Start a new game") | |||
trajectories, payoffs = env.run(is_training=False) | |||
# If the human does not take the final action, we need to | |||
# print the other players' actions
final_state = trajectories[0][-1][-2] | |||
action_record = final_state['action_record'] | |||
state = final_state['raw_obs'] | |||
_action_list = [] | |||
for i in range(1, len(action_record)+1): | |||
if action_record[-i][0] == state['current_player']: | |||
break | |||
_action_list.insert(0, action_record[-i]) | |||
for pair in _action_list: | |||
print('>> Player', pair[0], 'chooses', pair[1]) | |||
# Let's take a look at what the agent card is | |||
print('=============== CFR Agent ===============') | |||
print_card(env.get_perfect_information()['hand_cards'][1]) | |||
print('=============== Result ===============') | |||
if payoffs[0] > 0: | |||
print('You win {} chips!'.format(payoffs[0])) | |||
elif payoffs[0] == 0: | |||
print('It is a tie.') | |||
else: | |||
print('You lose {} chips!'.format(-payoffs[0])) | |||
print('') | |||
input("Press any key to continue...") | |||
Example output is as follows:
:: | |||
>> Leduc Hold'em pre-trained model | |||
>> Start a new game! | |||
>> Agent 1 chooses raise | |||
=============== Community Card =============== | |||
┌─────────┐ | |||
│░░░░░░░░░│ | |||
│░░░░░░░░░│ | |||
│░░░░░░░░░│ | |||
│░░░░░░░░░│ | |||
│░░░░░░░░░│ | |||
│░░░░░░░░░│ | |||
│░░░░░░░░░│ | |||
└─────────┘ | |||
=============== Your Hand =============== | |||
┌─────────┐ | |||
│J │ | |||
│ │ | |||
│ │ | |||
│ ♥ │ | |||
│ │ | |||
│ │ | |||
│ J│ | |||
└─────────┘ | |||
=============== Chips =============== | |||
Yours: + | |||
Agent 1: +++ | |||
=========== Actions You Can Choose =========== | |||
0: call, 1: raise, 2: fold | |||
>> You choose action (integer): | |||
We also provide a running demo of a rule-based agent for UNO. Try it by | |||
running ``examples/uno_human.py``. | |||
Leduc Hold’em as Single-Agent Environment | |||
----------------------------------------- | |||
We have wrapped the environment as a single-agent environment by assuming
that the other players play with pre-trained models. The interfaces are
exactly the same as OpenAI Gym's. Thus, any single-agent algorithm can be
connected to the environment. An example of Leduc Hold’em is as below: | |||
.. code:: python | |||
import tensorflow as tf | |||
import os | |||
import numpy as np | |||
import rlcard | |||
from rlcard.agents import DQNAgent | |||
from rlcard.agents import RandomAgent | |||
from rlcard.utils import set_global_seed, tournament | |||
from rlcard.utils import Logger | |||
# Make environment | |||
env = rlcard.make('leduc-holdem', config={'seed': 0, 'single_agent_mode':True}) | |||
eval_env = rlcard.make('leduc-holdem', config={'seed': 0, 'single_agent_mode':True}) | |||
# Set the iteration numbers and how frequently we evaluate/save the plot
evaluate_every = 1000 | |||
evaluate_num = 10000 | |||
timesteps = 100000 | |||
# The initial memory size
memory_init_size = 1000 | |||
# Train the agent every X steps | |||
train_every = 1 | |||
# The paths for saving the logs and learning curves | |||
log_dir = './experiments/leduc_holdem_single_dqn_result/' | |||
# Set a global seed | |||
set_global_seed(0) | |||
with tf.Session() as sess: | |||
# Initialize a global step | |||
global_step = tf.Variable(0, name='global_step', trainable=False) | |||
# Set up the agents | |||
agent = DQNAgent(sess, | |||
scope='dqn', | |||
action_num=env.action_num, | |||
replay_memory_init_size=memory_init_size, | |||
train_every=train_every, | |||
state_shape=env.state_shape, | |||
mlp_layers=[128,128]) | |||
# Initialize global variables | |||
sess.run(tf.global_variables_initializer()) | |||
# Init a Logger to plot the learning curve | |||
logger = Logger(log_dir) | |||
state = env.reset() | |||
for timestep in range(timesteps): | |||
action = agent.step(state) | |||
next_state, reward, done = env.step(action) | |||
ts = (state, action, reward, next_state, done) | |||
agent.feed(ts) | |||
if timestep % evaluate_every == 0: | |||
rewards = [] | |||
state = eval_env.reset() | |||
for _ in range(evaluate_num): | |||
action, _ = agent.eval_step(state) | |||
_, reward, done = env.step(action) | |||
if done: | |||
rewards.append(reward) | |||
logger.log_performance(env.timestep, np.mean(rewards)) | |||
# Close files in the logger | |||
logger.close_files() | |||
# Plot the learning curve | |||
logger.plot('DQN') | |||
# Save model | |||
save_dir = 'models/leduc_holdem_single_dqn' | |||
if not os.path.exists(save_dir): | |||
os.makedirs(save_dir) | |||
saver = tf.train.Saver() | |||
saver.save(sess, os.path.join(save_dir, 'model')) |
@@ -0,0 +1,28 @@ | |||
.. Time Series Outlier Detection System documentation master file, created by | |||
sphinx-quickstart on Wed Sep 9 22:52:15 2020. | |||
You can adapt this file completely to your liking, but it should at least | |||
contain the root `toctree` directive. | |||
Welcome to TODS's documentation!
================================================================ | |||
.. toctree:: | |||
:maxdepth: 4 | |||
:caption: Contents: | |||
API Documents | |||
================== | |||
.. toctree:: | |||
:maxdepth: 4 | |||
:caption: API Documents: | |||
tods.data_processing | |||
tods.timeseries_processing | |||
tods.feature_analysis | |||
tods.detection_algorithm | |||
tods.reinforcement | |||
* :ref:`genindex` | |||
* :ref:`modindex` | |||
* :ref:`search` |
@@ -0,0 +1,7 @@ | |||
tods | |||
==== | |||
.. toctree:: | |||
:maxdepth: 4 | |||
tods |
@@ -0,0 +1,101 @@ | |||
Overview | |||
======== | |||
Design Principles | |||
~~~~~~~~~~~~~~~~~ | |||
The toolkit wraps each game by ``Env`` class with easy-to-use | |||
interfaces. The goal of this toolkit is to enable the users to focus on | |||
algorithm development without caring about the environment. The | |||
following design principles are applied when developing the toolkit: | |||
* **Reproducible.** Results on the environments can be reproduced. The same result should be obtained with the same random seed in different runs. | |||
* **Accessible.** The experiences are collected and well organized after each game with easy-to-use interfaces. Users can conveniently configure state representation, action encoding, reward design, or even the game rules.
* **Scalable.** New card environments can be added conveniently into the toolkit with the above design principles. We also try to minimize the dependencies in the toolkit so that the codes can be easily maintained. | |||
TODS High-level Design | |||
~~~~~~~~~~~~~~~~~~~~~~~~ | |||
This document introduces the high-level design for the environments, the | |||
games, and the agents (algorithms). | |||
.. image:: img/framework.pdf | |||
:width: 800 | |||
Data-Processing | |||
--------------- | |||
We wrap each game with an ``Env`` class. The responsibility of ``Env`` | |||
is to help you generate trajectories of the games. For developing | |||
Reinforcement Learning (RL) algorithms, we recommend using the
following interfaces: | |||
- ``set_agents``: This function tells the ``Env`` what agents will be | |||
used to perform actions in the game. Different games may have a | |||
different number of agents. The input of the function is a list of | |||
``Agent`` class. For example, | |||
``env.set_agent([RandomAgent(), RandomAgent()])`` indicates that two | |||
random agents will be used to generate the trajectories. | |||
- ``run``: After setting the agents, this interface will run a complete | |||
trajectory of the game, calculate the reward for each transition, and | |||
reorganize the data so that it can be directly fed into a RL | |||
algorithm. | |||
For advanced access to the environment, such as traversal of the game | |||
tree, we provide the following interfaces: | |||
- ``step``: Given the current state, the environment takes one step | |||
forward, and returns the next state and the next player. | |||
- ``step_back``: Takes one step backward. The environment will restore
to the last state. ``step_back`` is turned off by default since it
requires expensively recording previous states. To turn it on, set
``allow_step_back = True`` when you ``make`` the environment (a short
traversal sketch follows this list).
- ``get_payoffs``: At the end of the game, this function can be called | |||
to obtain the payoffs for each player. | |||
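A minimal sketch of how ``step`` and ``step_back`` could be combined for
game-tree traversal is below; ``env``, ``state``, and ``estimate`` are
hypothetical placeholders, and the environment is assumed to have been
created with ``allow_step_back=True``:

.. code:: python

    # `state` is the current state of the acting player and `estimate` is
    # any evaluation function; both are placeholders for illustration.
    for action in state['legal_actions']:
        next_state, next_player = env.step(action)   # one step forward
        value = estimate(next_state)                  # inspect the branch
        env.step_back()                               # restore the previous state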
We also support single-agent mode and human mode. Examples can be found | |||
in ``examples/``. | |||
- Single agent mode: single-agent environments are developed by
simulating other players with pre-trained models or rule-based | |||
models. You can enable single-agent mode by | |||
``rlcard.make(ENV_ID, config={'single_agent_mode':True})``. Then the | |||
``step`` function will return ``(next_state, reward, done)`` just as | |||
common single-agent environments. ``env.reset()`` will reset the game | |||
and return the first state, as sketched below.
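A minimal sketch of this single-agent loop is shown here (``agent`` stands
for any single-agent algorithm and is a placeholder):

.. code:: python

    import rlcard

    env = rlcard.make('leduc-holdem', config={'single_agent_mode': True})
    state = env.reset()                      # reset the game and get the first state
    while True:
        action = agent.step(state)           # `agent` is any single-agent algorithm
        state, reward, done = env.step(action)
        if done:
            break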
Games | |||
----- | |||
Card games usually have similar structures. We abstract some concepts in | |||
card games and follow the same design pattern. In this way, | |||
users/developers can easily dig into the code and change the rules for | |||
research purposes. Specifically, the following classes are used in all
the games: | |||
- ``Game``: A game is defined as a complete sequence starting from one | |||
of the non-terminal states to a terminal state. | |||
- ``Round``: A round is a part of the sequence of a game. Most card | |||
games can be naturally divided into multiple rounds. | |||
- ``Dealer``: A dealer is responsible for shuffling and allocating a | |||
deck of cards. | |||
- ``Judger``: A judger is responsible for making major decisions at the | |||
end of a round or a game. | |||
- ``Player``: A player is a role who plays cards following a strategy. | |||
To summarize, in one ``Game``, a ``Dealer`` deals the cards for each
``Player``. In each ``Round`` of the game, a ``Judger`` will make major
decisions about the next round and the payoffs at the end of the game
(a schematic skeleton of these roles is sketched below).
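The skeleton below is only a schematic illustration of how these roles could
relate in code; every class and method name here is hypothetical and does not
correspond to a specific implementation.

.. code-block:: python

   class Dealer:
       def deal(self, players):              # hypothetical: shuffle and allocate cards
           ...

   class Player:
       def play(self, state):                # hypothetical: choose a card by some strategy
           ...

   class Judger:
       def judge_round(self, round_state):   # hypothetical: decide how the next round starts
           ...
       def judge_game(self, game_state):     # hypothetical: compute final payoffs
           ...

   class Round:
       def proceed(self, players, judger):   # hypothetical: play out one round
           ...

   class Game:
       """A complete sequence from an initial state to a terminal state."""
       def run(self, dealer, rounds, players, judger):
           dealer.deal(players)
           for round_ in rounds:
               round_.proceed(players, judger)
           return judger.judge_game(self)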
Agents | |||
------ | |||
We provide examples of several representative algorithms and wrap them
as ``Agent`` to show how a learning algorithm can be connected to the
toolkit. The first example is DQN, which is representative of the
Reinforcement Learning (RL) category. The second example is NFSP, which
is representative of RL with self-play. We also provide CFR and DeepCFR,
which belong to the Counterfactual Regret Minimization (CFR) category.
Other algorithms from these three categories can be connected in
similar ways.
@@ -0,0 +1,69 @@ | |||
tods.data\_processing package | |||
============================= | |||
Submodules | |||
---------- | |||
tods.data\_processing.CategoricalToBinary module | |||
------------------------------------------------ | |||
.. automodule:: tods.data_processing.CategoricalToBinary | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.data\_processing.ColumnFilter module | |||
----------------------------------------- | |||
.. automodule:: tods.data_processing.ColumnFilter | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.data\_processing.ContinuityValidation module | |||
------------------------------------------------- | |||
.. automodule:: tods.data_processing.ContinuityValidation | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.data\_processing.DatasetToDataframe module | |||
----------------------------------------------- | |||
.. automodule:: tods.data_processing.DatasetToDataframe | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.data\_processing.DuplicationValidation module | |||
-------------------------------------------------- | |||
.. automodule:: tods.data_processing.DuplicationValidation | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.data\_processing.TimeIntervalTransform module | |||
-------------------------------------------------- | |||
.. automodule:: tods.data_processing.TimeIntervalTransform | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.data\_processing.TimeStampValidation module | |||
------------------------------------------------ | |||
.. automodule:: tods.data_processing.TimeStampValidation | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
Module contents | |||
--------------- | |||
.. automodule:: tods.data_processing | |||
:members: | |||
:noindex: | |||
:show-inheritance: |
@@ -0,0 +1,189 @@ | |||
tods.detection\_algorithm package | |||
================================= | |||
Submodules | |||
---------- | |||
tods.detection\_algorithm.AutoRegODetect module | |||
----------------------------------------------- | |||
.. automodule:: tods.detection_algorithm.AutoRegODetect | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.DeepLog module | |||
---------------------------------------- | |||
.. automodule:: tods.detection_algorithm.DeepLog | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.KDiscordODetect module | |||
------------------------------------------------ | |||
.. automodule:: tods.detection_algorithm.KDiscordODetect | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.LSTMODetect module | |||
-------------------------------------------- | |||
.. automodule:: tods.detection_algorithm.LSTMODetect | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.MatrixProfile module | |||
---------------------------------------------- | |||
.. automodule:: tods.detection_algorithm.MatrixProfile | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.PCAODetect module | |||
------------------------------------------- | |||
.. automodule:: tods.detection_algorithm.PCAODetect | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.PyodABOD module | |||
----------------------------------------- | |||
.. automodule:: tods.detection_algorithm.PyodABOD | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.PyodAE module | |||
--------------------------------------- | |||
.. automodule:: tods.detection_algorithm.PyodAE | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.PyodCBLOF module | |||
------------------------------------------ | |||
.. automodule:: tods.detection_algorithm.PyodCBLOF | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.PyodCOF module | |||
---------------------------------------- | |||
.. automodule:: tods.detection_algorithm.PyodCOF | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.PyodHBOS module | |||
----------------------------------------- | |||
.. automodule:: tods.detection_algorithm.PyodHBOS | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.PyodIsolationForest module | |||
---------------------------------------------------- | |||
.. automodule:: tods.detection_algorithm.PyodIsolationForest | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.PyodKNN module | |||
---------------------------------------- | |||
.. automodule:: tods.detection_algorithm.PyodKNN | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.PyodLODA module | |||
----------------------------------------- | |||
.. automodule:: tods.detection_algorithm.PyodLODA | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.PyodLOF module | |||
---------------------------------------- | |||
.. automodule:: tods.detection_algorithm.PyodLOF | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.PyodMoGaal module | |||
------------------------------------------- | |||
.. automodule:: tods.detection_algorithm.PyodMoGaal | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.PyodOCSVM module | |||
------------------------------------------ | |||
.. automodule:: tods.detection_algorithm.PyodOCSVM | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.PyodSOD module | |||
---------------------------------------- | |||
.. automodule:: tods.detection_algorithm.PyodSOD | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.PyodSoGaal module | |||
------------------------------------------- | |||
.. automodule:: tods.detection_algorithm.PyodSoGaal | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.PyodVAE module | |||
---------------------------------------- | |||
.. automodule:: tods.detection_algorithm.PyodVAE | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.Telemanom module | |||
------------------------------------------ | |||
.. automodule:: tods.detection_algorithm.Telemanom | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.detection\_algorithm.UODBasePrimitive module | |||
------------------------------------------------- | |||
.. automodule:: tods.detection_algorithm.UODBasePrimitive | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
Module contents | |||
--------------- | |||
.. automodule:: tods.detection_algorithm | |||
:members: | |||
:noindex: | |||
:show-inheritance: |
@@ -0,0 +1,253 @@ | |||
tods.feature\_analysis package | |||
============================== | |||
Submodules | |||
---------- | |||
tods.feature\_analysis.AutoCorrelation module | |||
--------------------------------------------- | |||
.. automodule:: tods.feature_analysis.AutoCorrelation | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.BKFilter module | |||
-------------------------------------- | |||
.. automodule:: tods.feature_analysis.BKFilter | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.DiscreteCosineTransform module | |||
----------------------------------------------------- | |||
.. automodule:: tods.feature_analysis.DiscreteCosineTransform | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.FastFourierTransform module | |||
-------------------------------------------------- | |||
.. automodule:: tods.feature_analysis.FastFourierTransform | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.HPFilter module | |||
-------------------------------------- | |||
.. automodule:: tods.feature_analysis.HPFilter | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.NonNegativeMatrixFactorization module | |||
------------------------------------------------------------ | |||
.. automodule:: tods.feature_analysis.NonNegativeMatrixFactorization | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.SKTruncatedSVD module | |||
-------------------------------------------- | |||
.. automodule:: tods.feature_analysis.SKTruncatedSVD | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.SpectralResidualTransform module | |||
------------------------------------------------------- | |||
.. automodule:: tods.feature_analysis.SpectralResidualTransform | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalAbsEnergy module | |||
-------------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalAbsEnergy | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalAbsSum module | |||
----------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalAbsSum | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalGmean module | |||
---------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalGmean | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalHmean module | |||
---------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalHmean | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalKurtosis module | |||
------------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalKurtosis | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalMaximum module | |||
------------------------------------------------ | |||
.. automodule:: tods.feature_analysis.StatisticalMaximum | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalMean module | |||
--------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalMean | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalMeanAbs module | |||
------------------------------------------------ | |||
.. automodule:: tods.feature_analysis.StatisticalMeanAbs | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalMeanAbsTemporalDerivative module | |||
------------------------------------------------------------------ | |||
.. automodule:: tods.feature_analysis.StatisticalMeanAbsTemporalDerivative | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalMeanTemporalDerivative module | |||
--------------------------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalMeanTemporalDerivative | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalMedian module | |||
----------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalMedian | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalMedianAbsoluteDeviation module | |||
---------------------------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalMedianAbsoluteDeviation | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalMinimum module | |||
------------------------------------------------ | |||
.. automodule:: tods.feature_analysis.StatisticalMinimum | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalSkew module | |||
--------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalSkew | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalStd module | |||
-------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalStd | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalVar module | |||
-------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalVar | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalVariation module | |||
-------------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalVariation | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalVecSum module | |||
----------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalVecSum | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalWillisonAmplitude module | |||
---------------------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalWillisonAmplitude | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.StatisticalZeroCrossing module | |||
----------------------------------------------------- | |||
.. automodule:: tods.feature_analysis.StatisticalZeroCrossing | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.TRMF module | |||
---------------------------------- | |||
.. automodule:: tods.feature_analysis.TRMF | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.feature\_analysis.WaveletTransform module | |||
---------------------------------------------- | |||
.. automodule:: tods.feature_analysis.WaveletTransform | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
Module contents | |||
--------------- | |||
.. automodule:: tods.feature_analysis | |||
:members: | |||
:noindex: | |||
:show-inheritance: |
@@ -0,0 +1,21 @@ | |||
tods.reinforcement package | |||
========================== | |||
Submodules | |||
---------- | |||
tods.reinforcement.RuleBasedFilter module | |||
----------------------------------------- | |||
.. automodule:: tods.reinforcement.RuleBasedFilter | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
Module contents | |||
--------------- | |||
.. automodule:: tods.reinforcement | |||
:members: | |||
:noindex: | |||
:show-inheritance: |
@@ -0,0 +1,24 @@ | |||
tods package | |||
============ | |||
Subpackages | |||
----------- | |||
.. toctree:: | |||
:maxdepth: 2 | |||
tods.data_processing | |||
tods.detection_algorithm | |||
tods.feature_analysis | |||
tods.reinforcement | |||
tods.searcher | |||
tods.timeseries_processing | |||
Module contents | |||
--------------- | |||
.. automodule:: tods | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: |
@@ -0,0 +1,37 @@ | |||
tods.searcher package | |||
===================== | |||
Subpackages | |||
----------- | |||
.. toctree:: | |||
:maxdepth: 4 | |||
tods.searcher.search | |||
Submodules | |||
---------- | |||
tods.searcher.schemas module | |||
---------------------------- | |||
.. automodule:: tods.searcher.schemas | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
tods.searcher.utils module | |||
-------------------------- | |||
.. automodule:: tods.searcher.utils | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: | |||
Module contents | |||
--------------- | |||
.. automodule:: tods.searcher | |||
:members: | |||
:undoc-members: | |||
:show-inheritance: |
@@ -0,0 +1,21 @@ | |||
tods.searcher.search package | |||
============================ | |||
Submodules | |||
---------- | |||
tods.searcher.search.brute\_force\_search module | |||
------------------------------------------------ | |||
.. automodule:: tods.searcher.search.brute_force_search | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
Module contents | |||
--------------- | |||
.. automodule:: tods.searcher.search | |||
:members: | |||
:noindex: | |||
:show-inheritance: |
@@ -0,0 +1,85 @@ | |||
tods.timeseries\_processing package | |||
=================================== | |||
Submodules | |||
---------- | |||
tods.timeseries\_processing.HoltSmoothing module | |||
------------------------------------------------ | |||
.. automodule:: tods.timeseries_processing.HoltSmoothing | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.timeseries\_processing.HoltWintersExponentialSmoothing module | |||
------------------------------------------------------------------ | |||
.. automodule:: tods.timeseries_processing.HoltWintersExponentialSmoothing | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.timeseries\_processing.MovingAverageTransform module | |||
--------------------------------------------------------- | |||
.. automodule:: tods.timeseries_processing.MovingAverageTransform | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.timeseries\_processing.SKAxiswiseScaler module | |||
--------------------------------------------------- | |||
.. automodule:: tods.timeseries_processing.SKAxiswiseScaler | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.timeseries\_processing.SKPowerTransformer module | |||
----------------------------------------------------- | |||
.. automodule:: tods.timeseries_processing.SKPowerTransformer | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.timeseries\_processing.SKQuantileTransformer module | |||
-------------------------------------------------------- | |||
.. automodule:: tods.timeseries_processing.SKQuantileTransformer | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.timeseries\_processing.SKStandardScaler module | |||
--------------------------------------------------- | |||
.. automodule:: tods.timeseries_processing.SKStandardScaler | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.timeseries\_processing.SimpleExponentialSmoothing module | |||
------------------------------------------------------------- | |||
.. automodule:: tods.timeseries_processing.SimpleExponentialSmoothing | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
tods.timeseries\_processing.TimeSeriesSeasonalityTrendDecomposition module | |||
-------------------------------------------------------------------------- | |||
.. automodule:: tods.timeseries_processing.TimeSeriesSeasonalityTrendDecomposition | |||
:members: | |||
:noindex: | |||
:show-inheritance: | |||
Module contents | |||
--------------- | |||
.. automodule:: tods.timeseries_processing | |||
:members: | |||
:noindex: | |||
:show-inheritance: |
@@ -0,0 +1,23 @@ | |||
import pandas as pd | |||
from searcher import schemas as schemas_utils | |||
from searcher.utils import generate_dataset_problem, evaluate_pipeline | |||
table_path = 'datasets/yahoo_sub_5.csv' | |||
target_index = 6 # The column index of the target
#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset | |||
time_limit = 30 # Maximum search time in seconds (not used by this example)
#metric = 'F1' # F1 on label 1 | |||
metric = 'F1_MACRO' # F1 on both label 0 and 1 | |||
# Read data and generate dataset and problem | |||
df = pd.read_csv(table_path) | |||
dataset, problem_description = generate_dataset_problem(df, target_index=target_index, metric=metric) | |||
# Load the default pipeline | |||
pipeline = schemas_utils.load_default_pipeline() | |||
# Run the pipeline | |||
pipeline_result = evaluate_pipeline(problem_description, dataset, pipeline) | |||
print(pipeline_result) | |||
@@ -1,33 +1,59 @@ | |||
import uuid | |||
import random | |||
import pandas as pd | |||
from pprint import pprint | |||
from sklearn.datasets import make_classification | |||
from d3m import container | |||
from d3m.metadata.pipeline import Pipeline | |||
from d3m.metadata.problem import TaskKeyword, PerformanceMetric | |||
from axolotl.utils import data_problem | |||
from axolotl.backend.simple import SimpleRunner | |||
from axolotl.backend.ray import RayRunner | |||
from axolotl.algorithms.base import PipelineSearchBase | |||
from axolotl.utils import pipeline as pipeline_utils, schemas as schemas_utils | |||
import tods | |||
from tods.search import BruteForceSearch | |||
from searcher.utils import generate_dataset_problem | |||
from searcher.search import BruteForceSearch | |||
table_path = 'datasets/anomaly/kpi/kpi_dataset/tables/learningData.csv' | |||
df = pd.read_csv(table_path) | |||
dataset, problem_description = data_problem.generate_dataset_problem(df, | |||
target_index=3, | |||
task_keywords=[TaskKeyword.ANOMALY_DETECTION,], | |||
performance_metrics=[{'metric': PerformanceMetric.F1}]) | |||
# Some information | |||
#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_GOOG.csv' # The path of the dataset | |||
#target_index = 2 # what column is the target | |||
table_path = 'datasets/yahoo_sub_5.csv' | |||
target_index = 6 # The column index of the target
#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset
time_limit = 30 # Maximum search time in seconds
print(dataset) | |||
print(problem_description) | |||
#metric = 'F1' # F1 on label 1 | |||
metric = 'F1_MACRO' # F1 on both label 0 and 1 | |||
# Read data and generate dataset and problem | |||
df = pd.read_csv(table_path) | |||
dataset, problem_description = generate_dataset_problem(df, target_index=target_index, metric=metric) | |||
# Start backend | |||
backend = SimpleRunner(random_seed=42) | |||
backend = SimpleRunner(random_seed=0) | |||
# Start search algorithm | |||
search = BruteForceSearch(problem_description=problem_description, backend=backend) | |||
print(search) | |||
# Find the best pipeline | |||
best_runtime, best_pipeline_result = search.search_fit(input_data=[dataset], time_limit=time_limit) | |||
best_pipeline = best_runtime.pipeline | |||
best_output = best_pipeline_result.output | |||
# Evaluate the best pipeline | |||
best_scores = search.evaluate(best_pipeline).scores | |||
print('*' * 52) | |||
print('Search History:') | |||
for pipeline_result in search.history: | |||
print('-' * 52) | |||
print('Pipeline id:', pipeline_result.pipeline.id) | |||
print(pipeline_result.scores) | |||
print('*' * 52) | |||
print('') | |||
print('*' * 52) | |||
print('Best pipeline:') | |||
print('-' * 52) | |||
print('Pipeline id:', best_pipeline.id) | |||
print('Pipeline json:', best_pipeline.to_json()) | |||
print('Output:') | |||
print(best_output) | |||
print('Scores:') | |||
print(best_scores) | |||
print('*' * 52) | |||
@@ -1,51 +0,0 @@ | |||
import uuid | |||
import random | |||
import pandas as pd | |||
import json | |||
from pprint import pprint | |||
from sklearn.datasets import make_classification | |||
from d3m import container | |||
from d3m.metadata.pipeline import Pipeline | |||
from d3m.metadata.problem import TaskKeyword, PerformanceMetric | |||
from axolotl.utils import data_problem | |||
from axolotl.backend.simple import SimpleRunner | |||
# from axolotl.backend.ray import RayRunner | |||
# from axolotl.algorithms.base import PipelineSearchBase | |||
from axolotl.utils import pipeline as pipeline_utils, schemas as schemas_utils | |||
import tods | |||
from tods.search import BruteForceSearch | |||
table_path = 'datasets/anomaly/yahoo_sub_5/yahoo_sub_5_dataset/tables/learningData.csv' | |||
df = pd.read_csv(table_path) | |||
dataset, problem_description = data_problem.generate_dataset_problem(df, | |||
target_index=7, | |||
task_keywords=[TaskKeyword.ANOMALY_DETECTION,], | |||
performance_metrics=[{'metric': PerformanceMetric.F1}]) | |||
print(dataset) | |||
print(problem_description) | |||
metrics = [{'metric': PerformanceMetric.F1, 'params': {'pos_label': '1'}}, | |||
] | |||
pipeline_path = 'example_pipeline.json' | |||
pipeline = pipeline_utils.load_pipeline(pipeline_path) | |||
print(pipeline) | |||
data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA") | |||
scoring_pipeline = schemas_utils.get_scoring_pipeline() | |||
data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split'] | |||
backend = SimpleRunner(random_seed=0) | |||
pipeline_result = backend.evaluate_pipeline(problem_description=problem_description, | |||
pipeline=pipeline, | |||
input_data=[dataset], | |||
metrics=metrics, | |||
data_preparation_pipeline=data_preparation_pipeline, | |||
scoring_pipeline=scoring_pipeline, | |||
data_preparation_params=data_preparation_params) | |||
print(pipeline_result) | |||
@@ -1,4 +1,4 @@ | |||
scikit-learn==0.21.3 | |||
scikit-learn==0.22.0 | |||
pytypes==1.0b5 | |||
frozendict==1.2 | |||
numpy>=1.15.4,<=1.18.1 | |||
@@ -9,7 +9,7 @@ rfc3987==1.3.8 | |||
webcolors>=1.8.1,<=1.10 | |||
dateparser>=0.7.0,<=0.7.2 | |||
python-dateutil==2.8.1 | |||
pandas==0.23.4 | |||
pandas==0.25 | |||
typing-inspect==0.5.0 | |||
GitPython==3.1.0 | |||
jsonpath-ng==1.4.3 | |||
@@ -0,0 +1,21 @@ | |||
from distutils.command.sdist import sdist as sdist_orig | |||
from distutils.errors import DistutilsExecError | |||
from setuptools import setup, find_packages | |||
class install(sdist_orig): | |||
def run(self): | |||
try: | |||
self.spawn(['sh', '.install.sh']) | |||
except DistutilsExecError: | |||
self.warn('lost installation script') | |||
super().run() | |||
setup(name='tods', | |||
version='0.0.1', | |||
cmdclass={ | |||
'install': install | |||
}, | |||
) |
@@ -1,4 +1,4 @@ | |||
scikit-learn==0.21.3 | |||
scikit-learn==0.22.0 | |||
pytypes==1.0b5 | |||
frozendict==1.2 | |||
numpy>=1.15.4,<=1.18.1 | |||
@@ -9,7 +9,7 @@ rfc3987==1.3.8 | |||
webcolors>=1.8.1,<=1.10 | |||
dateparser>=0.7.0,<=0.7.2 | |||
python-dateutil==2.8.1 | |||
pandas==0.23.4 | |||
pandas==0.25 | |||
typing-inspect==0.5.0 | |||
GitPython>=2.1.11,<=3.0.5 | |||
jsonpath-ng==1.4.3 | |||
@@ -25,7 +25,7 @@ setup( | |||
'd3m', | |||
'Jinja2==2.9.4', | |||
'simplejson==3.12.0', | |||
'scikit-learn==0.21.3', | |||
'scikit-learn==0.22.0', | |||
], | |||
url='https://gitlab.datadrivendiscovery.org/jpl/sklearn-wrapping', | |||
entry_points = { | |||
@@ -68,8 +68,6 @@ tods.detection_algorithm.pyod_mogaal = detection_algorithm.PyodMoGaal:Mo_GaalPri | |||
tods.detection_algorithm.matrix_profile = detection_algorithm.MatrixProfile:MatrixProfile | |||
tods.detection_algorithm.AutoRegODetector = detection_algorithm.AutoRegODetect:AutoRegODetector | |||
tods.detection_algorithm.KDiscordDetector = detection_algorithm.KDiscordODetect:KDiscordDetector | |||
tods.detection_algorithm.PCADetector = detection_algorithm.PCAODetect:PCADetector | |||
tods.detection_algorithm.LSTMODetector = detection_algorithm.LSTMODetect:LSTMODetector | |||
tods.detection_algorithm.AutoRegODetector = detection_algorithm.AutoRegODetect:AutoRegODetector | |||
@@ -1,4 +1,4 @@ | |||
scikit-learn==0.21.3 | |||
scikit-learn==0.22.0 | |||
pytypes==1.0b5 | |||
frozendict==1.2 | |||
numpy>=1.15.4,<=1.18.1 | |||
@@ -9,7 +9,7 @@ rfc3987==1.3.8 | |||
webcolors>=1.8.1,<=1.10 | |||
dateparser>=0.7.0,<=0.7.2 | |||
python-dateutil==2.8.1 | |||
pandas==0.23.4 | |||
pandas==0.25.0 | |||
typing-inspect==0.5.0 | |||
GitPython>=2.1.11,<=3.0.5 | |||
jsonpath-ng==1.4.3 | |||
@@ -0,0 +1 @@ | |||
{"id": "384bbfab-4f6d-4001-9f90-684ea5681f5d", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-09-09T23:40:01.756164Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.7.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", "name": "Extract a DataFrame from a Dataset", "digest": "b94ee59ccf8db678d506adddbc238fb2049fb664a1e3f3f3f6a6517c0c4f8e5f"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7", "version": "0.6.0", "python_path": "d3m.primitives.data_transformation.column_parser.Common", "name": "Parses strings into their types", "digest": "256f0155c7185d747b3b23096e46c40d15844106f9ed6346453f6010891f1896"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "85fe81066e85dbb62eacbe8a96be52d08e7aec22a025a29c81feaaaa72d7f7d0"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "85fe81066e85dbb62eacbe8a96be52d08e7aec22a025a29c81feaaaa72d7f7d0"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "eaff2f35-978c-4530-a12e-061a5f0beacd", "version": "0.1.0", "python_path": "d3m.primitives.tods.feature_analysis.statistical_mean", "name": "Time Series Decompostional", "digest": "2f2a8c07878643fe29c346096b91b5ba91477baa1e7e78684f07e53d29766ca4"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_vae", "name": "TODS.anomaly_detection_primitives.VariationalAutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.construct_predictions.Common", "name": "Construct pipeline predictions output", "digest": 
"d5384857f75090844f367504befb1a854e5088589f6aae0795f66ccf10403e19"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.6.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "8d969800816d9596e94cb045aacce43dc3d49e8c5bedb403e35af6c9b8339990"} |
@@ -0,0 +1,10 @@ | |||
import os | |||
resource_dir = os.path.dirname(__file__) | |||
DEFAULT_PIPELINE_DIR = os.path.join(resource_dir, 'resources', 'default_pipeline.json') | |||
def load_default_pipeline(): | |||
from axolotl.utils import pipeline as pipeline_utils | |||
pipeline = pipeline_utils.load_pipeline(DEFAULT_PIPELINE_DIR) | |||
return pipeline |
@@ -0,0 +1,294 @@ | |||
# A Brute-Force Search | |||
import uuid | |||
import random | |||
from d3m.metadata.pipeline import Pipeline | |||
from axolotl.algorithms.base import PipelineSearchBase | |||
from axolotl.utils import schemas as schemas_utils | |||
class BruteForceSearch(PipelineSearchBase): | |||
def __init__(self, problem_description, backend, *, primitives_blocklist=None, ranking_function=None): | |||
super().__init__(problem_description=problem_description, backend=backend, | |||
primitives_blocklist=primitives_blocklist, ranking_function=ranking_function) | |||
if self.ranking_function is None: | |||
self.ranking_function = _rank_first_metric | |||
# Find the candidates | |||
self.task_description = schemas_utils.get_task_description(self.problem_description['problem']['task_keywords']) | |||
self.available_pipelines = self._return_pipelines( | |||
self.task_description['task_type'], self.task_description['task_subtype'], self.task_description['data_types']) | |||
self.metrics = self.problem_description['problem']['performance_metrics'] | |||
self.data_preparation_pipeline = _generate_data_preparation_pipeline() | |||
self.scoring_pipeline = _generate_scoring_pipeline() | |||
self.data_preparation_params = _generate_data_preparation_params() | |||
self.current_pipeline_index = 0 | |||
self.offset = 1 | |||
def evaluate(self, pipeline_to_eval, input_data=None): | |||
if input_data is None: | |||
input_data = self.input_data | |||
pipeline_result = self.backend.evaluate_pipeline( | |||
problem_description=self.problem_description, | |||
pipeline=pipeline_to_eval, | |||
input_data=input_data, | |||
metrics=self.metrics, | |||
data_preparation_pipeline=self.data_preparation_pipeline, | |||
scoring_pipeline=self.scoring_pipeline, | |||
data_preparation_params=self.data_preparation_params) | |||
return pipeline_result | |||
def _search(self, time_left): | |||
# Read all the pipelines to be evaluated | |||
pipelines_to_eval = self.available_pipelines[self.current_pipeline_index: self.current_pipeline_index+self.offset] | |||
self.current_pipeline_index += 1 | |||
pipeline_results = self.backend.evaluate_pipelines( | |||
problem_description=self.problem_description, | |||
pipelines=pipelines_to_eval, | |||
input_data=self.input_data, | |||
metrics=self.metrics, | |||
data_preparation_pipeline=self.data_preparation_pipeline, | |||
scoring_pipeline=self.scoring_pipeline, | |||
data_preparation_params=self.data_preparation_params) | |||
# DEBUG | |||
#################### | |||
for pipeline_result in pipeline_results: | |||
try: | |||
for error in pipeline_result.error: | |||
if error is not None: | |||
raise error | |||
except: | |||
import traceback | |||
traceback.print_exc() | |||
#################### | |||
return [self.ranking_function(pipeline_result) for pipeline_result in pipeline_results] | |||
def _return_pipelines(self, task_type, task_subtype, data_type): | |||
pipeline_candidates = _generate_pipelines(primitive_python_paths) | |||
return pipeline_candidates | |||
primitive_python_paths = { | |||
'data_processing': [ | |||
#'d3m.primitives.tods.data_processing.time_interval_transform', | |||
#'d3m.primitives.tods.data_processing.categorical_to_binary', | |||
'd3m.primitives.tods.data_processing.column_filter', | |||
#'d3m.primitives.tods.data_processing.timestamp_validation', | |||
#'d3m.primitives.tods.data_processing.duplication_validation', | |||
#'d3m.primitives.tods.data_processing.continuity_validation', | |||
], | |||
'timeseries_processing': [ | |||
'd3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler', | |||
'd3m.primitives.tods.timeseries_processing.transformation.standard_scaler', | |||
'd3m.primitives.tods.timeseries_processing.transformation.power_transformer', | |||
'd3m.primitives.tods.timeseries_processing.transformation.quantile_transformer', | |||
'd3m.primitives.tods.timeseries_processing.transformation.moving_average_transform', | |||
'd3m.primitives.tods.timeseries_processing.transformation.simple_exponential_smoothing', | |||
#'d3m.primitives.tods.timeseries_processing.transformation.holt_smoothing', | |||
#'d3m.primitives.tods.timeseries_processing.transformation.holt_winters_exponential_smoothing', | |||
#'d3m.primitives.tods.timeseries_processing.decomposition.time_series_seasonality_trend_decomposition', | |||
], | |||
'feature_analysis': [ | |||
#'d3m.primitives.tods.feature_analysis.auto_correlation', | |||
'd3m.primitives.tods.feature_analysis.statistical_mean', | |||
'd3m.primitives.tods.feature_analysis.statistical_median', | |||
'd3m.primitives.tods.feature_analysis.statistical_g_mean', | |||
'd3m.primitives.tods.feature_analysis.statistical_abs_energy', | |||
'd3m.primitives.tods.feature_analysis.statistical_abs_sum', | |||
'd3m.primitives.tods.feature_analysis.statistical_h_mean', | |||
'd3m.primitives.tods.feature_analysis.statistical_maximum', | |||
#'d3m.primitives.tods.feature_analysis.statistical_minimum', | |||
#'d3m.primitives.tods.feature_analysis.statistical_mean_abs', | |||
#'d3m.primitives.tods.feature_analysis.statistical_mean_abs_temporal_derivative', | |||
#'d3m.primitives.tods.feature_analysis.statistical_mean_temporal_derivative', | |||
#'d3m.primitives.tods.feature_analysis.statistical_median_abs_deviation', | |||
#'d3m.primitives.tods.feature_analysis.statistical_kurtosis', | |||
#'d3m.primitives.tods.feature_analysis.statistical_skew', | |||
#'d3m.primitives.tods.feature_analysis.statistical_std', | |||
#'d3m.primitives.tods.feature_analysis.statistical_var', | |||
#'d3m.primitives.tods.feature_analysis.statistical_variation', | |||
#'d3m.primitives.tods.feature_analysis.statistical_vec_sum', | |||
#'d3m.primitives.tods.feature_analysis.statistical_willison_amplitude', | |||
#'d3m.primitives.tods.feature_analysis.statistical_zero_crossing', | |||
#'d3m.primitives.tods.feature_analysis.spectral_residual_transform', | |||
#'d3m.primitives.tods.feature_analysis.fast_fourier_transform', | |||
#'d3m.primitives.tods.feature_analysis.discrete_cosine_transform', | |||
#'d3m.primitives.tods.feature_analysis.non_negative_matrix_factorization', | |||
#'d3m.primitives.tods.feature_analysis.bk_filter', | |||
#'d3m.primitives.tods.feature_analysis.hp_filter', | |||
#'d3m.primitives.tods.feature_analysis.truncated_svd', | |||
#'d3m.primitives.tods.feature_analysis.wavelet_transform', | |||
#'d3m.primitives.tods.feature_analysis.trmf', | |||
], | |||
'detection_algorithm': [ | |||
'd3m.primitives.tods.detection_algorithm.pyod_ae', | |||
'd3m.primitives.tods.detection_algorithm.pyod_vae', | |||
'd3m.primitives.tods.detection_algorithm.pyod_cof', | |||
'd3m.primitives.tods.detection_algorithm.pyod_sod', | |||
'd3m.primitives.tods.detection_algorithm.pyod_abod', | |||
'd3m.primitives.tods.detection_algorithm.pyod_hbos', | |||
'd3m.primitives.tods.detection_algorithm.pyod_iforest', | |||
#'d3m.primitives.tods.detection_algorithm.pyod_lof', | |||
#'d3m.primitives.tods.detection_algorithm.pyod_knn', | |||
#'d3m.primitives.tods.detection_algorithm.pyod_ocsvm', | |||
#'d3m.primitives.tods.detection_algorithm.pyod_loda', | |||
#'d3m.primitives.tods.detection_algorithm.pyod_cblof', | |||
#'d3m.primitives.tods.detection_algorithm.pyod_sogaal', | |||
#'d3m.primitives.tods.detection_algorithm.pyod_mogaal', | |||
#'d3m.primitives.tods.detection_algorithm.matrix_profile', | |||
#'d3m.primitives.tods.detection_algorithm.AutoRegODetector', | |||
#'d3m.primitives.tods.detection_algorithm.LSTMODetector', | |||
#'d3m.primitives.tods.detection_algorithm.AutoRegODetector', | |||
#'d3m.primitives.tods.detection_algorithm.PCAODetector', | |||
#'d3m.primitives.tods.detection_algorithm.KDiscordODetector', | |||
#'d3m.primitives.tods.detection_algorithm.deeplog', | |||
#'d3m.primitives.tods.detection_algorithm.telemanom', | |||
], | |||
'contamination': [0.01, 0.02, 0.05, 0.07, 0.1, 0.15, 0.2], | |||
} | |||
def _rank_first_metric(pipeline_result): | |||
if pipeline_result.status == 'COMPLETED': | |||
scores = pipeline_result.scores | |||
pipeline_result.rank = -scores['value'][0] | |||
return pipeline_result | |||
else: | |||
        # Evaluation failed; assign the worst possible rank
pipeline_result.rank = 1 | |||
return pipeline_result | |||
def _generate_data_preparation_params(): | |||
from axolotl.utils import schemas as schemas_utils | |||
data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split'] | |||
return data_preparation_params | |||
def _generate_scoring_pipeline(): | |||
from axolotl.utils import schemas as schemas_utils | |||
scoring_pipeline = schemas_utils.get_scoring_pipeline() | |||
return scoring_pipeline | |||
def _generate_data_preparation_pipeline(): | |||
from axolotl.utils import schemas as schemas_utils | |||
data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA") | |||
return data_preparation_pipeline | |||
def _generate_pipline(combinations): | |||
from d3m import index | |||
from d3m.metadata.base import ArgumentType | |||
from d3m.metadata.pipeline import Pipeline, PrimitiveStep | |||
piplines = [] | |||
for combination in combinations: | |||
# Creating pipeline | |||
pipeline_description = Pipeline() | |||
pipeline_description.add_input(name='inputs') | |||
# The first three steps are fixed | |||
# Step 0: dataset_to_dataframe | |||
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')) | |||
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') | |||
step_0.add_output('produce') | |||
pipeline_description.add_step(step_0) | |||
# Step 1: column_parser | |||
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')) | |||
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_1.add_output('produce') | |||
pipeline_description.add_step(step_1) | |||
# Step 2: extract_columns_by_semantic_types(attributes) | |||
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) | |||
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
step_2.add_output('produce') | |||
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, | |||
data=['https://metadata.datadrivendiscovery.org/types/Attribute']) | |||
pipeline_description.add_step(step_2) | |||
# Step 3: extract_columns_by_semantic_types(targets) | |||
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) | |||
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') | |||
step_3.add_output('produce') | |||
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, | |||
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) | |||
pipeline_description.add_step(step_3) | |||
attributes = 'steps.2.produce' | |||
targets = 'steps.3.produce' | |||
tods_step_4 = PrimitiveStep(primitive=index.get_primitive(combination[0])) | |||
tods_step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) | |||
tods_step_4.add_output('produce') | |||
pipeline_description.add_step(tods_step_4) | |||
tods_step_5 = PrimitiveStep(primitive=index.get_primitive(combination[1])) | |||
tods_step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce') | |||
tods_step_5.add_output('produce') | |||
pipeline_description.add_step(tods_step_5) | |||
tods_step_6= PrimitiveStep(primitive=index.get_primitive(combination[2])) | |||
tods_step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') | |||
tods_step_6.add_output('produce') | |||
tods_step_6.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=combination[3]) | |||
pipeline_description.add_step(tods_step_6) | |||
#tods_step_7 = PrimitiveStep(primitive=index.get_primitive(combination[3])) | |||
#tods_step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce') | |||
#tods_step_7.add_output('produce') | |||
#pipeline_description.add_step(tods_step_7) | |||
# Finalize the pipeline | |||
final_step = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common')) | |||
final_step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce') | |||
final_step.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') | |||
final_step.add_output('produce') | |||
pipeline_description.add_step(final_step) | |||
pipeline_description.add_output(name='output predictions', data_reference='steps.7.produce') | |||
pipeline_description.id = str(uuid.uuid4()) | |||
pipeline_description.created = Pipeline().created | |||
piplines.append(pipeline_description) | |||
return piplines | |||
def _generate_pipelines(primitive_python_paths, cpu_count=40): | |||
""" | |||
Args: | |||
primitive_python_paths: a list of primitive Python paths for algorithms | |||
Returns: | |||
the pipline description json | |||
""" | |||
import itertools | |||
import multiprocessing as mp | |||
#components = ['data_processing', 'timeseries_processing', 'feature_analysis', 'detection_algorithm'] | |||
components = ['timeseries_processing', 'feature_analysis', 'detection_algorithm', 'contamination'] | |||
combinations = itertools.product(*(primitive_python_paths[k] for k in components)) | |||
return _generate_pipline(combinations) | |||
#pipelines = [] | |||
## Allocate tasks | |||
#combination_each_core_list = [[] for i in range(cpu_count)] | |||
#for idx, combination in enumerate(combinations): | |||
# core = idx % cpu_count | |||
# combination_each_core_list[core].append(combination) | |||
## Obtain all the pipelines | |||
#pool = mp.Pool(processes=cpu_count) | |||
#results = [pool.apply_async(_generate_pipline, | |||
# args=(combinations,)) | |||
# for combinations in combination_each_core_list] | |||
#piplines = [] | |||
#for p in results: | |||
# piplines.extend(p.get()) | |||
return piplines |
@@ -0,0 +1,59 @@ | |||
def generate_dataset_problem(df, target_index, metric): | |||
""" | |||
A wrapper for generating dataset and problem | |||
Args: | |||
df (pandas.DataFrame): dataset | |||
target_index (int): The column index of the target | |||
        metric (str): `F1` for computing F1 on label 1, `F1_MACRO` for
            macro-F1 on both labels 0 and 1
returns: | |||
dataset, problem | |||
""" | |||
from axolotl.utils import data_problem | |||
from d3m.metadata.problem import TaskKeyword, PerformanceMetric | |||
if metric == 'F1': | |||
performance_metrics = [{'metric': PerformanceMetric.F1, 'params': {'pos_label': '1'}}] | |||
elif metric == 'F1_MACRO': | |||
performance_metrics = [{'metric': PerformanceMetric.F1_MACRO, 'params': {}}] | |||
else: | |||
        raise ValueError('The metric {} is not supported.'.format(metric))
dataset, problem_description = data_problem.generate_dataset_problem(df, | |||
target_index=target_index, | |||
task_keywords=[TaskKeyword.ANOMALY_DETECTION,], | |||
performance_metrics=performance_metrics) | |||
return dataset, problem_description | |||
def evaluate_pipeline(problem_description, dataset, pipeline): | |||
from axolotl.utils import schemas as schemas_utils | |||
from axolotl.backend.simple import SimpleRunner | |||
data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA") | |||
scoring_pipeline = schemas_utils.get_scoring_pipeline() | |||
data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split'] | |||
metrics = problem_description['problem']['performance_metrics'] | |||
backend = SimpleRunner(random_seed=0) | |||
pipeline_result = backend.evaluate_pipeline(problem_description=problem_description, | |||
pipeline=pipeline, | |||
input_data=[dataset], | |||
metrics=metrics, | |||
data_preparation_pipeline=data_preparation_pipeline, | |||
scoring_pipeline=scoring_pipeline, | |||
data_preparation_params=data_preparation_params) | |||
try: | |||
for error in pipeline_result.error: | |||
if error is not None: | |||
raise error | |||
except: | |||
import traceback | |||
traceback.print_exc() | |||
return pipeline_result | |||
@@ -0,0 +1,51 @@ | |||
def generate_dataset_problem(df, target_index, metric): | |||
""" | |||
A wrapper for generating dataset and problem | |||
Args: | |||
df (pandas.DataFrame): dataset | |||
target_index (int): The column index of the target | |||
        metric (str): `F1` for computing F1 on label 1, `F1_MACRO` for
            macro-F1 on both labels 0 and 1
returns: | |||
dataset, problem | |||
""" | |||
from axolotl.utils import data_problem | |||
from d3m.metadata.problem import TaskKeyword, PerformanceMetric | |||
if metric == 'F1': | |||
performance_metrics = [{'metric': PerformanceMetric.F1, 'params': {'pos_label': '1'}}] | |||
elif metric == 'F1_MACRO': | |||
performance_metrics = [{'metric': PerformanceMetric.F1_MACRO, 'params': {}}] | |||
else: | |||
        raise ValueError('The metric {} is not supported.'.format(metric))
dataset, problem_description = data_problem.generate_dataset_problem(df, | |||
target_index=target_index, | |||
task_keywords=[TaskKeyword.ANOMALY_DETECTION,], | |||
performance_metrics=performance_metrics) | |||
return dataset, problem_description | |||
def evaluate_pipeline(problem_description, dataset, pipeline): | |||
from axolotl.utils import schemas as schemas_utils | |||
from axolotl.backend.simple import SimpleRunner | |||
data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA") | |||
scoring_pipeline = schemas_utils.get_scoring_pipeline() | |||
data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split'] | |||
metrics = problem_description['problem']['performance_metrics'] | |||
backend = SimpleRunner(random_seed=0) | |||
pipeline_result = backend.evaluate_pipeline(problem_description=problem_description, | |||
pipeline=pipeline, | |||
input_data=[dataset], | |||
metrics=metrics, | |||
data_preparation_pipeline=data_preparation_pipeline, | |||
scoring_pipeline=scoring_pipeline, | |||
data_preparation_params=data_preparation_params) | |||
return pipeline_result | |||
@@ -29,7 +29,7 @@ setup( | |||
'd3m', | |||
'Jinja2', | |||
'simplejson==3.12.0', | |||
'scikit-learn==0.21.3', | |||
'scikit-learn==0.22.0', | |||
'statsmodels==0.11.1', | |||
'PyWavelets>=1.1.1', | |||
'tensorflow', # should be removed later | |||
@@ -1,36 +0,0 @@ | |||
# A Brute-Force Search | |||
import uuid | |||
from d3m.metadata.pipeline import Pipeline | |||
from axolotl.algorithms.base import PipelineSearchBase | |||
from axolotl.utils import pipeline as pipeline_utils, schemas as schemas_utils | |||
def random_rank(pipeline_result): | |||
if pipeline_result.status == 'COMPLETED': | |||
pipeline_result.rank = random.uniform(0, 1) | |||
return pipeline_result | |||
class BruteForceSearch(PipelineSearchBase): | |||
def __init__(self, problem_description, backend, *, primitives_blocklist=None, ranking_function=None): | |||
super().__init__(problem_description=problem_description, backend=backend, | |||
primitives_blocklist=primitives_blocklist, ranking_function=ranking_function) | |||
if self.ranking_function is None: | |||
self.ranking_function = random_rank | |||
# Find th candidates | |||
self.task_description = schemas_utils.get_task_description(self.problem_description['problem']['task_keywords']) | |||
print('task_description:', self.task_description) | |||
self.available_pipelines = self._return_pipelines( | |||
self.task_description['task_type'], self.task_description['task_subtype'], self.task_description['data_types']) | |||
print('available_pipelines:', self.available_pipelines) | |||
def _return_pipelines(self, task_type, task_subtype, data_type): | |||
pipeline_candidates = [] | |||
for pipeline_dict in schemas_utils.get_pipelines_db()['CLASSIFICATION']: | |||
pipeline = pipeline_utils.load_pipeline(pipeline_dict) | |||
pipeline.id = str(uuid.uuid4()) | |||
pipeline.created = Pipeline().created | |||
pipeline_candidates.append(pipeline) | |||
return pipeline_candidates |