@@ -0,0 +1,55 @@ | |||||
# Revisiting Time Series Outlier Detection: Definitions and Benchmarks
This branch contains the source code for the experimental part of our paper. We provide everything needed to run the experiments: the datasets, dataset generators, pipeline json files, Python scripts, and runners, as well as the results (in "./result") we obtained from the experiments.
## Resources | |||||
* Paper: Under review | |||||
## Cite this Work: | |||||
If you find this work useful, you may cite it as follows:
``` | |||||
@misc{lai2020tods, | |||||
title={TODS: An Automated Time Series Outlier Detection System}, | |||||
author={Kwei-Herng Lai and Daochen Zha and Guanchu Wang and Junjie Xu and Yue Zhao and Devesh Kumar and Yile Chen and Purav Zumkhawaka and Minyang Wan and Diego Martinez and Xia Hu},
year={2020}, | |||||
eprint={2009.09822}, | |||||
archivePrefix={arXiv}, | |||||
primaryClass={cs.DB} | |||||
} | |||||
``` | |||||
## Datasets | |||||
To get the datasets, please go to "data/script" and run all of the Python scripts there. They will download and preprocess the data automatically into the "data/" folder.
## Pipeline | |||||
The pipeline json files are organized by the different settings of the algorithms.
## Runner | |||||
To run a pipeline, you can generate your own pipeline json file from a script:
```bash
python pipeline_construction/pipeline_construction_simple.py | |||||
python pipeline_construction/pipeline_construction_subseq.py | |||||
python pipeline_construction/neural/build_AE_pipeline.py | |||||
python pipeline_construction/neural/build_RNNLSTM_pipeline.py | |||||
``` | |||||
Then run the pipeline with run\_pipeline.py (below is an example of running IForest on the GECCO dataset):
```bash
python run_pipeline.py --pipeline_path pipelines/simple/pyod_iforest_0.01.json --data_path ./data/water_quality.csv | |||||
``` | |||||
Or you can directly run the pipelines we have already generated in /pipelines with the bash script:
```bash
./run.sh | |||||
``` | |||||
*Please refer to the master branch of TODS for details on running pipelines.
@@ -72,7 +72,7 @@ for cont_rate in [0.01, 0.05, 0.1, 0.15, 0.2, 0.25]: | |||||
# Output to json | # Output to json | ||||
data = pipeline_description.to_json() | data = pipeline_description.to_json() | ||||
with open('../../pipelines/AE/'+data_name+'/ae_pipeline_'+str(cont_rate)+'.json', 'w') as f: | |||||
with open('./pipelines/AE/'+data_name+'/ae_pipeline_'+str(cont_rate)+'.json', 'w') as f: | |||||
f.write(data) | f.write(data) | ||||
print(data) | print(data) | ||||
@@ -72,7 +72,7 @@ for cont_rate in [0.01, 0.05, 0.1, 0.15, 0.2, 0.25]: | |||||
# Output to json | # Output to json | ||||
data = pipeline_description.to_json() | data = pipeline_description.to_json() | ||||
with open('../../pipelines/RNN_LSTM/'+data_name+'/rnnlstm_pipeline_'+str(cont_rate)+'.json', 'w') as f: | |||||
with open('./pipelines/RNN_LSTM/'+data_name+'/rnnlstm_pipeline_'+str(cont_rate)+'.json', 'w') as f: | |||||
f.write(data) | f.write(data) | ||||
print(data) | print(data) | ||||
@@ -152,7 +152,7 @@ def _generate_pipeline(combinations): | |||||
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce') | pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce') | ||||
data = pipeline_description.to_json() | data = pipeline_description.to_json() | ||||
#with open('../pipelines/'+str(combination[1].split(".")[-1])+'_'+str(combination[2])+".json", 'w') as f: | #with open('../pipelines/'+str(combination[1].split(".")[-1])+'_'+str(combination[2])+".json", 'w') as f: | ||||
with open('./simple/'+str(combination[1].split(".")[-1])+'_'+str(combination[2])+".json", 'w') as f: | |||||
with open('./pipelines/simple/'+str(combination[1].split(".")[-1])+'_'+str(combination[2])+".json", 'w') as f: | |||||
f.write(data) | f.write(data) | ||||
pipeline_description.id = str(uuid.uuid4()) | pipeline_description.id = str(uuid.uuid4()) | ||||
pipeline_description.created = Pipeline().created | pipeline_description.created = Pipeline().created | ||||
@@ -152,7 +152,7 @@ def _generate_pipeline(combinations): | |||||
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce') | pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce') | ||||
data = pipeline_description.to_json() | data = pipeline_description.to_json() | ||||
#with open('../pipelines/'+str(combination[1].split(".")[-1])+'_'+str(combination[2])+".json", 'w') as f: | #with open('../pipelines/'+str(combination[1].split(".")[-1])+'_'+str(combination[2])+".json", 'w') as f: | ||||
with open('./subseq/'+str(combination[1].split(".")[-1])+'_subseq_'+str(combination[2])+".json", 'w') as f: | |||||
with open('./pipelines/subseq/'+str(combination[1].split(".")[-1])+'_subseq_'+str(combination[2])+".json", 'w') as f: | |||||
f.write(data) | f.write(data) | ||||
pipeline_description.id = str(uuid.uuid4()) | pipeline_description.id = str(uuid.uuid4()) | ||||
pipeline_description.created = Pipeline().created | pipeline_description.created = Pipeline().created | ||||
@@ -1,19 +1,36 @@ | |||||
#!/bin/bash | #!/bin/bash | ||||
#data="web_attack water_quality" | |||||
data="creditcard" | |||||
data="creditcard web_attack water_quality swan_sf" | |||||
simple_pipelines=$(ls pipelines/simple | grep mogaal) | |||||
simple_pipelines=$(ls pipelines/simple) | |||||
subseq_pipelines=$(ls pipelines/subseq) | subseq_pipelines=$(ls pipelines/subseq) | ||||
for d in $data | for d in $data | ||||
do | do | ||||
for p in $subseq_pipelines | for p in $subseq_pipelines | ||||
do | do | ||||
tsp python run_pipeline.py --pipeline_path pipelines/subseq/$p --data_path ./data/$d.csv | |||||
python run_pipeline.py --pipeline_path pipelines/subseq/$p --data_path ./data/$d.csv | |||||
done | |||||
for p in $simple_pipelines | |||||
do | |||||
python run_pipeline.py --pipeline_path pipelines/simple/$p --data_path ./data/$d.csv | |||||
done | |||||
done | |||||
for d in $data | |||||
do | |||||
rnn_pipelines=$(ls pipelines/RNN_LSTM/$d) | |||||
for p in $rnn_pipelines | |||||
do | |||||
python run_pipeline.py --pipeline_path pipelines/RNN_LSTM/$d/$p --data_path ./data/$d.csv | |||||
done | |||||
done | |||||
for d in $data | |||||
do | |||||
ae_pipelines=$(ls pipelines/AE/$d) | |||||
for p in $ae_pipelines | |||||
do | |||||
tsp python run_pipeline.py --pipeline_path pipelines/AE/$d/$p --data_path ./data/$d.csv | |||||
done | done | ||||
#for p in $simple_pipelines | |||||
#do | |||||
# tsp python run_pipeline.py --pipeline_path pipelines/simple/$p --data_path ./data/$d.csv | |||||
#done | |||||
done | done |
@@ -1,15 +1,12 @@ | |||||
#!/bin/bash | #!/bin/bash | ||||
#data="swan_sf creditcard web_attack water_quality" | |||||
data="creditcard" | |||||
echo $rnn_pipelines | |||||
data="swan_sf creditcard web_attack water_quality" | |||||
for d in $data | for d in $data | ||||
do | do | ||||
rnn_pipelines=$(ls pipelines/RNN_LSTM/$d) | rnn_pipelines=$(ls pipelines/RNN_LSTM/$d) | ||||
for p in $rnn_pipelines | for p in $rnn_pipelines | ||||
do | do | ||||
tsp python run_pipeline.py --pipeline_path pipelines/RNN_LSTM/$d/$p --data_path ./data/$d.csv | |||||
python run_pipeline.py --pipeline_path pipelines/RNN_LSTM/$d/$p --data_path ./data/$d.csv | |||||
done | done | ||||
done | done |
@@ -21,6 +21,7 @@ If you find this work useful, you may cite this work: | |||||
## Datasets | ## Datasets | ||||
All of the datasets were generated from the generators. Tuning the parameters in the generators allows you to produce your own datasets.
The datasets used in the paper is provided in "unidataset" and "multidataset" | |||||
## Pipeline | ## Pipeline | ||||
@@ -33,7 +34,7 @@ To run a pipeline, you can generate your own pipeline json file from script. Tak | |||||
```python | ```python | ||||
python script/simple_algo/build_AutoEncoder_pipeline.py | python script/simple_algo/build_AutoEncoder_pipeline.py | ||||
``` | ``` | ||||
Then run the json using run_pipeline in /runner | |||||
Then run the json using run\_pipeline in /runner | |||||
```python | ```python | ||||
python runner/run_pipeline.py --pipeline_path ae_pipeline_default_con0.05.json | python runner/run_pipeline.py --pipeline_path ae_pipeline_default_con0.05.json | ||||
``` | ``` | ||||
@@ -48,4 +49,4 @@ python runner/run_pipeline.py --pipeline_path Pipeline/AutoEncoder/ae_pipeline_d | |||||
*Please refer master branch of TODS for details of running pipelines. | |||||
*Please refer to the master branch of TODS for details on running pipelines.