You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

run_pipeline_system.py 2.0 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. import sys
  2. import argparse
  3. import os
  4. import pandas as pd
  5. from tods import generate_dataset, load_pipeline, evaluate_pipeline
  6. this_path = os.path.dirname(os.path.abspath(__file__))
  7. #table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset
  8. parser = argparse.ArgumentParser(description='Arguments for running predefined pipelin.')
  9. parser.add_argument('--table_path', type=str, default=os.path.join(this_path, '../../datasets/anomaly/system_wise/sample/train.csv'),
  10. help='Input the path of the input data table')
  11. parser.add_argument('--system_dir', type=str, default=os.path.join(this_path, '../../datasets/anomaly/system_wise/sample/systems'),
  12. help='The directory of where the systems are stored')
  13. parser.add_argument('--target_index', type=int, default=2,
  14. help='Index of the ground truth (for evaluation)')
  15. parser.add_argument('--metric',type=str, default='F1_MACRO',
  16. help='Evaluation Metric (F1, F1_MACRO)')
  17. parser.add_argument('--pipeline_path', default=os.path.join(this_path, './example_pipelines/system_pipeline.json'),
  18. help='Input the path of the pre-built pipeline description')
  19. # parser.add_argument('--pipeline_path', default=os.path.join(this_path, '../tods/resources/default_pipeline.json'),
  20. # help='Input the path of the pre-built pipeline description')
  21. args = parser.parse_args()
  22. table_path = args.table_path
  23. target_index = args.target_index # what column is the target
  24. system_dir = args.system_dir
  25. pipeline_path = args.pipeline_path
  26. metric = args.metric # F1 on both label 0 and 1
  27. # Read data and generate dataset
  28. df = pd.read_csv(table_path)
  29. dataset = generate_dataset(df, target_index ,system_dir)
  30. # Load the default pipeline
  31. pipeline = load_pipeline(pipeline_path)
  32. # Run the pipeline
  33. pipeline_result = evaluate_pipeline(dataset, pipeline, metric)
  34. print(pipeline_result)
  35. # For debugging
  36. if pipeline_result.status == 'ERRORED':
  37. raise pipeline_result.error[0]

TODS是一个全栈的自动化机器学习系统,主要针对多变量时间序列数据的异常检测。TODS提供了详尽的用于构建基于机器学习的异常检测系统的模块,它们包括:数据处理(data processing)、时间序列处理(time series processing)、特征分析(feature analysis)、检测算法(detection algorithms)和强化模块(reinforcement module)。这些模块所提供的功能包括常见的数据预处理、时间序列数据的平滑或变换,从时域或频域中抽取特征、多种多样的检测算