You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

system_KNN.py 2.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  import numpy as np
  from tods.sk_interface.feature_analysis.StatisticalMaximum_skinterface import StatisticalMaximumSKI
  from tods.sk_interface.detection_algorithm.KNN_skinterface import KNNSKI
  from tods.sk_interface.data_ensemble.Ensemble_skinterface import EnsembleSKI
  from tods.sk_interface.utils.data import generate_3D_data, load_sys_data, generate_sys_feature

  # System-wise anomaly detection example: statistical-maximum features,
  # a KNN detector, and an ensemble step that produces per-system output.
  #
  # The data is 3D, laid out as (n, T, d): n = number of systems, T = time,
  # d = feature dimension.
  #
  # Alternative data source: instead of loading the sample files below,
  # synthetic data can be produced with
  #   generate_3D_data(n_sys=5, n_train=1000, n_test=1000,
  #                    n_features=3, contamination=0.1)
  # which returns X_train, y_train, X_test, y_test (set the system count to 5
  # in that case).

  SAMPLE_CSV = '../../datasets/anomaly/system_wise/sample/train.csv'
  SYSTEMS_DIR = '../../datasets/anomaly/system_wise/sample/systems'

  train_series, train_labels, train_meta = load_sys_data(SAMPLE_CSV, SYSTEMS_DIR)
  # NOTE(review): the "test" split is read from the same train.csv as the
  # training split -- confirm whether a dedicated test file was intended.
  test_series, test_labels, test_meta = load_sys_data(SAMPLE_CSV, SYSTEMS_DIR)

  system_count = train_meta['sys_num']

  # Feature analysis step.
  max_feature_extractor = StatisticalMaximumSKI(system_num=system_count)

  # Outlier detection step.
  knn_detector = KNNSKI(contamination=0.1, system_num=system_count)

  # Ensemble step.
  score_ensembler = EnsembleSKI()

  # Run the feature analysis on both splits (produce() is called directly;
  # this script never calls fit() on the feature extractor).
  train_features = max_feature_extractor.produce(train_series)
  test_features = max_feature_extractor.produce(test_series)

  # Fit the detector on the training features, then score the test features.
  knn_detector.fit(train_features)
  per_step_scores = knn_detector.predict_score(test_features)  # shape (n, T, 1)

  # Build the system-level feature matrix from the time-series anomaly scores.
  system_features = generate_sys_feature(per_step_scores)  # shape (T, n)
  print(system_features.shape)
  print(system_features.ndim)

  # Ensemble the time-series outlier scores for each system.
  score_ensembler.fit(system_features)
  system_scores = score_ensembler.predict(system_features)
  print(system_scores)

TODS是一个全栈的自动化机器学习系统,主要针对多变量时间序列数据的异常检测。TODS提供了详尽的用于构建基于机器学习的异常检测系统的模块,它们包括:数据处理(data processing)、时间序列处理(time series processing)、特征分析(feature analysis)、检测算法(detection algorithms)和强化模块(reinforcement module)。这些模块所提供的功能包括常见的数据预处理、时间序列数据的平滑或变换、从时域或频域中抽取特征、多种多样的检测算