You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

swan_sf.py 1.5 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. import pandas as pd
  2. import os
  3. import requests
  4. def get_data():
  5. link="https://bitbucket.org/gsudmlab/mvtsdata_toolkit/downloads/petdataset_01.zip"
  6. r = requests.get(link)
  7. with open('./swan_sf.zip', 'wb') as f:
  8. f.write(r.content)
  9. os.system("unzip swan_sf.zip")
  10. def read_labeled_data():
  11. dir_path = "./petdataset_01"
  12. files = os.listdir(dir_path)
  13. inlier = []
  14. label = {}
  15. for f in files:
  16. #if "csv" not in f and "NF" not in f:
  17. if "csv" not in f:
  18. continue
  19. label = f.split("lab[")[1].split("]")[0]
  20. #print(label)
  21. f_path = os.path.join(dir_path, f)
  22. df = pd.read_csv(f_path, header=0, sep='\t')
  23. df['label'] = label
  24. inlier.append(df)
  25. df = pd.concat(inlier, axis=0, ignore_index=True)
  26. df = df.sort_values(by=['Timestamp'])
  27. drop_cols = [col for col in df.columns if "label" in col or "loc" in col or "Timestamp" in col][:-1]
  28. df = df.drop(columns=drop_cols)
  29. df.reset_index(drop=True, inplace=True)
  30. df = df.fillna(method='ffill')
  31. df = df.dropna(axis="columns")
  32. df['label'].replace({"NF":0, "C":1, "B":1, "M":1, "X":1}, inplace=True)
  33. df['IS_TMFI'].replace({True:1, False:0}, inplace=True)
  34. cols = df.columns.tolist()
  35. cols = cols[-1:] + cols[:-1]
  36. df = df[cols]
  37. df.to_csv("../swan_sf.csv", index=False)
  38. if __name__ == "__main__":
  39. get_data()
  40. read_labeled_data()
  41. os.system("rm -rf swan_sf.zip petdataset_01")

全栈的自动化机器学习系统,主要针对多变量时间序列数据的异常检测。TODS提供了详尽的用于构建基于机器学习的异常检测系统的模块,它们包括:数据处理(data processing),时间序列处理(time series processing),特征分析(feature analysis),检测算法(detection algorithms)和强化模块(reinforcement module)。这些模块所提供的功能包括常见的数据预处理、时间序列数据的平滑或变换,从时域或频域中抽取特征、多种多样的检测算