You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

tods_dataset_base.py 4.1 kB

resolve construct prediction bug Former-commit-id: b0d35d9aea14dc7b9382642dc4aebbb7b33227af [formerly 28b7582d253ac7370edb4ee77c41a78ec5efe21b] [formerly 632f8af13ad3d37eb5e721730ccef4c795052ff0 [formerly 9eb35832567152e9d8b616951cba2f9d472e3fd0]] [formerly ab6f5a8a9c6b9255cdcd65805974015a7daf0693 [formerly 2720e70c78084e35396f51bb28c2090679f1a53b] [formerly c1f615a3c58a4916b810c6a566c16c12bc507908 [formerly 21840b378759723d0ff5a11d59d3e2daade1e679]]] [formerly 2184b60ad634dbc847bfaeb3413ae99b824bad0a [formerly 2dae6c4ed4a1995848d151ca54fddd69969145a1] [formerly 6a577865b8a1f92a963b3d168e06a63dcb0161b3 [formerly ee28aaed84e9f06385a453ed06a7314fc603ca91]] [formerly 72078d88c8363efb9a91ddd5963d234b8ab0fd10 [formerly ab1fb9d5125467f2bad37c01b395460e15830910] [formerly 476600a47c7aa6c88895eda67185ee6a70d1fd5d [formerly f5a6205bb56bdfcde3584db8107b25af51ce1feb]]]] [formerly e5c2c3deefcac7dea9094877181d7910eb5286a5 [formerly 8585dc41a9c94b9a7084484b60e3cc07b9cef7fe] [formerly 9cc7fe2088bcc893022abf4b6ba137fdbdda4211 [formerly 1d2104316f6dc308482b2d8c1c0210a240ef4f4a]] [formerly b51e614a11931bf92fcbc497ce6509339a4a9771 [formerly f9891a191d390c09ecd274931f6fbab9f09fe06c] [formerly 048aa2f114fe950c50e3d9833dbe4967c65f25b1 [formerly d4de64574be6221a8287ce4c4f72e7b39f361258]]] [formerly ff66f55a512c8e42de6d48182b79b9c8e42bfae1 [formerly 0b691b9a7f54cef7d12d14b5e5859959503838c8] [formerly e64e8dd25314730134bf6de25d3d3c0a9d5c8d63 [formerly cc45939e01d88b193138b72384ba519e4df731e7]] [formerly cdad10712ac93799628bcf24365402a347dbb16d [formerly 2789e20e79f5b9726ad7c1a002a31c5d2253f63b] [formerly 25924f293c4e9d69e44c77a08a05c46d3f66bb27 [formerly 32997accab262d1a8960e38792035422d14efd13]]]]] [formerly f43b4310405ef100174e295d57504653eb26f04f [formerly 95815d02ca055259aa579dc3822f1e77ddca8a1c] [formerly fe9bd45d441e3608622bdfb90d3748bb2fd17bbd [formerly 6daf0aa73e40a4027dd5ae04cf16522974b71b36]] [formerly 61ab30c9a3ab7d8fb4d7366dd09937813e34a7fb [formerly 
a13c6e23b463cb54a80938f981b811f33326f68e] [formerly 86fa5919eec79d126e750a176f67f1554eac41fa [formerly 1e49e1a30348ac38fe7e31a59a50006aabfac43c]]] [formerly 69c5bc967a3bd09af946c28c8680a510614897fd [formerly ab82915cbd9157ad0dc204c0247107bda650742a] [formerly f8057c3b14e022eb44a0abb79f50196e093eaa35 [formerly 5232f345780a58c7544bd6a4d5c31b930cd0a36e]] [formerly 671c54e9520203b5ed14053770c4a1ed4b621959 [formerly 6454a28f2685a4176ead96d7ee68e15afaaadfa8] [formerly 3db6ff66b90f6abf68a8c78ae929d548c1f6b2ad [formerly aa8c7fe127cf82a244718da550b121052855ad27]]]] [formerly 86b2ec6b84d16ae7d5c51eace4f24ea472ea11a2 [formerly f35c344efebe54ad170cd6de9bc7f4126e53f510] [formerly d5616f66cd90f797ad80a85b38fe9234f86540fe [formerly 98c9dca7daf1efc38766b3fde4854f06538d7efa]] [formerly a7dcc62bc54b16698b3b7e06aa9a6735afd63b08 [formerly 4ef4fa0c98b2f210cbe7de6f2a00ba5c63c1b2ba] [formerly 55f670b9ae053698ddba84658940ddea9ac795e6 [formerly 1cd4421e2e02bfb8597f338583ad32c0550c02af]]] [formerly d7a5bab8320a31eb1dedac6a7806e5c59683053f [formerly c77c5b48df83e1aed383fb28f0812fc87ecbf973] [formerly 01ffd33e2ff1a6fa5cffd6dd82289111682cd9a1 [formerly aea728ceb67c39709f9a3921547a95e09c1dcff7]] [formerly 16afb18e352c8da18fff35384430bbc427cc4802 [formerly 4768b156f37cfba1a657944fe8d4314af3532685] [formerly 3c1298c626a945d362a74f922c5660b11e8ad491 [formerly 1e61cf09743c7ec285301e748dbbec3caf42d89c]]]]]] Former-commit-id: 28b09a56cb8b16b07fc356e5f3133ad2bb9d5859 [formerly 5241dbb36c08ebc2ecfeac6450b1cb3b52f624ea] [formerly d43909d97927eee8f9f09fcc8b4e5c92cddb3a6a [formerly 43b0cca7f5e0372770f2b2a530bc921d8ac9ec8d]] [formerly 8d8f384c8e46748e59790e0421d07bd8a942043d [formerly bcf58203c66d0bc6c5392a855d1b5d617dcef9df] [formerly ca56bff2d0a2b498941b59b240f1cf4ac420462c [formerly 7a8750ffc99115c6cb37e63ce9fd0c3958d235a8]]] [formerly 2ce9fa87aeb4fe174de68dc4d79501b057b477b1 [formerly e7b1b542f5b9fbf69df7912c4393d264713c24f4] [formerly 62a7edf94a8193828f465bd432ffc15edd480ea1 [formerly 
26ca5f220dcc3745ee271e5712d72836f2255222]] [formerly 73d10253b93b35f692e13089ddb148d408cf14fe [formerly af463cecb0333e14aabc502126f7c6d22973e214] [formerly 961314f474a483302d2a5ce0a12b51afc386cb6d [formerly 6e7141e5e2e9f72d2102892b4affce0de0944cb0]]]] [formerly c3f8938ba5ba29f2ad8de1cfc486a7f323ecea69 [formerly 7f6674829240eab485a34d67eb41f61ca744b26f] [formerly 3e5ef2c136584b7f0e769f5bdf43ab994fb12c21 [formerly 266ddb4ccc8948b124ef81cee46efd1804bddd2e]] [formerly 4a8a5437b39ea3daf303fa79dbae22c294ff0b40 [formerly cbcb0f87775b068d2fccd5c8632cf05ab0a019a7] [formerly c212b8217fd92fa1b7899ab5211e0262f3a9c927 [formerly 5f3d3d01c87574acb44b077dd7439ac53b98a0f1]]] [formerly 09764ba6cd565eae5cb426f705ec72ee255e4b49 [formerly 4db991be5f3f4d27092a19876eb75c5e51f60bb0] [formerly f79c6ec15df35536aeba92957c37306643e3a517 [formerly 7f8eb54d4752bbefbc208251563af8e77a1792c0]] [formerly 771b00b188ceaa0b9569636bae5f28643999ce47 [formerly f1dcba565fc0943c24d468b496b48df7f815420f] [formerly 83c42510ad4af56394a30b78f2b4eeb693b2df6b [formerly 3c1298c626a945d362a74f922c5660b11e8ad491]]]]] Former-commit-id: f5cdcca4f3e91cc75d3af0de6bd17cce1dfaf021 [formerly dc57f947a2385967e8847ab784a433184f9620a3] [formerly 55cd6eb9a35d44b00d0d55a30c9686dffafdae57 [formerly e92d6c09230f08e9760fc17f45f8389e919b82a3]] [formerly ba80ed43d23d7ff4c349e534fb9036b84064bbf3 [formerly 0a2f65401a73b676569fb809d0e4e9dd28803952] [formerly 73c0c2ebb36b0a14c038f327507a133d082fef6f [formerly bf80cf285eec74221e6add1ff41e5f5c3b3ab808]]] [formerly a34f21d93329c3bee2d5e97fcbe820a0eab1d5df [formerly ae311cb3e77af5bcd03976e2a108ac1eba772fc4] [formerly ae0e3ed079c601e0096c648911ae8246eef220fe [formerly c9030d0303acdbd031b19797c30e98573cf32837]] [formerly f8f4f6a8ec75e269e0db55255d3a01dcafc79c23 [formerly 343dd65df69b51f0f404f200c3af4cbc52320b25] [formerly 1c97e6a7baa6ff68d466723b754b2887cc834be9 [formerly c69d281aca97ee3bde4328d04d92163eab376623]]]] Former-commit-id: b93de572408cfd327f0ca40b8d62530945e5c5ba [formerly 
6fd32d0759069e91a1183acb86096ccbab5f88e9] [formerly ff29e71cb0b43473b0be6345b1747e6ec2df856b [formerly ee483b56f98eb5e187b7b262278d121c34ed86ab]] [formerly 2f944ec28b50590b4b4eb1e7aa07518f78234810 [formerly 15c46e806cd9f35403a758aa6f46f3d746b0d0c5] [formerly dca0c18b8b693227cdef4539165e8e2d6a47c340 [formerly f185a8658cf2893c7fa8b0393d52489593d32d37]]] Former-commit-id: 76ed7f184f13ac60e2d458d60e9a8befe3cace6f [formerly c8adbe1dea7c40540fc74be2867a780b10a5788f] [formerly 392fc3e54b90f8d1201d2986ffbde77529b26560 [formerly 5bd396738e6152ff5b4b28bbda4cb495bcda1459]] Former-commit-id: 11579edbe104d2a7361847a5dd71fc06dc4f369c [formerly 6970795314200993d1d027617ceddd7a7c6421a3] Former-commit-id: 8fca90698b3ffff3c183e4754cabc6b39fe9b16f
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. import warnings
  2. import os
  3. import os.path
  4. import numpy as np
  5. import codecs
  6. import string
  7. import gzip
  8. import lzma
  9. from typing import Any, Callable, Dict, IO, List, Optional, Tuple, Union
  10. from dataset_utils import download_url, download_and_extract_archive, extract_archive, verify_str_arg
  11. # tqdm >= 4.31.1
  12. from tods import generate_dataset
  13. from sklearn import preprocessing
  14. import pandas as pd
  class TODS_dataset:
      """Base class for downloadable TODS datasets with a train/test split.

      The constructor stores its arguments, optionally calls ``download()``
      and then calls ``process()``. ``process()`` does nothing here;
      presumably subclasses override it to populate
      ``training_set_dataframe`` and ``testing_set_dataframe``, which the
      other methods read (TODO confirm against the concrete datasets).

      Args:
          root: Directory under which ``<ClassName>/raw`` and
              ``<ClassName>/processed`` are located.
          train: Selects the training split when true and the testing split
              otherwise (used by ``to_axolotl_dataset`` and ``len()``).
          transform: Name of the preprocessing step applied by
              ``process_dataframe``; see ``transform_init`` for valid names.
          download: When true, ``download()`` is called before ``process()``.
      """

      # (url, md5) pairs fetched by download().
      resources = []
      # File names that _check_exists() looks for inside processed_folder.
      training_file = None
      testing_file = None
      # Forwarded to generate_dataset() as its second argument.
      ground_truth_index = None
      # Number of spaces that __repr__ puts in front of each body line.
      _repr_indent = 4

      # Accepted ``transform`` names -> class name in sklearn.preprocessing.
      # Names are resolved lazily so that only the requested class is touched.
      _TRANSFORM_CLASS_NAMES = {
          'standardscale': 'StandardScaler',
          'normalize': 'Normalizer',
          'minmaxscale': 'MinMaxScaler',
          'maxabsscale': 'MaxAbsScaler',
          'binarize': 'Binarizer',
      }

      @property
      def raw_folder(self) -> str:
          """Path ``<root>/<ClassName>/raw`` used as the download target."""
          return os.path.join(self.root, self.__class__.__name__, 'raw')

      @property
      def processed_folder(self) -> str:
          """Path ``<root>/<ClassName>/processed`` checked by ``_check_exists``."""
          return os.path.join(self.root, self.__class__.__name__, 'processed')

      def __init__(self, root, train, transform=None, download=True):
          self.root = root
          self.train = train
          # Raises ValueError right away for an unknown transform name.
          self.transform = self.transform_init(transform)

          if download:
              self.download()

          self.process()

      def _check_exists(self) -> bool:
          """Return True when both processed split files exist on disk.

          Returns False (instead of failing inside ``os.path.join``) while
          ``training_file`` or ``testing_file`` is still unset.
          """
          file_names = (self.training_file, self.testing_file)
          if any(name is None for name in file_names):
              return False
          return all(
              os.path.exists(os.path.join(self.processed_folder, name))
              for name in file_names
          )

      def download(self) -> None:
          """Fetch and extract every entry of ``resources`` into ``raw_folder``.

          Does nothing when ``_check_exists()`` reports that the processed
          files are already present.
          """
          if self._check_exists():
              return

          os.makedirs(self.raw_folder, exist_ok=True)

          # download files
          for url, md5 in self.resources:
              # Everything after the last '/' of the URL is the file name.
              filename = url.rpartition('/')[2]
              download_and_extract_archive(url, download_root=self.raw_folder, filename=filename, md5=md5)

      def process(self) -> None:
          """Hook called at the end of ``__init__``; a no-op in this base class."""
          pass

      def process_dataframe(self) -> None:
          """Apply the configured transform to both split dataframes.

          The transform is fitted on the training values only and then applied
          to the training and the testing values. The transformed arrays are
          kept as ``training_set_array`` / ``testing_set_array`` and both
          dataframes are rebuilt from them, so they come back with pandas'
          default integer index and column labels. Does nothing when no
          transform was requested.
          """
          if self.transform is None:
              return

          # Fit and transform on the same plain-array representation.
          training_values = self.training_set_dataframe.values
          testing_values = self.testing_set_dataframe.values

          # NOTE(review): every column is transformed, including whichever
          # column ground_truth_index refers to -- confirm that is intended.
          self.transform.fit(training_values)
          self.training_set_array = self.transform.transform(training_values)
          self.testing_set_array = self.transform.transform(testing_values)
          self.training_set_dataframe = pd.DataFrame(self.training_set_array)
          self.testing_set_dataframe = pd.DataFrame(self.testing_set_array)

      def transform_init(self, transform_str):
          """Map a transform name to a new ``sklearn.preprocessing`` instance.

          Args:
              transform_str: ``None`` or one of 'standardscale', 'normalize',
                  'minmaxscale', 'maxabsscale', 'binarize'.

          Returns:
              ``None`` when ``transform_str`` is ``None``, otherwise a freshly
              constructed instance of the matching preprocessing class.

          Raises:
              ValueError: If ``transform_str`` is any other value.
          """
          if transform_str is None:
              return None

          class_name = None
          # Non-string input (possibly unhashable) falls through to ValueError.
          if isinstance(transform_str, str):
              class_name = self._TRANSFORM_CLASS_NAMES.get(transform_str)

          if class_name is None:
              raise ValueError("Input parameter transform must take value of 'standardscale', 'normalize', " +
                               "'minmaxscale', 'maxabsscale' or 'binarize'."
                               )
          return getattr(preprocessing, class_name)()

      def _active_dataframe(self):
          """Return the dataframe of the split selected by ``self.train``."""
          if self.train:
              return self.training_set_dataframe
          return self.testing_set_dataframe

      def to_axolotl_dataset(self):
          """Pass the selected split and ``ground_truth_index`` to ``generate_dataset``."""
          return generate_dataset(self._active_dataframe(), self.ground_truth_index)

      def __repr__(self) -> str:
          """Return a multi-line, side-effect-free summary of the dataset."""
          head = "Dataset " + self.__class__.__name__
          body = ["Number of datapoints: {}".format(self.__len__())]
          if self.root is not None:
              body.append("Root location: {}".format(self.root))
          body += self.extra_repr().splitlines()

          # __init__ stores the preprocessing object as ``transform``;
          # ``transforms`` is still honoured for classes that define it.
          for attribute in ("transform", "transforms"):
              value = getattr(self, attribute, None)
              if value is not None:
                  body.append(repr(value))

          # A subclass may still carry the former ``None`` default.
          width = 4 if self._repr_indent is None else self._repr_indent
          lines = [head] + [" " * width + line for line in body]
          return '\n'.join(lines)

      def __len__(self) -> int:
          """Return the number of rows in the split selected by ``self.train``."""
          return len(self._active_dataframe())

      def extra_repr(self) -> str:
          """Extra lines for ``__repr__``; empty here, meant to be overridden."""
          return ""
  96. # kpi(root='./datasets', train=True)
  97. # class yahoo5:
  98. #
  99. # def __init__(self):
  100. # pass

全栈的自动化机器学习系统,主要针对多变量时间序列数据的异常检测。TODS提供了详尽的用于构建基于机器学习的异常检测系统的模块,它们包括:数据处理(data processing),时间序列处理( time series processing),特征分析(feature analysis),检测算法(detection algorithms),和强化模块( reinforcement module)。这些模块所提供的功能包括常见的数据预处理、时间序列数据的平滑或变换,从时域或频域中抽取特征、多种多样的检测算