You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

log_parser.py 3.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. # -*- coding: UTF-8 -*-
  2. """
  3. Copyright 2021 Tianshu AI Platform. All Rights Reserved.
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. =============================================================
  14. """
  15. import time
  16. from utils.logfile_utils import get_runinfo
  17. from python_io.lazy_load import LazyLoad
  18. from multiprocessing import Process
  19. from pathlib import Path
  20. from shutil import rmtree
  21. from python_io.dictionary_watcher import start_run_watcher
  22. from utils.cache_io import CacheIO
  23. from utils.redis_utils import RedisInstance
  24. class ParserWorker(Process):
  25. def __init__(self, uid, logdir, cachedir):
  26. super(ParserWorker, self).__init__()
  27. self.uid = uid
  28. self._logdir = logdir
  29. self._cachedir = cachedir
  30. def run(self):
  31. if not Path(self._logdir).exists():
  32. raise FileExistsError("No such dictionary {}".format(self._logdir))
  33. run_dirs = get_runinfo(self._logdir)
  34. # 开启监听当前解析的文件夹
  35. start_run_watcher(self.uid, '.', self._logdir, self._cachedir)
  36. # 解析日志
  37. print(f'({self._logdir}) starts to parse successfully')
  38. start_time = time.time()
  39. run_logs = {}
  40. for _run, _dir in run_dirs.items():
  41. LazyLoad(self.uid, _run, _dir, run_logs).init_load(self._cachedir, is_init=True)
  42. # 检查是否解析完成
  43. assert len(run_logs) == len(run_dirs)
  44. while len(run_logs) > 0:
  45. runs = list(run_logs.keys())
  46. for run in runs:
  47. if len(run_logs[run]) == 0:
  48. run_logs.pop(run)
  49. if time.time() - start_time >= 30:
  50. return
  51. else:
  52. time.sleep(0.5)
  53. class LogParser:
  54. def __init__(self, uid, logdir, cachedir):
  55. super(LogParser, self).__init__()
  56. self.uid = uid
  57. self.logdir = logdir
  58. self.cachedir = Path(cachedir).absolute()
  59. self.r = RedisInstance
  60. self.alive = False
  61. def start(self):
  62. self.alive = True
  63. # 记录日志解析后的本地缓存路径
  64. self.r.set(self.uid, str(self.cachedir))
  65. if self.cachedir.exists():
  66. rmtree(self.cachedir)
  67. self.worker = ParserWorker(self.uid, self.logdir, self.cachedir)
  68. self.worker.run()
  69. def close(self):
  70. self.alive = False
  71. print(f'({self.uid}) : clean up ... ')
  72. # 关闭解析进程
  73. if self.worker.is_alive():
  74. self.worker.terminate()
  75. # 关闭当前parser已打开的文件io
  76. files = list(CacheIO.file_io.keys())
  77. for file in files:
  78. if str(self.cachedir) in str(file):
  79. CacheIO.file_io[file].close()
  80. CacheIO.file_io.pop(file)
  81. # 清除redis缓存
  82. for key in self.r.keys(self.uid + '*'):
  83. self.r.delete(key)
  84. # 清除缓存文件
  85. if self.cachedir.exists():
  86. rmtree(self.cachedir)
  87. try:
  88. # 尝试删除空的父目录,直至cache根目录
  89. parent_cache = self.cachedir.parent
  90. while '__cache__' in str(parent_cache):
  91. parent_cache.rmdir()
  92. parent_cache = parent_cache.parent
  93. except:
  94. pass

一站式算法开发平台、高性能分布式深度学习框架、先进算法模型库、视觉模型炼知平台、数据可视化分析平台等一系列平台及工具,在模型高效分布式训练、数据处理和可视分析、模型炼知和轻量化等技术上形成独特优势,目前已在产学研等各领域近千家单位及个人提供AI应用赋能