You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

profiler.py 6.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. # -*- coding: utf-8 -*-
  2. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  3. #
  4. # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  5. #
  6. # Unless required by applicable law or agreed to in writing,
  7. # software distributed under the License is distributed on an
  8. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. import json
  10. import os
  11. import re
  12. from contextlib import ContextDecorator, contextmanager
  13. from functools import wraps
  14. from typing import List
  15. from weakref import WeakSet
  16. from .. import _atexit
  17. from ..core._imperative_rt.core2 import (
  18. pop_scope,
  19. push_scope,
  20. start_profile,
  21. stop_profile,
  22. sync,
  23. )
  24. from ..logger import get_logger
  # Profiler that has been started and not yet stopped; None when no profiler is running.
  25. _running_profiler = None
  # Profilers that were stopped and still hold results to dump (held weakly).
  26. _living_profilers = WeakSet()
  27. class Profiler(ContextDecorator):
  28. r"""
  29. Profile graph execution in imperative mode.
  30. :type path: Optional[str]
  31. :param path: default path prefix for profiler to dump.
  32. Examples:
  33. .. code-block::
  34. import megengine as mge
  35. import megengine.module as M
  36. from megengine.utils.profiler import Profiler
  37. # With Learnable Parameters
  38. profiler = Profiler()
  39. for iter in range(0, 10):
  40. # Only profile record of last iter would be saved
  41. with profiler:
  42. # your code here
  43. # Then open the profile file in chrome timeline window
  44. """
  45. CHROME_TIMELINE = "chrome_timeline.json"
  46. valid_options = {"sample_rate": 0, "profile_device": 1, "num_tensor_watch": 10}
  47. valid_formats = {"chrome_timeline.json", "memory_flow.svg"}
  48. def __init__(
  49. self,
  50. path: str = "profile",
  51. format: str = "chrome_timeline.json",
  52. formats: List[str] = None,
  53. **kwargs
  54. ) -> None:
  55. if not formats:
  56. formats = [format]
  57. assert not isinstance(formats, str), "formats excepts list, got str"
  58. for format in formats:
  59. assert format in Profiler.valid_formats, "unsupported format {}".format(
  60. format
  61. )
  62. self._path = path
  63. self._formats = formats
  64. self._options = {}
  65. for opt, optval in Profiler.valid_options.items():
  66. self._options[opt] = int(kwargs.pop(opt, optval))
  67. self._pid = "<PID>"
  68. self._dump_callback = None
  69. @property
  70. def path(self):
  71. if len(self._formats) == 0:
  72. format = "<FORMAT>"
  73. elif len(self._formats) == 1:
  74. format = self._formats[0]
  75. else:
  76. format = "{" + ",".join(self._formats) + "}"
  77. return self.format_path(self._path, self._pid, format)
  78. @property
  79. def directory(self):
  80. return self._path
  81. @property
  82. def formats(self):
  83. return list(self._formats)
  84. def start(self):
  85. global _running_profiler
  86. assert _running_profiler is None
  87. _running_profiler = self
  88. self._pid = os.getpid()
  89. start_profile(self._options)
  90. return self
  91. def stop(self):
  92. global _running_profiler
  93. assert _running_profiler is self
  94. _running_profiler = None
  95. sync()
  96. self._dump_callback = stop_profile()
  97. self._pid = os.getpid()
  98. _living_profilers.add(self)
  99. def dump(self):
  100. if self._dump_callback is not None:
  101. if not os.path.exists(self._path):
  102. os.makedirs(self._path)
  103. if not os.path.isdir(self._path):
  104. get_logger().warning(
  105. "{} is not a directory, cannot write profiling results".format(
  106. self._path
  107. )
  108. )
  109. return
  110. for format in self._formats:
  111. path = self.format_path(self._path, self._pid, format)
  112. get_logger().info("process {} generating {}".format(self._pid, format))
  113. self._dump_callback(path, format)
  114. get_logger().info("profiling results written to {}".format(path))
  115. self._dump_callback = None
  116. _living_profilers.remove(self)
  117. def format_path(self, path, pid, format):
  118. return os.path.join(path, "{}.{}".format(pid, format))
  119. def __enter__(self):
  120. self.start()
  121. def __exit__(self, val, tp, trace):
  122. self.stop()
  123. def __call__(self, func):
  124. func = super().__call__(func)
  125. func.__profiler__ = self
  126. return func
  127. def __del__(self):
  128. self.dump()
  129. @contextmanager
  130. def scope(name):
  131. push_scope(name)
  132. yield
  133. pop_scope(name)
  134. def profile(*args, **kwargs):
  135. if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
  136. return Profiler()(args[0])
  137. return Profiler(*args, **kwargs)
  138. def merge_trace_events(directory: str):
  139. names = filter(
  140. lambda x: re.match(r"\d+\.chrome_timeline\.json", x), os.listdir(directory)
  141. )
  142. def load_trace_events(name):
  143. with open(os.path.join(directory, name), "r", encoding="utf-8") as f:
  144. return json.load(f)
  145. def find_metadata(content):
  146. if isinstance(content, dict):
  147. assert "traceEvents" in content
  148. content = content["traceEvents"]
  149. if len(content) == 0:
  150. return None
  151. assert content[0]["name"] == "Metadata"
  152. return content[0]["args"]
  153. contents = list(map(load_trace_events, names))
  154. metadata_list = list(map(find_metadata, contents))
  155. min_local_time = min(
  156. map(lambda x: x["localTime"], filter(lambda x: x is not None, metadata_list))
  157. )
  158. events = []
  159. for content, metadata in zip(contents, metadata_list):
  160. local_events = content["traceEvents"]
  161. if len(local_events) == 0:
  162. continue
  163. local_time = metadata["localTime"]
  164. time_shift = local_time - min_local_time
  165. for event in local_events:
  166. if "ts" in event:
  167. event["ts"] = int(event["ts"] + time_shift)
  168. events.extend(filter(lambda x: x["name"] != "Metadata", local_events))
  169. result = {
  170. "traceEvents": events,
  171. }
  172. path = os.path.join(directory, "merge.chrome_timeline.json")
  173. with open(path, "w") as f:
  174. json.dump(result, f, ensure_ascii=False, separators=(",", ":"))
  175. get_logger().info("profiling results written to {}".format(path))
  176. def is_profiling():
  177. return _running_profiler is not None
  178. def _stop_current_profiler():
  179. global _running_profiler
  180. if _running_profiler is not None:
  181. _running_profiler.stop()
  182. living_profilers = [*_living_profilers]
  183. for profiler in living_profilers:
  184. profiler.dump()
  # Hand the cleanup hook to the package-level `_atexit` helper.
  # NOTE(review): presumably this runs the hook at shutdown so pending results are dumped; confirm against `_atexit`.
  185. _atexit(_stop_current_profiler)

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台