You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

profiler.py 8.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. # -*- coding: utf-8 -*-
  2. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  3. #
  4. # Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  5. #
  6. # Unless required by applicable law or agreed to in writing,
  7. # software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import base64
import functools
import json
import os
import re
from typing import Iterable, List, Optional

from ..core._imperative_rt import OperatorNodeConfig, ProfileEntry
from ..core._imperative_rt import ProfilerImpl as _Profiler
from ..core._imperative_rt.imperative import sync
from ..core._imperative_rt.ops import CollectiveComm
  18. def _make_dict(**kwargs):
  19. unused_keys = []
  20. for k, v in kwargs.items():
  21. if v is None:
  22. unused_keys.append(k)
  23. for k in unused_keys:
  24. del kwargs[k]
  25. return kwargs
  26. def _print_opnode_config(config):
  27. return _make_dict(
  28. name=config.name, dtype=config.dtype, comp_node_arr=config.comp_node_arr,
  29. )
  30. def _dump_chrome_timeline(entries: List[ProfileEntry], path: str):
  31. pid = os.getpid()
  32. trace_events = []
  33. def append_event(**kwargs):
  34. trace_events.append(_make_dict(**kwargs))
  35. for id, entry in enumerate(entries):
  36. op = entry.op
  37. name = type(op).__name__
  38. host_begin, host_end = entry.host
  39. device_list = entry.device_list
  40. args = Profiler.fetch_attrs(op)
  41. args["__id__"] = "[{}]".format(id)
  42. cat = name
  43. for ts, ph in [(host_begin, "B"), (host_end, "E")]:
  44. append_event(
  45. name=name, ph=ph, ts=ts * 1000, pid=pid, tid="host", args=args, cat=cat,
  46. )
  47. for device, device_begin, device_end in device_list:
  48. for ts, ph in [(device_begin(), "B"), (device_end(), "E")]:
  49. append_event(
  50. name=name, ph=ph, ts=ts * 1000, pid=pid, tid=str(device), args=args,
  51. )
  52. with open("{}.chrome_timeline.json".format(path), "w") as f:
  53. json.dump(trace_events, f, indent=2)
  54. def _dump_compatible(entries: List[ProfileEntry], path: str):
  55. obj = {
  56. "graph_exec": {"var": [], "operator": {}},
  57. "profiler": {"device": {}, "host": {}, "opr_footprint": {}},
  58. }
  59. var_list = obj["graph_exec"]["var"]
  60. operator_dict = obj["graph_exec"]["operator"]
  61. device_dict = obj["profiler"]["device"]
  62. host_dict = obj["profiler"]["host"]
  63. opr_foot_print_dict = obj["profiler"]["opr_footprint"]
  64. def add_var(var) -> int:
  65. var_id = len(var_list)
  66. var_list.append(
  67. {"comp_node": str(var[2]),}
  68. )
  69. return var_id
  70. for op_id, entry in enumerate(entries):
  71. operator_dict[op_id] = {
  72. "input": [add_var(var) for var in entry.inputs],
  73. "output": [add_var(var) for var in entry.outputs],
  74. "name": str(entry.op.ctype()),
  75. "type": "imperative",
  76. "id": entry.id,
  77. }
  78. op_device_dict = {}
  79. for device, device_begin, device_end in entry.device_list:
  80. op_device_dict[str(device)] = {
  81. "start": device_begin(),
  82. "kern": device_begin(),
  83. "end": device_end(),
  84. }
  85. device_dict[op_id] = op_device_dict
  86. host_begin, host_end = entry.host
  87. host_dict[op_id] = {
  88. "host": {"start": host_begin, "kern": host_begin, "end": host_end}
  89. }
  90. opr_footprint = {
  91. "out_shapes": [oup[1] for oup in entry.outputs],
  92. "in_shapes": [inp[1] for inp in entry.inputs],
  93. "params": {},
  94. }
  95. if entry.memory > 0:
  96. opr_footprint["memory"] = entry.memory
  97. if entry.computation > 0:
  98. opr_footprint["computation"] = entry.computation
  99. opr_foot_print_dict[op_id] = opr_footprint
  100. with open("{}.compatible.json".format(path), "w") as f:
  101. json.dump(obj, f, indent=2)
  102. def _dump_graphviz(entries: List[ProfileEntry], path: str):
  103. import json
  104. import graphviz
  105. graph = graphviz.Digraph()
  106. graph.graph_attr["ordering"] = "out"
  107. var_cache = {}
  108. def cache_var(var_id, var_shape):
  109. if var_id not in var_cache:
  110. var_name = "var({})".format(var_id)
  111. var_label = "{}\nshape:{}\n".format(var_name, shape)
  112. graph.node(var_name, var_label)
  113. var_cache[var_id] = var_name
  114. return var_cache[var_id]
  115. for op_id, entry in enumerate(entries):
  116. op = entry.op
  117. op_name = "op({})".format(op_id)
  118. op_type = type(op).__name__
  119. op_attrs = Profiler.fetch_attrs(op)
  120. label_lines = []
  121. if "param" in op_attrs:
  122. del op_attrs["param"]
  123. label_lines.append("{}:{}".format(op_name, op_type))
  124. for k, v in op_attrs.items():
  125. label_lines.append("attr[{}]: {}".format(k, v))
  126. op_param_str = entry.param
  127. if len(op_param_str) > 0:
  128. op_param = json.loads(op_param_str)
  129. for k, v in op_param.items():
  130. label_lines.append("param[{}]:{}".format(k, v))
  131. host_begin, host_end = entry.host
  132. label_lines.append("time[host]: {:f}ms".format(host_end - host_begin))
  133. for device, device_begin, device_end in entry.device_list:
  134. device_time = device_end() - device_begin()
  135. label_lines.append("time[{}]: {:f}ms".format(device, device_time))
  136. op_label = "\n".join(label_lines)
  137. graph.node(op_name, op_label, shape="rectangle")
  138. for var_id, shape, device in entry.inputs:
  139. graph.edge(cache_var(var_id, shape), op_name)
  140. for var_id, shape, device in entry.outputs:
  141. graph.edge(op_name, cache_var(var_id, shape))
  142. graph.save("{}.graphviz.dot".format(path))
  143. class Profiler:
  144. r"""
  145. Profile graph execution in imperative mode.
  146. :type path: Optional[str]
  147. :param path: default path prefix for profiler to dump.
  148. Examples:
  149. .. code-block::
  150. import megengine as mge
  151. import megengine.module as M
  152. from megengine.utils.profiler import Profiler
  153. # With Learnable Parameters
  154. for iter in range(0, 10):
  155. # Only profile record of last iter would be saved
  156. with Profiler("profile"):
  157. # your code here
  158. # Then open the profile file in chrome timeline window
  159. """
  160. CHROME_TIMELINE = "chrome_timeline"
  161. COMPATIBLE = "compatible"
  162. GRAPHVIZ = "graphviz"
  163. WITH_FOOTPRINT = 1
  164. _type_map = {
  165. OperatorNodeConfig: lambda x: _print_opnode_config(x),
  166. bytes: lambda x: base64.encodebytes(x).decode("ascii"),
  167. CollectiveComm.Mode: lambda x: str(x),
  168. }
  169. _dumper_map = {
  170. CHROME_TIMELINE: _dump_chrome_timeline,
  171. COMPATIBLE: _dump_compatible,
  172. GRAPHVIZ: _dump_graphviz,
  173. }
  174. def __init__(
  175. self,
  176. path: str = "profile",
  177. *,
  178. formats: Iterable[str] = (CHROME_TIMELINE,),
  179. type_filter: str = ".*",
  180. exit_dump: bool = True
  181. ) -> None:
  182. self._impl = _Profiler()
  183. self._path = path
  184. if isinstance(formats, str):
  185. formats = (formats,)
  186. self._filter = type_filter
  187. self._dumpers = [Profiler._dumper_map[fmt] for fmt in formats]
  188. self._exit_dump = exit_dump
  189. def __enter__(self):
  190. sync()
  191. self._impl.start(Profiler.WITH_FOOTPRINT)
  192. return self
  193. def __exit__(self, val, tp, trace):
  194. if self._exit_dump:
  195. self.dump()
  196. sync()
  197. self._impl.stop()
  198. self._impl.clear()
  199. @classmethod
  200. def fetch_attrs(cls, op):
  201. attrs = dir(op)
  202. results = {}
  203. for attr in attrs:
  204. if attr.startswith("_"):
  205. continue
  206. value = op.__getattribute__(attr)
  207. if callable(value):
  208. continue
  209. value_type = type(value)
  210. if value_type in cls._type_map:
  211. value = cls._type_map[value_type](value)
  212. results[attr] = str(value)
  213. return results
  214. def dump(self, path: Optional[str] = None):
  215. sync()
  216. raw = [
  217. entry
  218. for entry in self._impl.dump()
  219. if re.match(self._filter, type(entry.op).__name__)
  220. ]
  221. if path is None:
  222. path = self._path
  223. for dumper in self._dumpers:
  224. dumper(raw, path)
  225. def __call__(self, func):
  226. def wrapper(*args, **kwargs):
  227. with self:
  228. return func(*args, **kwargs)
  229. return wrapper
# Lowercase alias; since Profiler.__call__ wraps a function, this allows
# plain decorator usage: ``@profile``.
profile = Profiler

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台