You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

profile_analyzer.py 13 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. # -*- coding: utf-8 -*-
  2. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  3. #
  4. # Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  5. #
  6. # Unless required by applicable law or agreed to in writing,
  7. # software distributed under the License is distributed on an
  8. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. import collections
  10. import copy
  11. import functools
  12. from typing import Callable, List, Optional, Union
  13. import numpy as np
  14. class NonExistNum:
  15. """An object that behaves like a number but means a field does not exist; It is
  16. always greater than any real number
  17. """
  18. def __truediv__(self, _):
  19. return self
  20. def __add__(self, rhs):
  21. return rhs
  22. def __radd__(self, lhs):
  23. return lhs
  24. def __neg__(self):
  25. return self
  26. def __gt__(self, rhs):
  27. if isinstance(rhs) is NonExistNum:
  28. return id(self) > id(rhs)
  29. return True
  30. def __ge__(self, rhs):
  31. return self > rhs or self == rhs
  32. def __lt__(self, rhs):
  33. if isinstance(rhs) is NonExistNum:
  34. return id(self) < id(rhs)
  35. return False
  36. def __le__(self, rhs):
  37. return self < rhs or self == rhs
  38. def __eq__(self, rhs):
  39. return self is rhs
  40. def __format__(self, spec):
  41. return "N/A"
  42. def __repr__(self):
  43. return "N/A"
  44. class OprProfRst:
  45. """Opr profiling result dumped from megengine profiler."""
  46. opr_info = None
  47. """A dict containing operator info: name, id and type."""
  48. time_dict = None
  49. """A mapping from ``"host"`` or ``"device"`` to list of profiling
  50. results."""
  51. footprint = None
  52. """A mapping from ``"memory"`` or ``"computation"`` to the actual number
  53. of corresponding operations"""
  54. def __init__(self, entry: dict):
  55. """Opr profiling initialization, which sets up name, type and id of opr_info.
  56. :param entry: profiling json exec_graph items
  57. """
  58. assert isinstance(entry, dict)
  59. self.opr_info = collections.OrderedDict()
  60. for key in ["name", "type", "id"]:
  61. self.opr_info[key] = entry[key]
  62. self.time_dict = collections.defaultdict(list)
  63. self.footprint = collections.defaultdict(NonExistNum)
  64. def update_device_prof_info(self, dev_time: dict):
  65. """Updates device profiling info
  66. :param dev_time: device time for single opr,
  67. is an attribute of profiling result.
  68. """
  69. assert isinstance(dev_time, dict)
  70. self.time_dict["device"].append(copy.deepcopy(dev_time))
  71. def update_host_prof_info(self, host_time: dict):
  72. """Updates host profiling info
  73. :param host_time: host time for single opr,
  74. is an attribute of profiling result.
  75. """
  76. assert isinstance(host_time, dict)
  77. self.time_dict["host"].append(copy.deepcopy(host_time))
  78. def update_footprint(self, footprint: dict):
  79. """Updates opr footprint
  80. :param footprint: footprint for single opr,
  81. is an attribute of profiling result.
  82. """
  83. assert isinstance(footprint, dict)
  84. self.footprint.update(footprint)
  85. class Record:
  86. """A record of analyzing result"""
  87. __slot__ = [
  88. "time",
  89. "info",
  90. "computation",
  91. "memory",
  92. "in_shapes",
  93. "in_layouts",
  94. "out_shapes",
  95. "flops",
  96. "bandwidth",
  97. "opr_id",
  98. ]
  99. def __init__(self, time: float, info: dict, footprint: dict):
  100. """Initializes single record
  101. :param time: opr running time, evaluated by applying users providing
  102. function to OprProfRst.
  103. :param info: opr information, could be original opr information or
  104. aggregate infomation if aggregating enabled.
  105. :param footprint: contains footprint information, for now, we have
  106. ``"computation"``, ``"memory"``, ``"in_shapes"``, ``"out_shapes"``.
  107. """
  108. assert isinstance(footprint, dict)
  109. self.time = time
  110. self.info = collections.OrderedDict(copy.deepcopy(info))
  111. self.computation = footprint["computation"] or NonExistNum()
  112. self.memory = footprint["memory"]
  113. self.in_shapes = footprint["in_shapes"]
  114. self.in_layouts = footprint.get("in_layouts")
  115. self.out_shapes = footprint["out_shapes"]
  116. self.flops = self.computation / self.time
  117. self.bandwidth = self.memory / self.time
  118. self.opr_id = info.get("id")
  119. if isinstance(self.opr_id, str) and self.opr_id != "N/A":
  120. self.opr_id = int(self.opr_id)
  121. def get_column_by_name(self, name: str = None):
  122. """extracts column value by its column name
  123. :param name: column name, None for time.
  124. """
  125. if name is None:
  126. name = "time"
  127. return getattr(self, name)
  128. class ProfileAnalyzer:
  129. def __init__(self, obj: dict, opr_filter: Callable = lambda opr, inp, out: True):
  130. """Initializes ProfileAnalyzer
  131. :param obj: dict dumped from json str.
  132. :param opr_filter: function that filter oprs.
  133. """
  134. self._opr_set = dict() # type: dict
  135. assert isinstance(obj, dict)
  136. varz = obj["graph_exec"]["var"]
  137. for opr_id, entry in obj["graph_exec"]["operator"].items():
  138. inp = [varz[i] for i in entry["input"]]
  139. out = [varz[i] for i in entry["output"]]
  140. if opr_filter(entry, inp, out):
  141. self._opr_set[opr_id] = OprProfRst(entry)
  142. for opr_id, entry in obj["profiler"]["device"].items():
  143. if opr_id not in self._opr_set:
  144. continue
  145. opr = self._opr_set[opr_id]
  146. for _, time in entry.items():
  147. opr.update_device_prof_info(time)
  148. for opr_id, entry in obj["profiler"]["host"].items():
  149. if opr_id not in self._opr_set:
  150. continue
  151. opr = self._opr_set[opr_id]
  152. for _, time in entry.items():
  153. opr.update_host_prof_info(time)
  154. for opr_id, entry in obj["profiler"].get("opr_footprint", {}).items():
  155. if opr_id not in self._opr_set:
  156. continue
  157. opr = self._opr_set[opr_id]
  158. opr.update_footprint(entry)
  159. def _aggregate(
  160. self, records: List[Record], aop: Union[str, Callable], atype: Optional[str]
  161. ) -> List[Record]:
  162. """Aggregate operation
  163. :param records: selected records
  164. :param aop: aggregate operation, if aop is str, we would replace it
  165. with associated numpy function wth aop name"
  166. :param atype: the type aggregated by, None for aggregating all into single
  167. record.
  168. """
  169. if aop is None:
  170. assert atype is None, "must specify aggregate op"
  171. return records
  172. if isinstance(aop, str):
  173. aop = getattr(np, aop)
  174. type2stat = collections.defaultdict(lambda: [[], [], []]) # type: dict
  175. for item in records:
  176. if atype == "type":
  177. d = type2stat[item.info["type"]]
  178. else:
  179. d = type2stat["all"]
  180. d[0].append(item.time)
  181. d[1].append(item.computation)
  182. d[2].append(item.memory)
  183. rst = []
  184. for opr_type in type2stat.keys():
  185. time, computation, memory = type2stat[opr_type]
  186. nr_oprs = len(time)
  187. time_rst = aop(time)
  188. comp_rst = aop(computation)
  189. mem_rst = aop(memory)
  190. item = Record(
  191. time_rst,
  192. {"type": opr_type, "count": nr_oprs, "id": "N/A"},
  193. {
  194. "computation": comp_rst,
  195. "memory": mem_rst,
  196. "in_shapes": None,
  197. "out_shapes": None,
  198. },
  199. )
  200. rst.append(item)
  201. return rst
  202. def _sort(self, records: List[Record], sort_by: str) -> List[Record]:
  203. """sort operation
  204. :param records: the records after aggregate operation.
  205. :param sort_by: keyword for sorting the list
  206. """
  207. if sort_by is None:
  208. return records
  209. if sort_by.startswith("+"):
  210. sort_by = sort_by[1:]
  211. key = lambda record: record.get_column_by_name(sort_by)
  212. else:
  213. key = lambda record: -record.get_column_by_name(sort_by)
  214. records.sort(key=key)
  215. return records
  216. def select(
  217. self,
  218. time_func: Callable,
  219. opr_filter: Callable = lambda opr: True,
  220. aggregate: Callable = None,
  221. aggregate_by: str = None,
  222. sort_by: str = None,
  223. top_k: int = 0,
  224. ) -> List[Record]:
  225. """Select operation
  226. :param time_func: time_func provided by user, would apply to every
  227. OprProfRst
  228. :param opr_filter: filter satisfied operatiors.
  229. :param aggregate: function that apply to list of records which are
  230. aggregated by atype
  231. :param aggregate_by: the type aggregated by
  232. :param sort_by: keyword for sorting all records.
  233. :param top_k: specify the maximum number of records.
  234. :return: the records that go through select, aggregate, sort.
  235. """
  236. records = []
  237. for opr in self._opr_set.values():
  238. if opr_filter(opr):
  239. time = time_func(opr)
  240. if time is None:
  241. continue
  242. item = Record(time, opr.opr_info, opr.footprint)
  243. records.append(item)
  244. records = self._aggregate(records, aggregate, aggregate_by)
  245. if not records:
  246. return records
  247. return self._sort(records, sort_by)[0 : len(records) if top_k == 0 else top_k]
  248. class TimeFuncHelper:
  249. """Time Function Helper for users."""
  250. @staticmethod
  251. def _eval_time(prof_type, end_key, func, opr_prof):
  252. """Eval time
  253. :type prof_type: str
  254. :param prof_type: 'host' or 'device'
  255. :type end_key: str
  256. :param end_key: 'kern' or 'end'
  257. :type func: function
  258. :param func: apply to list of all ``thread`` of ``gpu`` time.
  259. :type opr_prof: `class OprProfRst`
  260. :param opr_prof: operator profiling result
  261. :rtype: float
  262. :return: time
  263. """
  264. if prof_type not in opr_prof.time_dict:
  265. return None
  266. time = [time[end_key] - time["start"] for time in opr_prof.time_dict[prof_type]]
  267. return func(time)
  268. @staticmethod
  269. def eval_time_func(prof_type: str, end_key: str, func: Callable) -> float:
  270. """Eval oprerator profile time.
  271. :param prof_type: 'host' or 'device'
  272. :param end_key: 'kern' or 'end'
  273. :param func: apply to list of all ``thread`` of ``gpu`` time.
  274. :return: Eval time results
  275. """
  276. return functools.partial(TimeFuncHelper._eval_time, prof_type, end_key, func)
  277. @staticmethod
  278. def _min_start(
  279. prof_type, end_key, func, opr_prof
  280. ): # pylint: disable=unused-argument
  281. """Eval minimum start time
  282. :type prof_type: str
  283. :param prof_type: 'host' or 'device'
  284. :type end_key: str
  285. :param end_key: 'kern' or 'end'
  286. :type func: function
  287. :param func: apply to list of all ``thread`` of ``gpu`` time.
  288. :type opr_prof: `class OprProfRst`
  289. :param opr_prof: operator profiling result
  290. :rtype: float
  291. :return: time
  292. """
  293. if prof_type not in opr_prof.time_dict:
  294. return None
  295. time = [time["start"] for time in opr_prof.time_dict[prof_type]]
  296. return np.min(time)
  297. @staticmethod
  298. def min_start_func(
  299. prof_type: str, end_key: str, func: Callable
  300. ) -> float: # pylint: disable=unused-argument
  301. """Eval oprerator profile min start time
  302. :param prof_type: 'host' or 'device'
  303. :param end_key: 'kern' or 'end'
  304. :param func: apply to list of all ``thread`` of ``gpu`` time.
  305. :return: Eval time results
  306. """
  307. return functools.partial(TimeFuncHelper._min_start, prof_type, end_key, func)
  308. @staticmethod
  309. def _max_end(prof_type, end_key, func, opr_prof): # pylint: disable=unused-argument
  310. """Eval maximum end time
  311. :type prof_type: str
  312. :param prof_type: 'host' or 'device'
  313. :type end_key: str
  314. :param end_key: 'kern' or 'end'
  315. :type func: function
  316. :param func: apply to list of all ``thread`` of ``gpu`` time.
  317. :type opr_prof: `class OprProfRst`
  318. :param opr_prof: operator profiling result
  319. :rtype: float
  320. :return: time
  321. """
  322. if prof_type not in opr_prof.time_dict:
  323. return None
  324. time = [time["end"] for time in opr_prof.time_dict[prof_type]]
  325. return np.max(time)
  326. @staticmethod
  327. def max_end_func(prof_type: str, end_key: str, func: Callable) -> float:
  328. """Eval oprerator profile max end time
  329. :param prof_type: 'host' or 'device'
  330. :param end_key: 'kern' or 'end'
  331. :param func: apply to list of all ``thread`` of ``gpu`` time.
  332. :return: Eval time results
  333. """
  334. return functools.partial(TimeFuncHelper._max_end, prof_type, end_key, func)

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台