
config.py 14 kB

# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import collections
import os

from . import mgb as _mgb

_default_device_type = "CUDA"


def set_device_map(logical_dev, physical_dev, device_type=None):
    """map from *logical_dev* to *physical_dev* for future comp node loading

    example::

        set_device_map(0, 2, 'CPU')     # cpu0 -> cpu2
        set_device_map('gpu3', 'gpu0')  # gpu3 -> gpu0

    :param device_type: specify the device type if devices are given by
        integers; if devices are given by integers and ``device_type`` is not
        given, the default value ``'CUDA'`` would be used. Possible values are
        ``'CUDA'`` and ``'CPU'``.
    """
    if device_type is None:
        device_type = _default_device_type

    if device_type == "CUDA":
        xpu = "gpu"
    else:
        assert device_type == "CPU"
        xpu = "cpu"

    def rmxpu(v):
        # strip the 'gpu'/'cpu'/'xpu' prefix so only the device number remains
        if isinstance(v, str):
            assert v.startswith(xpu) or v.startswith("xpu"), (
                "bad comp node in set_device_map: "
                "device_type={} comp_node={}".format(device_type, v)
            )
            return v[3:]
        return v

    logical_dev, physical_dev = map(rmxpu, [logical_dev, physical_dev])
    _mgb.CompNode._set_device_map(device_type, int(logical_dev), int(physical_dev))


def set_default_device(physical_dev, device_type=None):
    """set the physical device for xpux

    When *device_type* is None and *physical_dev* starts with *gpu* or *cpu*,
    the default device type would be modified accordingly for future calls to
    :func:`set_device_map` when remapping device numbers.
    """
    global _default_device_type
    if (
        device_type is None
        and isinstance(physical_dev, str)
        and not physical_dev.isdigit()
        and not physical_dev.startswith("xpu")
    ):
        # infer the device type from the name prefix, e.g. 'gpu1' or 'cpu0'
        t = physical_dev[:3]
        if t == "gpu":
            _default_device_type = "CUDA"
        else:
            assert t == "cpu", "bad physical_dev: {}".format(physical_dev)
            _default_device_type = "CPU"
        set_default_device_type(_default_device_type)
        device_type = _default_device_type
    set_device_map(-1, physical_dev, device_type)
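
# Usage sketch (illustrative comment, not executed): passing a concrete
# device name both remaps xpux and switches the default device type used by
# later set_device_map() calls:
#
#     set_default_device('gpu1')  # xpux -> gpu1; default type becomes CUDA
#     set_default_device('cpu0')  # xpux -> cpu0; default type becomes CPU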


def set_default_device_type(device_type):
    """set device type for xpu"""
    global _default_device_type
    device_type = device_type.upper()
    _mgb.CompNode._set_unspec_device_type(device_type)
    _default_device_type = device_type


def set_fork_cuda_warning_flag(flag):
    """set warning to be printed at fork if cuda has been initialized

    :type flag: int
    :param flag: controls how the warning should be printed:

        * 0: disable warning
        * 1: print warning to log
        * 2: print warning to log and raise exception
    """
    _mgb._config.set_fork_cuda_warning_flag(int(flag))


def get_device_count(device_type="xpu", warn=True):
    """get number of devices installed on this system

    :param device_type: device type, one of 'xpu', 'gpu' or 'cpu'
    :type device_type: str
    """
    return _mgb.CompNode._get_device_count(device_type.upper(), warn)


def parse_locator(device_name: str) -> tuple:
    """get the tensor locator expression by device name

    :param device_name: device name, like 'cpu0', 'gpu1' and 'xpux'
    :type device_name: str
    :return: (device_type, dev_num, stream_num)
    """
    return _mgb.CompNode._parse_locator(device_name)
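
# Assumed example of the returned triple (the exact representation of the
# device type is an implementation detail of CompNode, so treat the values
# below as illustrative only):
#
#     parse_locator('gpu1')  # e.g. ('CUDA', 1, 0)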


def set_mem_reserve_size(size):
    """set memory reserve size:

    * If *size* is greater than 1, it is the absolute amount of memory to
      be reserved in MB;
    * If *size* is in the range (0, 1), it is the ratio of total memory;
    * If *size* is 0, memory reservation and pre-allocation would be
      disabled;
    * If *size* is -1, disable custom memory allocator and use cuda APIs
      directly.
    """
    _mgb._config.set_mem_reserve_size(float(size))
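
# Illustrative calls covering each branch of the size semantics documented
# above (the concrete numbers are arbitrary examples):
#
#     set_mem_reserve_size(4096)  # reserve 4096 MB
#     set_mem_reserve_size(0.9)   # reserve 90% of total memory
#     set_mem_reserve_size(0)     # disable reservation and pre-allocation
#     set_mem_reserve_size(-1)    # bypass custom allocator; use cuda APIs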


def set_comp_graph_option(comp_graph, name, val):
    """set computing graph option and return its old value

    :type comp_graph: :class:`.CompGraph`
    :param comp_graph: the computing graph whose option should be modified
    :type name: str
    :param name: option name

        Currently supported options are:

            * "no_profiling_on_shape_change": bool;
              When execution strategy is set to profiling, always use the
              initial profile result and do not re-run profiling even if input
              shape changes.
            * "seq_opt.enable_mem_plan_opt": bool
            * "seq_opt.enable_mem_reuse_alloc": bool
            * "seq_opt.enable_seq_comp_node_opt": bool
            * "force_dynamic_alloc": bool
            * "var_sanity_check_first_run": bool
            * "enable_sublinear_memory_opt": bool
            * "enable_memory_swap": bool; whether to enable memory swap; it
              usually performs worse than sublinear memory
            * "enable_var_mem_defragment": bool
            * "allocate_static_mem_after_graph_compile": bool
            * "enable_grad_var_static_reshape": bool;
              If set to ``True``, dynamically-shaped gradients whose original
              shape is statically inferrable would be reshaped, so static
              shape inference can continue
            * "async_exec_level": int

                * ``0``: do not dispatch asynchronously
                * ``1``: async dispatch if there is more than one cuda comp
                  node
                * mask ``0b10``: async for comp nodes with unlimited queue
                  (e.g. CPU comp nodes)
                * mask ``0b100``: async for even one comp node
            * "log_level": int

                * ``0``: no log info for graph construction/compiling
                * ``1``: static memory allocation status,
                  WorkspaceLimitGetter summary, and optimizer summary
                * ``2``: optimizer details and duplicated operators that are
                  removed
            * "graph_opt.jit": whether to enable JIT
            * "graph_opt.tensorrt": whether to enable fine-grained automatic
              replacement for TensorRT operators
            * "graph_opt.android_nn": whether to enable fine-grained automatic
              replacement for Android NN operators
            * "graph_opt_level": int

                * ``0``: disable
                * ``1``: level-1: inplace arith transformations during graph
                  construction
                * ``2``: (default) level-2: level-1, plus global optimization
                  before graph compiling
                * ``3``: also enable JIT

    :param val: new option value
    :return: old option value
    """
    if name == "log_static_mem_alloc":
        name = "log_level"
    if name == "enable_async_exec":
        name = "async_exec_level"
    return _mgb._config.set_comp_graph_option(comp_graph, name, int(val))
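
# Usage sketch, assuming ``cg`` is an existing CompGraph; the old value is
# returned, so an option can be restored after a temporary change:
#
#     old = set_comp_graph_option(cg, "log_level", 2)  # verbose logging
#     set_comp_graph_option(cg, "graph_opt_level", 3)  # level-2 opts + JIT
#     set_comp_graph_option(cg, "log_level", old)      # restore old level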


def comp_graph_is_eager(comp_graph):
    return _mgb._config.comp_graph_is_eager(comp_graph)


def add_extra_vardep(var, dep):
    """add *dep* as an extra dependency of *var*, so if *var* is required to
    compute the final output when compiling a comp graph, *dep* would also be
    included in the computing sequence. Note that the computing order of these
    two vars is not guaranteed.
    """
    assert isinstance(var, _mgb.SymbolVar) and isinstance(dep, _mgb.SymbolVar)
    assert var.owner_graph == dep.owner_graph
    return _mgb._config.add_extra_vardep(var, dep)


class _GraphPropertyBase:
    """helper class for implementing operator property setter context managers"""

    _cur_graph = None

    _graph2stack = None
    """class attribute that maintains mapping from graph to property stack;
    should be defined by child classes"""

    __prop_setup__ = None
    """overwritten by subclass to setup property"""

    __prop_clear__ = None
    """overwritten by subclass to clear property"""

    def __init__(self, comp_graph, prop):
        """:param comp_graph: computing graph, or None to not set this
        property"""
        if comp_graph is not None:
            assert isinstance(
                comp_graph, _mgb.CompGraph
            ), "invalid comp graph: {!r}".format(comp_graph)
            self._cur_graph = comp_graph
            self._graph2stack.setdefault(comp_graph, []).append(prop)

    def __setup(self, prop):
        self.__prop_setup__(self._cur_graph, prop)

    def __clear(self):
        self.__prop_clear__(self._cur_graph)

    def __enter__(self):
        if self._cur_graph is None:
            return
        stack = self._graph2stack[self._cur_graph]
        if len(stack) > 1:
            # clear the enclosing (nested) property before setting up ours
            self.__clear()
        self.__setup(stack[-1])

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if self._cur_graph is None:
            return
        stack = self._graph2stack[self._cur_graph]
        self.__clear()
        stack.pop()
        if stack:
            # restore the enclosing (nested) property
            self.__setup(stack[-1])
        else:
            del self._graph2stack[self._cur_graph]


class exc_opr_tracker_scope(_GraphPropertyBase):
    """context manager for associating an object with all operators created
    within this context, so when an exception is raised, information about the
    corresponding operator could be retrieved from
    :attr:`.MegBrainError.tracker`

    :param comp_graph: the computing graph where the operators should be tracked
    :type comp_graph: :class:`.CompGraph`
    :param tracker: an arbitrary python object to track the operators
    """

    _graph2stack = {}

    def __init__(self, comp_graph, tracker):
        assert (
            tracker is not None
        ), "bad args for exc_opr_tracker_scope: {!r} {!r}".format(comp_graph, tracker)
        super().__init__(comp_graph, tracker)

    __prop_setup__ = staticmethod(_mgb._config.begin_set_exc_opr_tracker)
    __prop_clear__ = staticmethod(_mgb._config.end_set_exc_opr_tracker)


class opr_priority_scope(_GraphPropertyBase):
    """context manager for setting priority for all operators created in this
    context

    :param comp_graph: the computing graph for which operator priority should
        be set
    :type comp_graph: :class:`.CompGraph`
    :param priority: operator priority. Smaller number means higher priority.
        Default value is 0. Grad operators would use negative priority by
        default.
    """

    _graph2stack = {}

    LOWEST_PRIORITY = 2 ** 31 - 1
    """lowest priority (i.e. max possible value)"""

    HIGHEST_PRIORITY = -LOWEST_PRIORITY
    """highest priority (i.e. min possible value)"""

    def __init__(self, comp_graph, priority):
        super().__init__(comp_graph, int(priority))

    __prop_setup__ = staticmethod(_mgb._config.begin_set_opr_priority)
    __prop_clear__ = staticmethod(_mgb._config.end_set_opr_priority)
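
# Usage sketch, assuming ``cg`` is a CompGraph and ``build()`` is a
# hypothetical helper that creates operators on it; scopes may be nested,
# and the innermost priority applies until its ``with`` block exits:
#
#     with opr_priority_scope(cg, -1):  # higher priority than the default 0
#         a = build()
#         with opr_priority_scope(cg, opr_priority_scope.LOWEST_PRIORITY):
#             b = build()               # scheduled as late as possible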


OprTrackerResult = collections.namedtuple(
    "OprTrackerResult", ["msg", "tracker", "grad_tracker"]
)


def get_opr_tracker(cg, var_id):
    """get the tracking object associated with the owner operator of a var

    :param cg: the computing graph
    :param var_id: id of the var whose owner opr tracker should be found
    :return: if no var is found, ``None`` is returned; otherwise return an
        :class:`OprTrackerResult` object
    """
    assert isinstance(cg, _mgb.CompGraph)
    ret = _mgb._config.get_opr_tracker(cg, int(var_id))
    if ret is None:
        return
    return OprTrackerResult(*ret)


def set_opr_sublinear_memory_endpoint(var):
    """set the owner operator of a symvar to be an endpoint of the sublinear
    memory optimizer

    :type var: :class:`.SymbolVar`
    """
    _mgb._config.set_opr_sublinear_memory_endpoint(var)


def max_size_t():
    """get the max value of the size_t type on the local architecture"""
    return _mgb.max_size_t()


def is_cuda_ctx_set():
    """return whether the current thread has an active cuda driver context"""
    return _mgb._config.is_cuda_ctx_set()


def get_include_path():
    """get the include path for building megbrain extensions"""
    return os.path.join(os.path.realpath(os.path.dirname(__file__)), "include")


def get_cuda_gencode(only_cap=False):
    """get ``-gencode`` options to be passed to nvcc for compiling on the
    local machine

    :param only_cap: if True, return only a list of cuda compute capability
        strings (like ``['35', '52']``)
    """
    ret = _mgb._config.get_cuda_gencode().split()
    if not only_cap:
        ret = " ".join(map("-gencode arch=compute_{0},code=sm_{0}".format, ret))
    return ret
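
# Example of the two return forms, assuming the local machine reports
# compute capabilities 6.1 and 7.0 (the actual values depend on the GPUs):
#
#     get_cuda_gencode(only_cap=True)
#     # ['61', '70']
#     get_cuda_gencode()
#     # '-gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70'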


def get_cuda_lib_path():
    """get the cuda lib64 path by locating nvcc"""
    return _mgb._config.get_cuda_lib_path()


def get_cuda_include_path():
    """get the cuda include path by locating nvcc, including the parent path
    and `parent path`/include"""
    return _mgb._config.get_cuda_include_path()


def get_cuda_version():
    """get the runtime cuda version"""
    return _mgb._config.get_cuda_version()


def is_local_cuda_env_ok():
    """check whether the local cuda environment is ok by locating nvcc"""
    return _mgb._config.is_local_cuda_env_ok()


def is_compiled_with_cuda():
    """whether cuda is enabled at compile time"""
    return _mgb._config.is_compiled_with_cuda()


def load_opr_library(path):
    """Load an external operator library. This essentially makes megbrain
    symbols public and then loads the library.

    :param path: path to the shared object; if it is None, then only megbrain
        symbols are made public.
    """
    _mgb._config.load_opr_library(
        os.path.realpath(os.path.join(os.path.dirname(__file__), "_mgb.so")), path
    )


def dump_registered_oprs():
    """get all registered oprs; return dict(id, name)"""
    return dict(_mgb._config.dump_registered_oprs())


def create_mm_server(server_addr, port):
    """create an mm server bound to the given server address; an exception is
    thrown if *server_addr* is already in use"""
    return _mgb._config.create_mm_server(server_addr, port)


def group_barrier(server_addr, port, size, rank):
    """block until all *size* ranks reach this barrier"""
    return _mgb._config.group_barrier(server_addr, port, size, rank)
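
# Minimal rendezvous sketch for two processes; 'localhost' and port 9030 are
# placeholder values. One process creates the server, then every rank blocks
# on the barrier until all *size* ranks have arrived:
#
#     create_mm_server('localhost', 9030)     # run once, e.g. by rank 0
#     group_barrier('localhost', 9030, 2, 0)  # in the rank-0 process
#     group_barrier('localhost', 9030, 2, 1)  # in the rank-1 process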

The MegEngine package ships with the CUDA environment needed to run code on GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has GPU hardware and that the driver is properly installed. If you would like to try deep learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.