# -*- coding: utf-8 -*- # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") # # Copyright (c) 2014-2020 Megvii Inc. All rights reserved. # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. import collections import os from . import mgb as _mgb _default_device_type = "CUDA" def set_device_map(logical_dev, physical_dev, device_type=None): """map from *logical_dev* to *physical_dev* for furture comp node loading example:: set_device_map(0, 2, 'CPU') # cpu0 -> cpu2 set_device_map('gpu3', 'gpu0') # gpu0 -> gpu0 :param device_type: specify the device type if devices are given by integers; if devices are given by integers and ``device_type`` is not given, the default value ``'CUDA'`` would be used. Possible values are ``'CUDA'`` and ``'CPU'``. """ if device_type is None: device_type = _default_device_type if device_type == "CUDA": xpu = "gpu" else: assert device_type == "CPU" xpu = "cpu" def rmxpu(v): if isinstance(v, str): assert v.startswith(xpu) or v.startswith("xpu"), ( "bad comp node in set_device_map: " "device_type={} comp_node={}".format(device_type, v) ) return v[3:] return v logical_dev, physical_dev = map(rmxpu, [logical_dev, physical_dev]) _mgb.CompNode._set_device_map(device_type, int(logical_dev), int(physical_dev)) def set_default_device(physical_dev, device_type=None): """set physcal device for xpux when *device_type* is None and *physical_dev* starts with *gpu* or *cpu*, the default device type would be modified accordingly for future calls to :func:`set_device_map` when remapping device number. """ global _default_device_type if ( device_type is None and isinstance(physical_dev, str) and not physical_dev.isdigit() and not physical_dev.startswith("xpu") ): t = physical_dev[:3] if t == "gpu": _default_device_type = "CUDA" else: assert t == "cpu", "bad physical_dev: {}".format(physical_dev) _default_device_type = "CPU" set_default_device_type(_default_device_type) device_type = _default_device_type set_device_map(-1, physical_dev, device_type) def set_default_device_type(device_type): """set device type for xpu""" global _default_device_type device_type = device_type.upper() _mgb.CompNode._set_unspec_device_type(device_type) _default_device_type = device_type def set_fork_cuda_warning_flag(flag): """set warning to be printed at fork if cuda has been initialized :type flag: int :param flag: controls how the warning should be printed: * 0: disable warning * 1: print warning to log * 2: print warning to log and raise exception """ _mgb._config.set_fork_cuda_warning_flag(int(flag)) def get_device_count(device_type="xpu", warn=True): """get number of devices installed on this system :param device_type: device type, one of 'xpu', 'gpu' or 'cpu' :type device_type: str """ return _mgb.CompNode._get_device_count(device_type.upper(), warn) def parse_locator(device_name: str) -> tuple: """get the tensor locator expression by device name. :param device_name: device name, like 'cpu0', 'gpu1' and 'xpux' :type device_name: str :return: (device_type, dev_num, stream_num) """ return _mgb.CompNode._parse_locator(device_name) def set_mem_reserve_size(size): """set memory reserve size: * If *size* is greater than 1, it is the absolute amount of memory to be reserved in MB; * If *size* is in the range (0, 1), it is the ratio of total memory; * If *size* is 0, memory reservation and pre-allocation would be disabled; * If *size* is -1, disable custom memory allocator and use cuda APIs directly. """ _mgb._config.set_mem_reserve_size(float(size)) def set_comp_graph_option(comp_graph, name, val): """set computing graph option and return its old value :type comp_graph: :class:`.CompGraph` :param comp_graph: the computing graph whose option should be modified :type name: str :param name: option name Currently supported options are: * "no_profiling_on_shape_change": bool; When execution strategy is set to profiling, always use the initial profile result and do not re-run profiling even if input shape changes. * "seq_opt.enable_mem_plan_opt": bool * "seq_opt.enable_mem_reuse_alloc": bool * "seq_opt.enable_seq_comp_node_opt": bool * "force_dynamic_alloc": bool * "var_sanity_check_first_run": bool * "enable_sublinear_memory_opt": bool * "enable_memory_swap": bool; whether to enable memory swap; it usually performs worse than sublinear memory * "enable_var_mem_defragment": bool * "allocate_static_mem_after_graph_compile": bool * "enable_grad_var_static_reshape": bool: If set to ``True``, dynamically-shaped gradients whose original shape is statically inferrable would be reshaped, so static shape inference can continue * "async_exec_level": int * ``0``: do not dispatch asynchronously * ``1``: async dispatch if there are more than 1 cuda comp nodes * mask ``0b10``: async for comp nodes with unlimited queue (e.g. CPU comp nodes) * mask ``0b100``: async for even one comp node * "log_level": int * ``0``: no log info for graph construction/compiling * ``1``: static memory allocation status, WorkspaceLimitGetter summary, and optimizer summary * ``2``: optimizer details and duplicated operators tha are removed * "graph_opt.jit": whether to enable JIT * "graph_opt.tensorrt": whether to enable fine-grained automatic replacement for TensorRT operators * "graph_opt.android_nn": whether to enable fine-grained automatic replacement for Android NN operators * "graph_opt_level": int * ``0``: disable * ``1``: level-1: inplace arith transformations during graph construction * ``2``: (default) level-2: level-1, plus global optimization before graph compiling * ``3``: also enable JIT :param val: new option value :return: old option value """ if name == "log_static_mem_alloc": name = "log_level" if name == "enable_async_exec": name = "async_exec_level" return _mgb._config.set_comp_graph_option(comp_graph, name, int(val)) def comp_graph_is_eager(comp_graph): return _mgb._config.comp_graph_is_eager(comp_graph) def add_extra_vardep(var, dep): """add *dep* as an extra dependency of *var*, so if *var* is required to compute the final output when compiling a comp graph, *dep* would also be included in the computing sequence. Note that the order computing of these two vars is not guaranteed. """ assert isinstance(var, _mgb.SymbolVar) and isinstance(dep, _mgb.SymbolVar) assert var.owner_graph == dep.owner_graph return _mgb._config.add_extra_vardep(var, dep) class _GraphPropertyBase: """helper class for implementing operator property setter context managers""" _cur_graph = None _graph2stack = None """class attribute that maintains mapping from graph to property stack; should be defined by child classes""" __prop_setup__ = None """overwritten by subclass to setup property""" __prop_clear__ = None """overwritten by subclass to clear property""" def __init__(self, comp_graph, prop): """:param comp_graph: computing graph, or None to not set this property""" if comp_graph is not None: assert isinstance( comp_graph, _mgb.CompGraph ), "invalid comp graph: {!r}".format(comp_graph) self._cur_graph = comp_graph self._graph2stack.setdefault(comp_graph, []).append(prop) def __setup(self, prop): self.__prop_setup__(self._cur_graph, prop) def __clear(self): self.__prop_clear__(self._cur_graph) def __enter__(self): if self._cur_graph is None: return stack = self._graph2stack[self._cur_graph] if len(stack) > 1: # clear nested property self.__clear() self.__setup(stack[-1]) def __exit__(self, exc_type, exc_value, exc_traceback): if self._cur_graph is None: return stack = self._graph2stack[self._cur_graph] self.__clear() stack.pop() if stack: # restore nested property self.__setup(stack[-1]) else: del self._graph2stack[self._cur_graph] class exc_opr_tracker_scope(_GraphPropertyBase): """context manager for associating an object with all operators created within this context; so when an exception is raised, information about the corresponding operator could be retrieved from :attr:`.MegBrainError.tracker` :param comp_graph: the computing graph where the operators should be tracked :type comp_graph: :class:`.CompGraph` :param tracker: an arbitrary python object to track the operators """ _graph2stack = {} def __init__(self, comp_graph, tracker): assert ( tracker is not None ), "bad args for exc_opr_tracker_scope: {!r} {!r}".format(comp_graph, tracker) super().__init__(comp_graph, tracker) __prop_setup__ = _mgb._config.begin_set_exc_opr_tracker __prop_clear__ = _mgb._config.end_set_exc_opr_tracker class opr_priority_scope(_GraphPropertyBase): """context manager for setting priority for all operators created in this context :param comp_graph: the computing graph for which operator priority should be set :type comp_graph: :class:`.CompGraph` :param priority: operator priority. Smaller number means higher priority. Default value is 0. Grad operator would use negative priority by default. """ _graph2stack = {} LOWEST_PRIORITY = 2 ** 31 - 1 """lowest prority (i.e. max possible value)""" HIGHEST_PRIORITY = -LOWEST_PRIORITY """highest prority (i.e. min possible value)""" def __init__(self, comp_graph, priority): super().__init__(comp_graph, int(priority)) __prop_setup__ = _mgb._config.begin_set_opr_priority __prop_clear__ = _mgb._config.end_set_opr_priority OprTrackerResult = collections.namedtuple( "OprTrackerResult", ["msg", "tracker", "grad_tracker"] ) def get_opr_tracker(cg, var_id): """get the tracking object associated with the owner operator of a var :param cg: the computing graph :param var_id: id of the var whose owner opr tracker should be found :return: if no var is found, ``None`` is returned; otherwise return an :class:`OprTrackerResult` object """ assert isinstance(cg, _mgb.CompGraph) ret = _mgb._config.get_opr_tracker(cg, int(var_id)) if ret is None: return return OprTrackerResult(*ret) def set_opr_sublinear_memory_endpoint(var): """set the owner operator of a symvar to be endpoint of sublinear memory optimizer :type var: :class:`.SymbolVar` """ _mgb._config.set_opr_sublinear_memory_endpoint(var) def max_size_t(): """get max value of size_t type on local architecture""" return _mgb.max_size_t() def is_cuda_ctx_set(): """return whether current thread has an active cuda driver context""" return _mgb._config.is_cuda_ctx_set() def get_include_path(): """get include path for building megbrain extensions""" return os.path.join(os.path.realpath(os.path.dirname(__file__)), "include") def get_cuda_gencode(only_cap=False): """get -gencode options to be passed to nvcc for compiling on local machine :param only_cap: if True, return only a list of cuda compute capability strings (like ``['35', '52']`` ) """ ret = _mgb._config.get_cuda_gencode().split() if not only_cap: ret = " ".join(map("-gencode arch=compute_{0},code=sm_{0}".format, ret)) return ret def get_cuda_lib_path(): """get the cuda root path by locating loaded libcudart.so """ return _mgb._config.get_cuda_lib_path() def get_cuda_include_path(): """get the cuda include path by locating loaded libcudart.so, including libcudart.so's path, parent path and `parent path`/include """ return _mgb._config.get_cuda_include_path() def get_cuda_version(): """get runtime cuda version """ return _mgb._config.get_cuda_version() def is_compiled_with_cuda(): """whether cuda is enabled at compile time""" return _mgb._config.is_compiled_with_cuda() def load_opr_library(path): """Load an external operator library. This essentially sets megbrain symbols as public and load the library. :param path: path to the shared object; if it is None, then only megbrain symbols are made public. """ _mgb._config.load_opr_library( os.path.realpath(os.path.join(os.path.dirname(__file__), "_mgb.so")), path ) def dump_registered_oprs(): """ get all registered oprs, return dict(id, name) """ return dict(_mgb._config.dump_registered_oprs()) def create_mm_server(server_addr, port): """ create mm server with server address throw exception if server_addr is already used """ return _mgb._config.create_mm_server(server_addr, port) def group_barrier(server_addr, port, size, rank): """ block until all ranks reach this barrier """ return _mgb._config.group_barrier(server_addr, port, size, rank)