- # -*- coding: utf-8 -*-
- # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- #
- # Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
- #
- # Unless required by applicable law or agreed to in writing,
- # software distributed under the License is distributed on an
- # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-
- """the megbrain python package
-
- Note that all the submodules are automatically imported, so you usually only
- need to ``import megengine._internal as mgb``.
- """
-
- import collections
- import collections.abc
- import json
- import os
- import sys
-
- import numpy as np
-
- from . import comp_graph_tools as cgtools
- from . import config, craniotome, dtype
- from . import global_init as _global_init
- from . import helper as _helper
- from . import mgb as _detail
- from . import opr, opr_extra, opr_param_defs, plugin
- from .exc import MegBrainError
- from .logconf import get_logger
- from .mgb import (
- CompGraph,
- CompNode,
- SharedND,
- SharedScalar,
- SymbolVar,
- TensorValueDumperContext,
- TensorValueLoaderContext,
- )
- from .mgb import as_comp_node as comp_node
- from .mgb_helper import SharedNDLazyInitializer, callback_lazycopy, copy_output
- from .plugin import CompGraphProfiler
- from .plugin import GlobalInfkernFinder as _GlobalInfkernFinder
- from .plugin import NumRangeChecker
- from .version import __version__, version_info
-
- if sys.version_info.major < 3:
- raise ImportError("megbrain requires python 3")
-
-
- class ProxySharedNDAndSymbolVar(_detail.SymbolVar):
- """this is a :class:`.SymbolVar` with a corresponding :class:`.SharedND`.
-     It can participate in graph computation and also provides :meth:`set_value`
- and :meth:`get_value`. It should be constructed by :func:`make_shared`.
- """
-
- __shared_nd = None
- __kwargs = None
-
- def __init__(self, snd, comp_graph, name, **kwargs):
- self.__shared_nd = snd
- self.__kwargs = kwargs
- self.this = snd.symvar(comp_graph=comp_graph, name=name, **kwargs).this
-
- def set_value(self, v, **kwargs):
- ret = self.__shared_nd.set_value(v, **kwargs)
- self._reeval_if_eager_eval()
- return ret
-
- def get_value(self):
- return self.__shared_nd.get_value()
-
- def reset_zero(self):
- self.__shared_nd.reset_zero()
-
-
- def make_shared(
- comp_node,
- *,
- dtype=None,
- shape=None,
- value=None,
- comp_graph=None,
- name=None,
- volatile=None
- ):
- """make a shared tensor which is stored on device and could be modified
- later, either as a :class:`.SymbolVar` or a :class:`.SharedND` object
-
- :param comp_node: computing node
- :type comp_node: :class:`.CompNode`
-     :param dtype: data type; if it is None, the dtype of *value* is used when
-         *value* is not None, and float32 is used as the default dtype when
-         *value* is None
- :type dtype: :class:`numpy.dtype` compatible
- :param value: initializing value
- :type value: None or :class:`numpy.ndarray`
- :param comp_graph: the computing graph to which this shared value should
-         belong; if provided, the returned object could be used as a
- :class:`.SymbolVar`
- :type comp_graph: None or :class:`.CompGraph`
- :param name: node name to be used in computing graph; only meaningful if
- *comp_graph* is provided
- :param volatile: if *comp_graph* is given then *volatile* indicates whether
- shape or mem ptr of this SharedND can be changed
- :rtype: :class:`.SharedND` if *comp_graph* is not given; or
- :class:`ProxySharedNDAndSymbolVar` otherwise
- """
- if dtype is None:
- if value is not None:
- value = np.ascontiguousarray(value)
- dtype = to_mgb_supported_dtype(value.dtype)
- else:
- dtype = np.float32
- comp_node = _detail.as_comp_node(comp_node)
- rst = _detail.SharedND(comp_node, dtype)
- if value is not None:
-         assert shape is None, "can not provide both value and shape"
- rst.set_value(value)
- elif shape is not None:
- rst._set_init_shape(shape)
- if comp_graph is None:
- assert name is None and volatile is None
- return rst
- assert isinstance(comp_graph, CompGraph), "expect CompGraph but got {}".format(
- comp_graph
- )
- if volatile is None:
- volatile = False
- else:
- assert isinstance(volatile, bool)
- return ProxySharedNDAndSymbolVar(rst, comp_graph, name, volatile=volatile)
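-
- # A minimal usage sketch of make_shared; the device string "xpu0" and the
- # variable names below are placeholders, not fixed by this module:
- #
- #     cg = comp_graph()
- #     w = make_shared("xpu0", value=np.eye(4, dtype=np.float32),
- #                     comp_graph=cg, name="w")
- #     w.set_value(np.zeros((4, 4), dtype=np.float32))  # update value on device
- #     host_w = w.get_value()                           # copy back to a host ndarray
- #
- #     # without comp_graph, a plain SharedND is returned instead
- #     buf = make_shared("xpu0", dtype=np.float32, shape=(16,))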
-
-
- def make_immutable(comp_node, comp_graph, value, *, dtype=None, name=None):
- """make a graph node containing an immutable tensor from host tensor value
-
-     :param dtype: required data type; if not None, the data would be converted
-         to that type; otherwise the dtype is inferred from *value*
- """
-
- comp_node = _detail.as_comp_node(comp_node)
- assert isinstance(
- comp_graph, _detail.CompGraph
- ), "expect CompGraph but got {!r}".format(comp_graph)
-
- config = _detail.make_opr_config(name, comp_node)
- return _helper.cvt_opr_result(
- _detail._make_immutable(comp_graph, value, dtype, config)
- )
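-
- # A minimal usage sketch of make_immutable; the device string "xpu0" and the
- # names are placeholders:
- #
- #     cg = comp_graph()
- #     c = make_immutable("xpu0", cg, np.array([0.5, 2.0], dtype=np.float32),
- #                        name="scale")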
-
-
- def make_arg(
- comp_node,
- comp_graph,
- *,
- dtype=np.float32,
- shape=None,
- name=None,
- value=None,
- enable_static_infer=True
- ):
- """make an argument to be passed to compiled function during runtime;
-
- :type shape: None or tuple of int
-     :param shape: expected tensor shape to be used for shape inference; the
-         actual tensor shape could be different
- :type name: str
- :param name: name of the generated var node
- :type value: None or ndarray-compatible
- :param value: initial value used for static inference; if not given, static
- infer would be deferred to first graph execution
- :param enable_static_infer: whether to enable static inference for this var
- """
- comp_node = _detail.as_comp_node(comp_node)
-     host_val = _detail._HostSharedND(comp_node, dtype)
-
- if value is not None:
- value = np.ascontiguousarray(value, dtype=dtype)
- if shape is None:
- shape = value.shape
- else:
- assert shape == value.shape
- if shape is not None:
- host_val._resize(shape)
-
- if value is not None:
- host_val.set_value(value)
-
- return _helper.cvt_opr_result(
- ProxySharedNDAndSymbolVar(
- host_val, comp_graph, name, enable_static_infer=enable_static_infer
- )
- )
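-
- # A minimal usage sketch of make_arg: declare a runtime input with a shape hint
- # for static inference; the device string "xpu0" and the names are placeholders:
- #
- #     cg = comp_graph()
- #     data = make_arg("xpu0", cg, dtype=np.float32,
- #                     shape=(1, 3, 224, 224), name="data")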
-
-
- def comp_graph(*, extra_opts=None, check_env_var=True):
- """allocate a new computing graph
-
- :param extra_opts: extra options to be set; would be updated (modified
- inplace) from ``MGB_COMP_GRAPH_OPT`` environment var. See
- :func:`.set_comp_graph_option` for list of supported options.
- :type extra_opts: dict
- :param check_env_var: whether to check environment vars
- :type check_env_var: bool
-
- :return: the comp graph object
- :rtype: :class:`.CompGraph`
- """
- cg = _detail.CompGraph()
- if extra_opts is None:
- extra_opts = {}
- if check_env_var:
- setting = os.getenv("MGB_COMP_GRAPH_OPT")
- if setting:
- for item in setting.split(";"):
- k, v = item.split("=", 1)
- extra_opts.setdefault(k, v)
- get_logger().warning(
- "set comp graph option from env: {}".format(extra_opts)
- )
- user_data = os.getenv("MGB_COMP_GRAPH_USER_DATA")
- if user_data:
- storage = cg.user_data
- for ud in user_data.split(";"):
- k, v = ud.split("=", 1)
- storage[k] = eval(v)
- _GlobalInfkernFinder.add_graph(cg)
- for k, v in extra_opts.items():
- cg.set_option(k, v)
- return cg
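-
- # A minimal usage sketch of comp_graph; explicit extra_opts take precedence over
- # values parsed from MGB_COMP_GRAPH_OPT (";"-separated "key=value" pairs). The
- # option name below is the one mentioned in grad(); other option names are
- # backend specific:
- #
- #     # MGB_COMP_GRAPH_OPT="graph_opt_level=2" python train.py
- #     cg = comp_graph(extra_opts={"graph_opt_level": 2})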
-
-
- def grad(
- target, wrt, warn_mid_wrt=True, use_virtual_grad=None, return_zero_for_nodep=True
- ):
- r"""compute symbolic grad
-
- :param target: grad target var
- :type target: :class:`.SymbolVar`
- :param wrt: with respect to which to compute the grad
- :type wrt: :class:`.SymbolVar` or Iterable[SymbolVar]
- :param warn_mid_wrt: whether to give warning if *wrt* is not endpoint
- :type warn_mid_wrt: bool
- :param use_virtual_grad: whether to use virtual grad opr, so fwd graph can
- be optimized before applying grad; if ``None`` is given, then virtual
- grad would be used if ``graph_opt_level >= 2``
- :type use_virtual_grad: :class:`bool` or ``None``
-     :param return_zero_for_nodep: if *target* does not depend on *wrt*, set to True to return
-         a zero-valued :class:`.SymbolVar` rather than ``None``; can't be set to False when using
-         the virtual grad opr.
- :type return_zero_for_nodep: bool
- :rtype: :class:`.SymbolVar` or None
- :return: :math:`\frac{\partial\text{target}}{\partial\text{wrt}}`
- """
- if use_virtual_grad is None:
- use_virtual_grad = -1
- else:
- use_virtual_grad = 1 if use_virtual_grad else 0
-
-     if isinstance(wrt, SymbolVar):
-         wrts = [wrt]
-     else:
-         wrts = wrt
-
-     assert isinstance(wrts, collections.abc.Iterable)
-     # return an invalid SymbolVar (with nullptr VarNode*) when return_zero_for_nodep is False
- # and target doesn't depend on wrt
- grads = _detail._grad(
- target, wrts, bool(warn_mid_wrt), use_virtual_grad, return_zero_for_nodep
- )
- grads = list(grads)
-
- for i in range(len(grads)):
- if not grads[i].valid:
- assert (
- not return_zero_for_nodep
- ), "invalid grad SymbolVar: target={}, wrt={}".format(target, wrts[i])
- grads[i] = None
-
- if len(grads) == 1:
- grads = grads[0]
-
- return grads
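-
- # A minimal usage sketch of grad; loss, w, b and v are placeholder SymbolVars:
- #
- #     dw = grad(loss, w)                    # single wrt -> a single SymbolVar
- #     dw, db = grad(loss, [w, b])           # iterable wrt -> a list of grads
- #     g = grad(loss, v, return_zero_for_nodep=False)   # may be None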
-
-
- def current_grad_target(comp_graph):
- """get current target var to compute grad, used for implementing custom
- gradient"""
- return _detail._current_grad_target(comp_graph)
-
-
- def add_device_map(map_location):
- """add map location while loading models"""
- _detail.CompNode.cn_thread_local.__setattr__("map_location", map_location)
-
-
- def del_device_map():
- """delete map location"""
- _detail.CompNode.cn_thread_local.__delattr__("map_location")
-
-
- def inter_graph_trans_var(dest_graph, src):
- """get the corresponding var of *src* in *dest_graph*; assuming
- *dest_graph* is a copy of owner graph of *src*; usually used in callback of
- set_grad to get grad of vars in loop
-
- :param dest_graph: target computing graph
- :type dest_graph: :class:`.CompGraph`
- :param src: source var node
- :type src: :class:`.SymbolVar`
- :return: corresponding var in *dest_graph*
- :rtype: :class:`.SymbolVar`
- """
- return _detail._inter_graph_trans_var(dest_graph, src)
-
-
- def get_graph_optimizer_replaced_var(src):
- """get optimized var corresponding to given var; usually used in callback
- of set_grad to get grad w.r.t. some var
-
- :param src: source var node
- :type src: :class:`.SymbolVar`
- :rtype: :class:`.SymbolVar`
- """
- return _detail._get_graph_optimizer_replaced_var(src)
-
-
- CompGraphSerializationResult = collections.namedtuple(
- "CompGraphSerializationResult",
- [
- "nr_opr",
- "tot_bytes",
- "tensor_value_bytes",
- "content_hash",
- "inputs",
- "outputs",
- "params",
- ],
- )
-
-
- def serialize_comp_graph_to_file(
- fpath,
- output_vars,
- *,
- keep_var_name=1,
- keep_param_name=False,
- keep_opr_priority=False,
- tensor_value_dumper=None,
- output_strip_info=False,
- append=False,
- format=None,
- **kwargs
- ):
- """serialize this computing graph and write result to a file. Note:
- ``kwargs`` exists for backward compatibility; there is no additional
- arguments.
-
-     :param fpath: path of the output file
- :type fpath: ``str``
- :param output_vars: output variables that need to be retrieved when
- deserializing
-
- .. note::
-
- The underlying C++ API only accepts a var list. If a dict is given,
- the vars would be renamed to given names.
-
- :type output_vars: dict(name => :class:`.SymbolVar`), or a list of vars
- :param keep_var_name: level for keeping variable names:
-
- * 0: none of the names are kept
- * 1: keep names of output vars
- * 2: keep names of all (output and internal) vars
- :param keep_param_name: whether to keep param names, so param values can be
- easily manipulated after loading model
- :param keep_opr_priority: whether to keep priority setting for operators
- :param tensor_value_dumper: a callable to dump tensor values; it should
- only write the tensor value without layout information. It would be
- given a :class:`.TensorValueDumperContext` object as its sole argument.
- :param output_strip_info: if set to True, then a json file containing
- information for code strip would be written to ``fpath+'.json'``
- :param append: whether to open output file in append mode
-     :param format: serialization format of the resulting model; should be
-         ``"fbs"``, or ``None`` for the default format
-     :type format: ``str`` or ``None``
-     :return: an instance of namedtuple :class:`CompGraphSerializationResult`,
-         whose fields are:
-
-         * ``nr_opr`` number of operators dumped
-         * ``tot_bytes`` total bytes for the whole graph
-         * ``tensor_value_bytes`` bytes consumed for dumping tensor values
-         * ``content_hash`` hash of the dumped graph content
-         * ``inputs`` names of input tensors
-         * ``outputs`` names of output vars
-         * ``params`` list of names of dumped params
- """
-
- assert isinstance(fpath, str), "bad file path: {!r}".format(fpath)
- ov = _detail._VectorSymbolVar()
- SUPPORTED_FORMATS = {
- # default
- None: _detail.GraphDumpFormat_FLATBUFFERS,
- "fbs": _detail.GraphDumpFormat_FLATBUFFERS,
- }
- resolved_fmt = SUPPORTED_FORMATS.get(format, None)
- if resolved_fmt is None:
- raise ValueError(
- "unknown format {} requested, supported ones are {}".format(
- format, list(filter(None, SUPPORTED_FORMATS.keys()))
- )
- )
- if isinstance(output_vars, dict):
- used_vars = set()
- for name, var in output_vars.items():
- assert isinstance(var, _detail.SymbolVar), "bad output var: {!r}".format(
- var
- )
- assert var.id not in used_vars, (
- "var name is associated with a var object, so we can not have "
- "two names given to the same var: {}".format(var)
- )
- used_vars.add(var.id)
- var.rename(name)
- ov.push_back(var)
- else:
- for i in output_vars:
- assert isinstance(i, _detail.SymbolVar), "bad output var: {!r}".format(i)
- ov.push_back(i)
-
- if tensor_value_dumper is not None:
-         assert callable(tensor_value_dumper)
-
- class Callback(_detail._TensorValueDumperCallback):
- def call(self, ctx, *, _f=tensor_value_dumper):
- _f(ctx)
-
- tensor_value_dumper = Callback()
-
-     # for backward compatibility; ``ov`` is used as a sentinel object to tell
-     # whether the deprecated kwargs were actually passed
- mangle_opr_name = kwargs.pop("mangle_opr_name", ov)
- if mangle_opr_name is not ov:
- get_logger().warning("mangle_opr_name is deprecated; use keep_var_name instead")
- keep_var_name = 1 if mangle_opr_name else 2
- mangle_param_name = kwargs.pop("mangle_param_name", ov)
- assert (
- not kwargs
- ), "extra kwargs provided to serialize_comp_graph_to_file: {}".format(kwargs)
-
- if mangle_param_name is not ov:
- get_logger().warning(
- "mangle_param_name is deprecated; use keep_param_name instead"
- )
- keep_param_name = not mangle_param_name
-
- inputs = _detail._VectorString()
- outputs = _detail._VectorString()
- params = _detail._VectorString()
- stat = _detail._VectorSizeT()
-
- _detail._serialize_comp_graph_to_file(
- fpath,
- append,
- resolved_fmt,
- ov,
- keep_var_name,
- keep_param_name,
- keep_opr_priority,
- tensor_value_dumper,
- stat,
- inputs,
- outputs,
- params,
- )
-
- dump_ret = CompGraphSerializationResult(
- *stat, list(inputs), list(outputs), list(params)
- )
-
- if output_strip_info:
- with open(fpath + ".json", "w") as fout:
- strip_info = _detail._get_info_for_strip(ov)
- strip_info_dict = json.loads(strip_info)
- strip_info_dict["hash"] = dump_ret.content_hash
- json.dump(strip_info_dict, fout)
-
- return dump_ret
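-
- # A minimal usage sketch of serialize_comp_graph_to_file; the file name and
- # pred_var are placeholders:
- #
- #     ret = serialize_comp_graph_to_file(
- #         "model.mge", {"pred": pred_var},
- #         keep_param_name=True, output_strip_info=True)
- #     print(ret.nr_opr, ret.tot_bytes, ret.outputs)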
-
-
- CompGraphLoadResult = collections.namedtuple(
- "CompGraphLoadResult", ["graph", "output_vars_dict", "output_vars_list"]
- )
-
-
- def load_comp_graph_from_file(
- fpath, *, comp_node_mapper=None, tensor_value_loader=None
- ):
- """Load a serialized computing graph from file.
-
-     :param fpath: Path of the serialized graph file to load
- :type fpath: ``str``
- :param comp_node_mapper: A callable to modify comp node locator, takes old
- locator as argument and returns new locator.
- :type comp_node_mapper: Callable[[str], str]
- :param tensor_value_loader: A callable to load tensor values. It should
- read the tensor value with the given shape and dtype and return it as
- NumPy ndarray. It would be given a :class:`.TensorValueLoaderContext`
- object as its sole argument.
- :type tensor_value_loader: Callable[[TensorValueLoaderContext], numpy.ndarray]
- :return: An instance of namedtuple :class:`CompGraphLoadResult`,
- whose fields are:
-
- * ``graph`` loaded CompGraph
- * ``output_vars_dict`` A Python dict, mapping name to output SymbolVar
- * ``output_vars_list`` A Python list, containing output vars in the
- order passed to serialize_comp_graph_to_file
- """
- assert isinstance(fpath, str), "bad file path: {!r}".format(fpath)
-
- if comp_node_mapper is not None:
-         assert callable(comp_node_mapper)
-
- class Callback(_detail._CompNodeMapperCallback):
- def call(self, desc, *, _f=comp_node_mapper):
- return _f(desc)
-
- comp_node_mapper = Callback()
- if tensor_value_loader is not None:
-         assert callable(tensor_value_loader)
-
- class Callback(_detail._TensorValueLoaderCallback):
- def call(self, ctx, *, _f=tensor_value_loader):
- return _f(ctx)
-
- tensor_value_loader = Callback()
- output_vars_map = _detail._VectorPairStringSymbolVar()
- output_vars_list = _detail._VectorSymbolVar()
- cg = _detail._load_comp_graph_from_file(
- fpath, comp_node_mapper, tensor_value_loader, output_vars_map, output_vars_list
- )
- return CompGraphLoadResult(cg, dict(list(output_vars_map)), list(output_vars_list))
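-
- # A minimal usage sketch of load_comp_graph_from_file; the file name and the
- # output var name are placeholders, and the mapper simply forces every var onto
- # "cpu0" (assuming "cpu0" is a valid locator on the target machine):
- #
- #     res = load_comp_graph_from_file(
- #         "model.mge", comp_node_mapper=lambda loc: "cpu0")
- #     pred = res.output_vars_dict["pred"]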
-
-
- def optimize_for_inference(
- output_vars,
- *,
- f16_io_f32_comp=False,
- f16_io_comp=False,
- use_nhwcd4=False,
- fuse_conv_bias_nonlinearity=False,
- use_nchw32=False,
- fuse_conv_bias_with_z=False,
- use_nchw4=False,
- use_nchw88=False,
- use_nchw44=False,
- use_nchw44_dot=False,
- use_chwn4=False
- ):
- """optimize computing graph for inference
-
- This applies a predefined set of optimization passes. Refer to the mnist
- sdk example and C++ code for fine-grained control.
-
- :param output_vars: output symvars
- :type output_vars: list of :class:`.SymbolVar`
- :param f16_io_f32_comp: whether to use float16 for I/O between oprs and use
- float32 as internal computation precision. Note the output var would be
- changed to float16
- :param f16_io_comp: whether to use float16 for both I/O and computation
- precision
- :param use_nhwcd4: whether to use NHWCD4 data format. This is faster on some
- OpenCL devices
-     :param fuse_conv_bias_nonlinearity: whether to fuse conv+bias+nonlinearity
- into one opr. This is supported only in NHWCD4 format.
- :param use_nchw4: whether to use NCHW4 tensor format.
-     :param use_nchw88: whether to use NCHW88 tensor format. This may be faster
-         in some cases.
-     :param use_nchw44: whether to use NCHW44 tensor format. This may be faster
-         in some cases.
-     :param use_nchw44_dot: whether to use NCHW44_DOT tensor format. This format
-         is optimized for inference on ARMv8.2
- :param use_nchw32: whether to use NCHW32 tensor format. Mainly used for
- nvidia tensorcore.
- :param use_chwn4: whether to use CHWN4 tensor format. Mainly used for
- nvidia tensorcore.
-
-
- :return: list of transformed vars corresponding to given output vars
- """
-
- assert isinstance(output_vars, (list, tuple))
- opt = _detail._OptimizeForInferenceOptions()
- settings = locals()
- for i in [
- "f16_io_f32_comp",
- "f16_io_comp",
- "fuse_conv_bias_nonlinearity",
- "fuse_conv_bias_with_z",
- ]:
- if settings[i]:
- getattr(opt, "enable_{}".format(i))()
-
-     layout_transform = None
- for k, v in {
- "use_nchw4": "nchw4",
- "use_nhwcd4": "nhwcd4",
- "use_nchw32": "nchw32",
- "use_nchw88": "nchw88",
- "use_nchw44": "nchw44",
- "use_nchw44_dot": "nchw44_dot",
- "use_chwn4": "chwn4",
- }.items():
- if settings[k]:
-             assert (
-                 not layout_transform
-             ), "only one layout transform is supported, but got both {} and {}".format(
-                 layout_transform, k
-             )
-             getattr(opt, "enable_{}".format(v))()
-             layout_transform = k
-
- vec = _detail._VectorSymbolVar()
- for i in output_vars:
- assert isinstance(i, _detail.SymbolVar), "bad var: {}".format(i)
- vec.push_back(i)
- return list(_detail._optimize_for_inference(vec, opt))
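-
- # A minimal usage sketch of optimize_for_inference; pred_var is a placeholder,
- # and at most one of the layout-transform flags may be enabled at a time:
- #
- #     (pred_opt,) = optimize_for_inference(
- #         [pred_var], f16_io_f32_comp=True, use_nhwcd4=True)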
-
-
- def get_opr_fp_graph_exec(comp_graph, output_vars):
- """get opr footprint and graph exec info
-
-     This function recompiles the computing graph, so any AsyncExecutable
-     compiled before becomes invalid.
-
-     :param comp_graph: the computing graph
-     :param output_vars: list of :class:`.SymbolVar`
- """
- assert isinstance(output_vars, (list, tuple))
- vec = _detail._VectorSymbolVar()
- for i in output_vars:
- assert isinstance(i, _detail.SymbolVar), "bad var: {}".format(i)
- vec.push_back(i)
-     return json.loads(_detail._get_opr_fp_graph_exec(comp_graph, vec))
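-
- # A minimal usage sketch of get_opr_fp_graph_exec; cg and out_var are
- # placeholders, and the result is a plain dict decoded from JSON:
- #
- #     info = get_opr_fp_graph_exec(cg, [out_var])
- #     print(json.dumps(info, indent=2))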
-
-
- def to_mgb_supported_dtype(dtype_):
- """get the dtype supported by megbrain nearest to given dtype"""
- if (
- dtype.is_lowbit(dtype_)
- or dtype.is_quantize(dtype_)
- or dtype.is_bfloat16(dtype_)
- ):
- return dtype_
- return _detail._to_mgb_supported_dtype(dtype_)
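-
- # A minimal usage sketch of to_mgb_supported_dtype; float64 is not natively
- # supported by megbrain, so it is typically mapped onto float32 (the concrete
- # mapping is decided by the underlying C++ implementation):
- #
- #     dt = to_mgb_supported_dtype(np.float64)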
-
-
- def return_free_memory():
- """return free memory chunks on all devices.
-
- This function will try it best to free all consecutive free chunks back to
- operating system, small pieces may not be returned.
-
- Please notice that this function will not move any memory in-use.
- """
- _detail.CompNode._try_coalesce_all_free_memory()