Browse Source

fix(jit): fix jit doc and add NCHW44_DOT

GitOrigin-RevId: 5f5feae8e7
tags/v0.5.0
Megvii Engine Team Xu Xinran 5 years ago
parent
commit
5257991e68
1 changed files with 32 additions and 10 deletions
  1. +32
    -10
      python_module/megengine/jit/__init__.py

+ 32
- 10
python_module/megengine/jit/__init__.py View File

@@ -442,17 +442,38 @@ class trace:
Serialize trace to file system.

:param fpath: positional only argument. Path of output file.
:param arg_names: names of the input tensors in the traced function
:param append: whether output is appended to ``fpath``
:param f16_io_f32_comp: whether to use float16 for I/O between oprs and use
:param arg_names: names of the input tensors in the traced function.
:param append: whether output is appended to ``fpath``.
:param optimize_for_inference: whether to enable optimize_for_inference
pass before dump.

:param enable_io16xc32: whether to use float16 for I/O between oprs and use
float32 as internal computation precision. Note the output var would be
changed to float16
:param f16_io_comp: whether to use float16 for both I/O and computation
precision
:param use_nhwcd4: whether to use NHWCD4 data format. This is faster on some
OpenCL devices
:param fuse_conv_bias_nonlinearity: whether to fuse conv+bias+nonlinearty
into one opr. This is supported only in NHWCD4 format.
changed to float16.
:param enable_ioc16: whether to use float16 for both I/O and computation
precision.

:param enable_hwcd4: whether to use NHWCD4 data layout. This is faster on some
OpenCL backends.
:param enable_nchw88: whether to use NCHW88 data layout. It is currently
used in the X86 AVX backend.
:param enable_nchw44: whether to use NCHW44 data layout. It is currently
used in the ARM backend.
:param enable_nchw44_dot: whether to use NCHW44_DOT data layout. It is currently
used in the armv8.2+dotprod backend.
:param enable_nchw4: whether to use NCHW4 data layout. It is currently
used in the NVIDIA backend (based on cuDNN).
:param enable_nchw32: whether to use NCHW32 data layout. It is currently
used in the NVIDIA backend with Tensor Cores (based on cuDNN).
:param enable_chwn4: whether to use CHWN4 data layout. It is currently
used in the NVIDIA backend with Tensor Cores.

:param enable_fuse_conv_bias_nonlinearity: whether to fuse conv+bias+nonlinearity
into one opr.
:param enable_fuse_conv_bias_with_z: whether to fuse conv_bias with z
input for inference on the NVIDIA backend (this optimization pass will
result in a precision mismatch between the outputs of training and
inference).
"""
if self._status != self._FINISHED:
raise ValueError("not traced")
@@ -475,6 +496,7 @@ class trace:
"enable_nchw88": "use_nchw88",
"enable_nchw32": "use_nchw32",
"enable_nchw44": "use_nchw44",
"enable_nchw44_dot": "use_nchw44_dot",
"enable_chwn4": "use_chwn4",
"enable_fuse_conv_bias_nonlinearity": "fuse_conv_bias_nonlinearity",
"enable_fuse_conv_bias_with_z": "fuse_conv_bias_with_z",


Loading…
Cancel
Save