
feat(mge/quantization): add QParams and QuantDtypeMeta for quantization data structure

GitOrigin-RevId: df3416fe13
tags/v1.3.0
Megvii Engine Team, 4 years ago
commit 1d7dd00144
24 changed files with 570 additions and 411 deletions
  1. +0   -3   imperative/python/megengine/core/ops/builtin/__init__.py
  2. +118 -62  imperative/python/megengine/core/tensor/dtype.py
  3. +1   -1   imperative/python/megengine/functional/nn.py
  4. +0   -1   imperative/python/megengine/jit/tracing.py
  5. +4   -7   imperative/python/megengine/module/module.py
  6. +12  -9   imperative/python/megengine/module/qat/module.py
  7. +13  -3   imperative/python/megengine/quantization/__init__.py
  8. +61  -50  imperative/python/megengine/quantization/fake_quant.py
  9. +2   -0   imperative/python/megengine/quantization/internal_fake_quant.py
  10. +84  -80  imperative/python/megengine/quantization/observer.py
  11. +44  -61  imperative/python/megengine/quantization/qconfig.py
  12. +12  -9   imperative/python/megengine/quantization/quantize.py
  13. +111 -31  imperative/python/megengine/quantization/utils.py
  14. +28  -9   imperative/python/megengine/tensor.py
  15. +3   -3   imperative/python/test/unit/core/test_dtype_quant.py
  16. +3   -3   imperative/python/test/unit/core/test_serialization.py
  17. +22  -0   imperative/python/test/unit/core/test_tensor_wrapper.py
  18. +0   -25  imperative/python/test/unit/functional/test_tensor.py
  19. +11  -7   imperative/python/test/unit/quantization/test_fake_quant.py
  20. +16  -10  imperative/python/test/unit/quantization/test_module.py
  21. +7   -6   imperative/python/test/unit/quantization/test_observer.py
  22. +3   -2   imperative/python/test/unit/quantization/test_op.py
  23. +0   -14  imperative/python/test/unit/quantization/test_qconfig.py
  24. +15  -15  imperative/python/test/unit/quantization/test_quantize.py

+0 -3 imperative/python/megengine/core/ops/builtin/__init__.py

@@ -6,9 +6,6 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import warnings
from typing import Union

from ..._imperative_rt import OpDef, ops


__all__ = ["OpDef"]


+118 -62 imperative/python/megengine/core/tensor/dtype.py

@@ -5,22 +5,24 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

import collections
from collections import namedtuple
from typing import Union


import numpy as np


# normal dtype related
from .._imperative_rt import bfloat16, intb1, intb2, intb4
from .._imperative_rt.common import (
bfloat16,
get_scale,
get_zero_point,
intb1,
intb2,
intb4,
is_dtype_equal,
is_quantize,
)




# normal dtype related
def is_lowbit(dtype):
return (dtype is intb1) or (dtype is intb2) or (dtype is intb4)


@@ -30,34 +32,80 @@ def is_bfloat16(dtype):




# quantization dtype related
_QuantDtypeMetadata = collections.namedtuple(
"QuantDtypeMetadata", ["name", "np_dtype_str", "is_unsigned", "qmin", "qmax",]
)


_metadata_dict = {
"quint8": _QuantDtypeMetadata("Quantized8Asymm", "uint8", True, 0, 255),
"qint8": _QuantDtypeMetadata("QuantizedS8", "int8", False, -128, 127),
"quint4": _QuantDtypeMetadata("Quantized4Asymm", "uint8", True, 0, 15),
"qint4": _QuantDtypeMetadata("QuantizedS4", "int8", False, -8, 7),
"qint32": _QuantDtypeMetadata(
"QuantizedS32", "int32", False, -(2 ** 31), 2 ** 31 - 1,
# use namedtuple to make class immutable, comparable and easy to print
class QuantDtypeMeta(
namedtuple(
"QuantDtypeMeta",
["name", "cname", "np_dtype_str", "qmin", "qmax", "is_unsigned"],
)
):
r"""
Store metadata for quantize dtype. Could be used to create custom quant dtype
for QAT when the network don't need to be converted for inference, but only
to export network metadata for third-party platform inference.

:param name: a unique name string.
:param cname: used in :func:`~.create_quantized_dtype` for model dump and inference.
:param np_dtype_str: used in :func:`~.create_quantized_dtype` to generate ``np.dtype``.
:param qmin: a int number indicating quant dtype's lowerbound.
:param qmax: a int number indicating quant dtype's upperbound.
:param is_unsigned: a helper value that could be inference from np_dtype_str.
"""

def __new__(
cls,
name: str,
cname: str,
np_dtype_str: str,
qmin: int,
qmax: int,
is_unsigned: bool = None,
):
assert isinstance(np_dtype_str, str)
is_unsigned = np_dtype_str[0] == "u" if is_unsigned is None else is_unsigned
return super().__new__(cls, name, cname, np_dtype_str, qmin, qmax, is_unsigned)

def __copy__(self):
return self

def __deepcopy__(self, _):
"""
Ignore deepcopy so that a dtype meta can be treated as singleton, for more
strict check in :meth:`~.FakeQuantize.fake_quant_forward`.
"""
return self


_builtin_quant_dtypes = {
"quint8": QuantDtypeMeta("quint8", "Quantized8Asymm", "uint8", 0, 255),
"qint8": QuantDtypeMeta("qint8", "QuantizedS8", "int8", -128, 127),
"qint8_narrow": QuantDtypeMeta("qint8_narrow", "QuantizedS8", "int8", -127, 127),
"quint4": QuantDtypeMeta("quint4", "Quantized4Asymm", "uint8", 0, 15),
"qint4": QuantDtypeMeta("qint4", "QuantizedS4", "int8", -8, 7),
"qint32": QuantDtypeMeta(
"qint32", "QuantizedS32", "int32", -(2 ** 31), 2 ** 31 - 1,
),
# NOTE: int2 is not supported for model dump yet
"quint2": _QuantDtypeMetadata(None, "uint8", True, 0, 3),
"qint2": _QuantDtypeMetadata(None, "int8", False, -2, 1),
"quint2": QuantDtypeMeta("quint2", None, "uint8", 0, 3),
"qint2": QuantDtypeMeta("qint2", None, "int8", -2, 1),
}




def _check_zero_point(zp: int, dtype_str: str):
qmin = _metadata_dict[dtype_str].qmin
qmax = _metadata_dict[dtype_str].qmax
def _check_zero_point(zp: int, dtype_meta: QuantDtypeMeta):
qmin = dtype_meta.qmin
qmax = dtype_meta.qmax
if zp < qmin or zp > qmax:
raise ValueError(
"zero_point should be within [{}, {}] for {}".format(qmin, qmax, dtype_str)
"zero_point should be within [{}, {}] for {}".format(
qmin, qmax, dtype_meta.name
)
)




def get_quantized_dtype(dtype_str: str, scale: float, zp: Union[int, None]):
def create_quantized_dtype(
dtype_meta: QuantDtypeMeta, scale: float, zp: Union[int, None]
):
r""" r"""
Get quantized dtype with metadata attribute according to _metadata_dict. Get quantized dtype with metadata attribute according to _metadata_dict.


@@ -65,32 +113,34 @@ def get_quantized_dtype(dtype_str: str, scale: float, zp: Union[int, None]):
not have ``zero_point``, to be consitent with tensor generated by calling not have ``zero_point``, to be consitent with tensor generated by calling
compiled function from `CompGraph.compile(inputs, outspec)`. compiled function from `CompGraph.compile(inputs, outspec)`.


:param dtype: a string indicating which dtype to return
:param dtype_meta: a QuantDtypeMeta indicating which dtype to return. the
``cname`` attribute cannot be ``None``.
:param scale: a number for scale to store in dtype's metadata :param scale: a number for scale to store in dtype's metadata
:param zp: a number for zero_point to store in dtype's metadata :param zp: a number for zero_point to store in dtype's metadata
""" """
metadata = _metadata_dict[dtype_str]
np_dtype_str = metadata.np_dtype_str
is_unsigned = metadata.is_unsigned
if is_unsigned:
if dtype_meta.cname is None:
raise ValueError("dtype {} without cname attr is not supported.")
if dtype_meta.is_unsigned:
if zp is None or int(zp) != zp:
raise ValueError("zero_point should be an integer")
zp = int(zp)
_check_zero_point(zp, dtype_str)
_check_zero_point(zp, dtype_meta)
return np.dtype(
np_dtype_str,
dtype_meta.np_dtype_str,
metadata={
"mgb_dtype": {
"name": metadata.name,
"name": dtype_meta.cname,
"scale": float(scale),
"zero_point": zp,
}
},
)
else:
# Don't trick to combine with is_unsigned. Metadata should not contain
# invalid field to keep consistent with c dtype.
return np.dtype(
np_dtype_str,
metadata={"mgb_dtype": {"name": metadata.name, "scale": float(scale)}},
dtype_meta.np_dtype_str,
metadata={"mgb_dtype": {"name": dtype_meta.cname, "scale": float(scale)}},
)




@@ -100,7 +150,7 @@ def quint8(scale, zero_point):
``zero_point`` (uint8). The real value represented by a quint8 data type is
float_val = scale * (uint8_val - zero_point)
"""
return get_quantized_dtype("quint8", scale, zero_point)
return create_quantized_dtype(_builtin_quant_dtypes["quint8"], scale, zero_point)




def qint8(scale):
@@ -108,7 +158,7 @@ def qint8(scale):
Construct a quantized int8 data type with ``scale`` (float). The real value
represented by a qint8 data type is float_val = scale * int8_val
"""
return get_quantized_dtype("qint8", scale, None)
return create_quantized_dtype(_builtin_quant_dtypes["qint8"], scale, None)




def qint32(scale):
@@ -116,7 +166,7 @@ def qint32(scale):
Construct a quantized int32 data type with ``scale`` (float). The real value
represented by a qint32 data type is float_val = scale * int32_val
"""
return get_quantized_dtype("qint32", scale, None)
return create_quantized_dtype(_builtin_quant_dtypes["qint32"], scale, None)




def quint4(scale, zero_point):
@@ -125,7 +175,7 @@ def quint4(scale, zero_point):
``zero_point`` (uint8). The real value represented by a quint4 data type is
float_val = scale * (uint4_val - zero_point)
"""
return get_quantized_dtype("quint4", scale, zero_point)
return create_quantized_dtype(_builtin_quant_dtypes["quint4"], scale, zero_point)




def qint4(scale):
@@ -133,42 +183,48 @@ def qint4(scale):
Construct a quantized int4 data type with ``scale`` (float). The real value
represented by a qint4 data type is float_val = scale * int4_val
"""
return get_quantized_dtype("qint4", scale, None)
return create_quantized_dtype(_builtin_quant_dtypes["qint4"], scale, None)




def _convert_to_quantized_dtype(arr: np.ndarray, dtype: np.dtype, dtype_str: str):
metadata = _metadata_dict[dtype_str]
arr_metadata = dtype.metadata["mgb_dtype"]
def _convert_to_quantized_dtype(
arr: np.ndarray, dtype: np.dtype, dtype_meta: QuantDtypeMeta
):
if not isinstance(arr, np.ndarray):
raise ValueError("arr parameter should be instance of np.ndarray")
if not is_quantize(dtype) or arr_metadata["name"] != metadata.name:
raise ValueError("dtype parameter should be a {} dtype".format(dtype_str))
is_unsigned = metadata.is_unsigned
if is_unsigned:
if (
not is_quantize(dtype)
or dtype.metadata["mgb_dtype"]["name"] != dtype_meta.cname
):
raise ValueError("dtype parameter should be a {} dtype".format(dtype_meta))
arr_metadata = dtype.metadata["mgb_dtype"]
if dtype_meta.is_unsigned:
scale, zp = (
arr_metadata["scale"],
arr_metadata["zero_point"],
)
return (
(np.round(arr / scale) + zp)
.clip(metadata.qmin, metadata.qmax)
.clip(dtype_meta.qmin, dtype_meta.qmax)
.astype(dtype)
)
else:
# don't trick to combine with is_unsigned, seeing ``get_quantized_dtype``
scale = arr_metadata["scale"]
return np.round(arr / scale).clip(metadata.qmin, metadata.qmax).astype(dtype)
return (
np.round(arr / scale).clip(dtype_meta.qmin, dtype_meta.qmax).astype(dtype)
)




def _convert_from_quantized_dtype(arr: np.ndarray, dtype_str: str):
metadata = _metadata_dict[dtype_str]
arr_metadata = arr.dtype.metadata["mgb_dtype"]
def _convert_from_quantized_dtype(arr: np.ndarray, dtype_meta: QuantDtypeMeta):
if not isinstance(arr, np.ndarray):
raise ValueError("arr parameter should be instance of np.ndarray")
if not is_quantize(arr.dtype) or arr_metadata["name"] != metadata.name:
raise ValueError("arr's dtype should be a {} dtype".format(dtype_str))
is_unsigned = metadata.is_unsigned
if is_unsigned:
if (
not is_quantize(arr.dtype)
or arr.dtype.metadata["mgb_dtype"]["name"] != dtype_meta.cname
):
raise ValueError("arr's dtype should be a {} dtype".format(dtype_meta))
arr_metadata = arr.dtype.metadata["mgb_dtype"]
if dtype_meta.is_unsigned:
scale, zp = (
arr_metadata["scale"],
arr_metadata["zero_point"],
@@ -187,7 +243,7 @@ def convert_to_quint8(arr: np.ndarray, q: np.dtype):
:param arr: Input ndarray.
:param q: Target data type, should be a quint8.
"""
return _convert_to_quantized_dtype(arr, q, "quint8")
return _convert_to_quantized_dtype(arr, q, _builtin_quant_dtypes["quint8"])




def convert_from_quint8(arr: np.ndarray):
@@ -196,7 +252,7 @@ def convert_from_quint8(arr: np.ndarray):


:param arr: Input ndarray.
"""
return _convert_from_quantized_dtype(arr, "quint8")
return _convert_from_quantized_dtype(arr, _builtin_quant_dtypes["quint8"])




def convert_to_qint8(arr: np.ndarray, q: np.dtype):
@@ -206,7 +262,7 @@ def convert_to_qint8(arr: np.ndarray, q: np.dtype):
:param arr: Input ndarray.
:param q: Target data type, should be a qint8.
"""
return _convert_to_quantized_dtype(arr, q, "qint8")
return _convert_to_quantized_dtype(arr, q, _builtin_quant_dtypes["qint8"])




def convert_from_qint8(arr: np.ndarray):
@@ -215,7 +271,7 @@ def convert_from_qint8(arr: np.ndarray):


:param arr: Input ndarray.
"""
return _convert_from_quantized_dtype(arr, "qint8")
return _convert_from_quantized_dtype(arr, _builtin_quant_dtypes["qint8"])




def convert_to_qint32(arr: np.ndarray, q: np.dtype):
@@ -225,7 +281,7 @@ def convert_to_qint32(arr: np.ndarray, q: np.dtype):
:param arr: Input ndarray.
:param q: Target data type, should be a qint32.
"""
return _convert_to_quantized_dtype(arr, q, "qint32")
return _convert_to_quantized_dtype(arr, q, _builtin_quant_dtypes["qint32"])




def convert_from_qint32(arr):
@@ -234,7 +290,7 @@ def convert_from_qint32(arr):


:param arr: Input ndarray.
"""
return _convert_from_quantized_dtype(arr, "qint32")
return _convert_from_quantized_dtype(arr, _builtin_quant_dtypes["qint32"])




def convert_to_quint4(arr: np.ndarray, q: np.dtype):
@@ -244,7 +300,7 @@ def convert_to_quint4(arr: np.ndarray, q: np.dtype):
:param arr: Input ndarray.
:param q: Target data type, should be a quint4.
"""
return _convert_to_quantized_dtype(arr, q, "quint4")
return _convert_to_quantized_dtype(arr, q, _builtin_quant_dtypes["quint4"])




def convert_from_quint4(arr: np.ndarray):
@@ -253,7 +309,7 @@ def convert_from_quint4(arr: np.ndarray):


:param arr: Input ndarray.
"""
return _convert_from_quantized_dtype(arr, "quint4")
return _convert_from_quantized_dtype(arr, _builtin_quant_dtypes["quint4"])




def convert_to_qint4(arr: np.ndarray, q: np.dtype):
@@ -263,7 +319,7 @@ def convert_to_qint4(arr: np.ndarray, q: np.dtype):
:param arr: Input ndarray.
:param q: Target data type, should be a qint4.
"""
return _convert_to_quantized_dtype(arr, q, "qint4")
return _convert_to_quantized_dtype(arr, q, _builtin_quant_dtypes["qint4"])




def convert_from_qint4(arr: np.ndarray):
@@ -272,4 +328,4 @@ def convert_from_qint4(arr: np.ndarray):


:param arr: Input ndarray.
"""
return _convert_from_quantized_dtype(arr, "qint4")
return _convert_from_quantized_dtype(arr, _builtin_quant_dtypes["qint4"])

+1 -1 imperative/python/megengine/functional/nn.py

@@ -203,7 +203,7 @@ def conv_transpose2d(
assert compute_mode == "DEFAULT" or compute_mode.name == "DEFAULT"


if groups != 1:
raise NotImplementedError("TODO")
raise NotImplementedError("group transposed conv2d is not supported yet.")


stride_h, stride_w = expand_hw(stride)
pad_h, pad_w = expand_hw(padding)


+0 -1 imperative/python/megengine/jit/tracing.py

@@ -13,7 +13,6 @@ import itertools
import json
import os
import typing
import warnings
import weakref


import numpy as np


+4 -7 imperative/python/megengine/module/module.py

@@ -5,7 +5,6 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import warnings
from abc import ABCMeta, abstractmethod
from collections import OrderedDict
from typing import Any, Callable, Iterable, Optional, Set, Tuple, Union
@@ -204,10 +203,9 @@ class Module(metaclass=ABCMeta):


if "requires_grad" in kwargs: if "requires_grad" in kwargs:
del kwargs["requires_grad"] del kwargs["requires_grad"]
warnings.warn(
logger.warning(
"Tensor currently has no requires_grad attribute " "Tensor currently has no requires_grad attribute "
"so requires_grad argument is ignored here",
DeprecationWarning,
"so requires_grad argument is ignored here"
) )


def predicate(obj) -> bool:
@@ -232,10 +230,9 @@ class Module(metaclass=ABCMeta):


if "requires_grad" in kwargs: if "requires_grad" in kwargs:
del kwargs["requires_grad"] del kwargs["requires_grad"]
warnings.warn(
logger.warning(
"Tensor currently has no requires_grad attribute " "Tensor currently has no requires_grad attribute "
"so requires_grad argument is ignored here",
DeprecationWarning,
"so requires_grad argument is ignored here"
) )


def predicate(obj) -> bool:


+12 -9 imperative/python/megengine/module/qat/module.py

@@ -7,7 +7,10 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from abc import abstractmethod from abc import abstractmethod


from ...quantization import FakeQuantize, Observer, QConfig
# avoid circular reference
from ...quantization.fake_quant import FakeQuantize
from ...quantization.observer import Observer
from ...quantization.qconfig import QConfig
from ...tensor import Tensor
from ..module import Module


@@ -73,19 +76,19 @@ class QATModule(Module):
# do observer
if observer is None:
oup = target
q_dict = None
qparams = None
else:
oup = observer(target)
q_dict = observer.get_qparams()
qparams = observer.get_qparams()
# do fake quant
if fake_quant is not None:
oup = fake_quant(oup, q_dict)
oup = fake_quant(oup, qparams)
# use qparams of fake_quant if have.
if hasattr(fake_quant, "get_qparams"):
q_dict = fake_quant.get_qparams()
qparams = fake_quant.get_qparams()
# set to tensor qparams.
if q_dict is not None:
oup.q_dict.update(q_dict)
if qparams is not None:
oup.qparams.update(qparams)
return oup


def apply_quant_weight(self, target: Tensor):
@@ -118,7 +121,7 @@ class QATModule(Module):
Get weight's quantization dtype as the method from ``qconfig``.
"""
return self._get_method_result(
"get_dtype", self.weight_fake_quant, self.weight_observer
"get_quantized_dtype", self.weight_fake_quant, self.weight_observer
)


def get_activation_dtype(self):
@@ -126,7 +129,7 @@ class QATModule(Module):
Get activation's quantization dtype as the method from ``qconfig``.
"""
return self._get_method_result(
"get_dtype", self.act_fake_quant, self.act_observer
"get_quantized_dtype", self.act_fake_quant, self.act_observer
)


def get_weight_qparams(self):
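For context, a sketch of how these renamed hooks are reached from a QAT module (the Linear setup is just an assumed example):

import megengine.module as M
from megengine.module.qat import Linear as QATLinear
from megengine.quantization import ema_fakequant_qconfig

qat_fc = QATLinear.from_float_module(M.Linear(16, 16))
qat_fc.set_qconfig(ema_fakequant_qconfig)
w_qparams = qat_fc.get_weight_qparams()  # now returns a QParams object instead of a dict
w_dtype = qat_fc.get_weight_dtype()      # delegates to the observer/fake_quant's get_quantized_dtype()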


+13 -3 imperative/python/megengine/quantization/__init__.py

@@ -7,8 +7,7 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.


from .fake_quant import FakeQuantize
from .internal_fake_quant import *
from .observer import HistogramObserver, Observer
from .observer import Observer
from .qconfig import (
QConfig,
calibration_qconfig,
@@ -20,4 +19,15 @@ from .qconfig import (
sync_ema_fakequant_qconfig,
tqt_qconfig,
)
from .utils import QuantMode
from .quantize import (
apply_easy_quant,
disable_fake_quant,
disable_observer,
enable_fake_quant,
enable_observer,
propagate_qconfig,
quantize,
quantize_qat,
reset_qconfig,
)
from .utils import QParams, QuantMode, create_qparams
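With these re-exports, downstream code can import the whole quantization surface from the package root, e.g.:

from megengine.quantization import (
    QParams,
    QuantMode,
    create_qparams,
    disable_observer,
    enable_fake_quant,
    quantize,
    quantize_qat,
)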

+61 -50 imperative/python/megengine/quantization/fake_quant.py

@@ -6,40 +6,48 @@
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import math
from typing import Union


from .. import functional as F
from ..core.tensor.dtype import _metadata_dict, get_quantized_dtype
from ..core.tensor.dtype import QuantDtypeMeta, _builtin_quant_dtypes
from ..logger import get_logger
from ..module import Module
from ..tensor import Parameter, Tensor
from .utils import QuantMode, fake_quant_tensor, get_qparam_dict, tqt_forward
from ..tensor import Parameter
from .utils import (
QParams,
QParamsModuleMixin,
QuantMode,
create_qparams,
fake_quant_tensor,
tqt_forward,
)


logger = get_logger(__name__)


class _FakeQuantize(Module):
r"""
A Basic Fake Quant module.

:param dtype: a string indicating the target quantization type of input.
:param narrow_range: whether the absolute value of ``qmin`` is the same as ``qmax``,
instead of 1 greater. Usually True for weight and False for activation.
:param enable: whether do ``normal_forward`` or ``fake_quant_forward``.
"""


class _FakeQuantize(Module):
def __init__(
self, dtype: str, narrow_range: bool = False, enable: bool = True, **kwargs
self, dtype: Union[str, QuantDtypeMeta], enable: bool = True, **kwargs
):
super().__init__()
if not dtype in _metadata_dict.keys():
raise ValueError(
"unknown dtype: {}, only support {}".format(
dtype, _metadata_dict.keys()
if isinstance(dtype, str):
if not dtype in _builtin_quant_dtypes:
raise ValueError(
"unknown dtype: {}, only support {}".format(
dtype, _builtin_quant_dtypes.keys()
)
)
dtype = _builtin_quant_dtypes[dtype]
if "narrow_range" in kwargs:
del kwargs["narrow_range"]
logger.warning(
"FakeQuantize currently has no narrow_range param "
"so it is ignored here",
exc_info=DeprecationWarning,
)
self.dtype = dtype
self.narrow_range = narrow_range
self.qmin = (
-_metadata_dict[dtype].qmax if narrow_range else _metadata_dict[dtype].qmin
)
self.qmax = _metadata_dict[dtype].qmax
self.qmin = dtype.qmin
self.qmax = dtype.qmax
self.enabled = enable


def enable(self):
@@ -48,61 +56,64 @@ class _FakeQuantize(Module):
def disable(self):
self.enabled = False


def fake_quant_forward(self, inp, q_dict=None):
return inp
def fake_quant_forward(self, inp, qparams: QParams = None):
raise NotImplementedError


def normal_foward(self, inp, q_dict=None):
def normal_foward(self, inp, qparams: QParams = None):
return inp


def forward(self, inp, q_dict=None):
def forward(self, inp, qparams: QParams = None):
if self.enabled:
return self.fake_quant_forward(inp, q_dict=q_dict)
return self.fake_quant_forward(inp, qparams=qparams)
else:
return self.normal_foward(inp, q_dict=q_dict)
return self.normal_foward(inp, qparams=qparams)




class TQT(_FakeQuantize):
class TQT(_FakeQuantize, QParamsModuleMixin):
r""" r"""
TQT: https://arxiv.org/abs/1903.08066 Trained Quantization Thresholds TQT: https://arxiv.org/abs/1903.08066 Trained Quantization Thresholds
for Accurate and Efficient Fixed-Point Inference of Deep Neural Networks. for Accurate and Efficient Fixed-Point Inference of Deep Neural Networks.

:param dtype: a string or :class:`~.QuantDtypeMeta` indicating the target
quantization dtype of input.
:param enable: whether do ``normal_forward`` or ``fake_quant_forward``.
""" """


def __init__(
self, dtype: str, narrow_range: bool = False, enable: bool = True, **kwargs
self, dtype: Union[str, QuantDtypeMeta], enable: bool = True, **kwargs
):
super().__init__(dtype, narrow_range, enable, **kwargs)
super().__init__(dtype, enable, **kwargs)
self.scale = Parameter(0.0, dtype="float32")


def fake_quant_forward(self, inp, q_dict=None):
def fake_quant_forward(self, inp, qparams: QParams = None):
# when enable, TQT will do fakequant forward, finetune the scale
return tqt_forward(self.qmin, self.qmax, inp, self.scale)


def get_qparams(self):
q_dict = get_qparam_dict(QuantMode.SYMMERTIC)
q_dict["scale"] = 2 ** self.scale.detach()
return q_dict

def set_qparams(self, q_dict):
def set_qparams(self, qparams: QParams):
assert (
q_dict["mode"] == QuantMode.SYMMERTIC
qparams.mode == QuantMode.SYMMERTIC
), "only symmetric quantization is supported by TQT"
if "scale" not in q_dict or q_dict["scale"] is None:
if qparams.scale is None:
raise AssertionError("Can not get an initialized scale")
self.scale._reset(F.log(q_dict["scale"]) / math.log(2))
self.scale[...] = F.log(qparams.scale) / math.log(2)


def get_dtype(self):
q_dict = self.get_qparams()
scale = None if "scale" not in q_dict else q_dict["scale"].numpy()
zero_point = (
None if "zero_point" not in q_dict else q_dict["zero_point"].numpy()
)
return get_quantized_dtype(self.dtype, scale, zero_point)
def get_qparams(self):
return create_qparams(QuantMode.SYMMERTIC, self.dtype, scale=2 ** self.scale)




class FakeQuantize(_FakeQuantize):
r"""
A module to do quant and dequant according to observer's scale and zero_point.

:param dtype: a string or :class:`~.QuantDtypeMeta` indicating the target
quantization dtype of input.
:param enable: whether do ``normal_forward`` or ``fake_quant_forward``.
"""


def fake_quant_forward(self, inp, q_dict=None):
return fake_quant_tensor(inp, self.qmin, self.qmax, q_dict)
def fake_quant_forward(self, inp, qparams: QParams = None):
assert (
qparams.dtype_meta is self.dtype
), "input qparams' dtype is not equal to self.dtype.\nqparams.dtype_meta={}\nself.dtype={}".format(
qparams.dtype_meta, self.dtype
)
return fake_quant_tensor(inp, qparams)
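A sketch of the new call contract (illustrative values): FakeQuantize now consumes a QParams, and its dtype_meta must be the very same QuantDtypeMeta object as the module's dtype, which is why dtype metas behave as singletons under (deep)copy:

import numpy as np
from megengine import Tensor
from megengine.quantization import QuantMode, create_qparams
from megengine.quantization.fake_quant import FakeQuantize

fq = FakeQuantize(dtype="qint8")  # a QuantDtypeMeta also works here
qparams = create_qparams(QuantMode.SYMMERTIC, "qint8", scale=Tensor(0.05))
x = Tensor(np.random.randn(4).astype("float32"))
y = fq(x, qparams)  # values snapped to the qint8 grid, dtype stays float32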

+2 -0 imperative/python/megengine/quantization/internal_fake_quant.py

@@ -16,4 +16,6 @@ from ..autodiff import Function
from .fake_quant import _FakeQuantize
from .observer import MinMaxObserver
from .qconfig import QConfig
from .utils import QParams




+84 -80 imperative/python/megengine/quantization/observer.py

@@ -8,51 +8,51 @@
import math
from abc import abstractmethod
from copy import deepcopy
from typing import Union


import numpy as np


from .. import functional as F
from ..core.tensor.dtype import _metadata_dict, get_quantized_dtype
from ..core.tensor.dtype import QuantDtypeMeta, _builtin_quant_dtypes
from ..distributed import WORLD, get_rank, is_distributed
from ..functional.distributed import all_reduce_max, all_reduce_min
from ..logger import get_logger
from ..module import Module
from ..tensor import Tensor
from .utils import QuantMode, get_qparam_dict
from .utils import QParams, QParamsModuleMixin, QuantMode, create_qparams


logger = get_logger(__name__)


class Observer(Module):

class Observer(Module, QParamsModuleMixin):
r""" r"""
A base class for Observer Module. A base class for Observer Module.


:param dtype: a string indicating to collect scale and zero_point of which dtype. :param dtype: a string indicating to collect scale and zero_point of which dtype.
:param narrow_range: whether the absolute value of ``qmin`` is the same as ``qmax``,
instead of 1 greater. Usually True for weight and False for activation.
""" """


def __init__(self, dtype: str, narrow_range: bool = False, **kwargs):
def __init__(self, dtype: Union[str, QuantDtypeMeta], **kwargs):
super().__init__()
if dtype not in _metadata_dict.keys():
raise ValueError(
"unknown dtype: {}, only support {}".format(
dtype, _metadata_dict.keys()
if isinstance(dtype, str):
if not dtype in _builtin_quant_dtypes:
raise ValueError(
"unknown dtype: {}, only support {}".format(
dtype, _builtin_quant_dtypes.keys()
)
)
dtype = _builtin_quant_dtypes[dtype]
if "narrow_range" in kwargs:
del kwargs["narrow_range"]
logger.warning(
"FakeQuantize currently has no narrow_range param "
"so it is ignored here",
exc_info=DeprecationWarning,
)
self.dtype = dtype
self.narrow_range = narrow_range
self.qmin = (
-_metadata_dict[dtype].qmax if narrow_range else _metadata_dict[dtype].qmin
)
self.qmax = _metadata_dict[dtype].qmax
self.qmin = dtype.qmin
self.qmax = dtype.qmax
self.enabled = True


def get_dtype(self):
q_dict = self.get_qparams()
numpy_scale = None if "scale" not in q_dict else q_dict["scale"].numpy()
numpy_zero_point = (
None if "zero_point" not in q_dict else q_dict["zero_point"].numpy()
)
return get_quantized_dtype(self.dtype, numpy_scale, numpy_zero_point)

def enable(self):
self.enabled = True


@@ -70,21 +70,16 @@ class Observer(Module):
def forward(self, x):
pass


@abstractmethod
def get_qparams(self, **kwargs):
pass



class MinMaxObserver(Observer):
def __init__(
self,
mode=QuantMode.SYMMERTIC,
eps=0.00001,
dtype="qint8",
narrow_range: bool = False,
mode: QuantMode = QuantMode.SYMMERTIC,
eps: float = 0.00001,
dtype: Union[str, QuantDtypeMeta] = "qint8",
**kwargs
):
super().__init__(dtype, narrow_range, **kwargs)
super().__init__(dtype, **kwargs)
self.mode = mode
self.min_val = Tensor(np.finfo(np.float32).max, dtype=np.float32)
self.max_val = Tensor(np.finfo(np.float32).min, dtype=np.float32)
@@ -93,26 +88,22 @@ class MinMaxObserver(Observer):
def _calculate_qparams(self, inp_min_val, inp_max_val):
min_val = F.minimum(0.0, inp_min_val)
max_val = F.maximum(0.0, inp_max_val)
q_dict = get_qparam_dict(self.mode)
q_dict["min_val"] = inp_min_val
q_dict["max_val"] = inp_max_val
q_dict["enable_observer"] = self.enable
if self.mode == QuantMode.SYMMERTIC:
symmetric_max_vals = F.maximum(-min_val, max_val)
# use maximum to avoid scale too small at the beginning
q_dict["scale"] = F.maximum(
scale = F.maximum(
symmetric_max_vals / ((self.qmax - self.qmin) / 2), self.scale_limit
)
# zero_point = self.zero_point
zero_point = None
else:
# use maximum to avoid scale too small at the beginning
q_dict["scale"] = F.maximum(
scale = F.maximum(
(max_val - min_val) / (self.qmax - self.qmin), self.scale_limit
)
# calculate zero_point
q_dict["zero_point"] = self.qmin - F.round(min_val / q_dict["scale"])
zero_point = self.qmin - F.round((min_val / scale))


return q_dict
return create_qparams(self.mode, self.dtype, scale=scale, zero_point=zero_point)


def get_qparams(self):
return self._calculate_qparams(self.min_val, self.max_val)
@@ -122,8 +113,8 @@ class MinMaxObserver(Observer):
# stop gradient
x = x_orig.detach()
# find max and min
self.min_val._reset(F.minimum(self.min_val, x.min()))
self.max_val._reset(F.maximum(self.max_val, x.max()))
self.min_val[...] = F.minimum(self.min_val, x.min())
self.max_val[...] = F.maximum(self.max_val, x.max())
return x_orig




@@ -137,42 +128,43 @@ class SyncMinMaxObserver(MinMaxObserver):
else:
min_x = x.min()
max_x = x.max()
self.min_val._reset(F.minimum(self.min_val, min_x))
self.max_val._reset(F.maximum(self.max_val, max_x))
self.min_val[...] = F.minimum(self.min_val, min_x)
self.max_val[...] = F.maximum(self.max_val, max_x)
return x_orig




class ExponentialMovingAverageObserver(MinMaxObserver):
def __init__(
self,
momentum=0.9,
mode=QuantMode.SYMMERTIC,
eps=0.00001,
dtype="qint8",
narrow_range: bool = False,
momentum: float = 0.9,
mode: QuantMode = QuantMode.SYMMERTIC,
eps: float = 0.00001,
dtype: Union[str, QuantDtypeMeta] = "qint8",
**kwargs
):
super().__init__(mode, eps, dtype, narrow_range, **kwargs)
super().__init__(mode, eps, dtype, **kwargs)
self.momentum = Tensor(momentum, dtype="float32")
# used to avoid if-clauses in the first forward which is not supported
# in trace mode.
self.runtime_momentum = Tensor(0.0) self.runtime_momentum = Tensor(0.0)


def set_momentum(self, momentum):
self.momentum = Tenosr(momentum, dtype="float32")
self.momentum = Tensor(momentum, dtype="float32")


def forward(self, x_orig):
if self.enabled:
# stop gradient
x = x_orig.detach()
# Exponential Moving Average
self.min_val._reset(
self.min_val[...] = (
self.min_val * self.runtime_momentum
+ (1 - self.runtime_momentum) * x.min()
)
self.max_val._reset(
self.max_val[...] = (
self.max_val * self.runtime_momentum
+ (1 - self.runtime_momentum) * x.max()
)
self.runtime_momentum = self.momentum
self.runtime_momentum[...] = self.momentum


return x_orig


@@ -187,33 +179,34 @@ class SyncExponentialMovingAverageObserver(ExponentialMovingAverageObserver):
else:
min_x = x.min()
max_x = x.max()
self.min_val._reset(
self.min_val[...] = (
self.min_val * self.runtime_momentum
+ (1 - self.runtime_momentum) * min_x
)
self.max_val._reset(
self.max_val[...] = (
self.max_val * self.runtime_momentum
+ (1 - self.runtime_momentum) * max_x
)
self.runtime_momentum = self.momentum
self.runtime_momentum[...] = self.momentum
return x_orig




class HistogramObserver(MinMaxObserver):
def __init__(
self,
bins=2048,
upsample_rate=128,
mode=QuantMode.SYMMERTIC,
eps=0.00001,
dtype="qint8",
narrow_range: bool = False,
bins: int = 2048,
upsample_rate: int = 128,
mode: QuantMode = QuantMode.SYMMERTIC,
eps: float = 0.00001,
dtype: Union[str, QuantDtypeMeta] = "qint8",
**kwargs
):
super().__init__(mode, eps, dtype, narrow_range, **kwargs)
super().__init__(mode, eps, dtype, **kwargs)
self.bins = bins
self.upsample_rate = upsample_rate
self.dst_nbins = _metadata_dict[dtype].qmax - _metadata_dict[dtype].qmin + 1
self.dst_nbins = (
_builtin_quant_dtypes[dtype].qmax - _builtin_quant_dtypes[dtype].qmin + 1
)
self.histogram = Tensor([-1] + [0.0] * (bins - 1), dtype="float32")


def _non_linear_param_search(self):
@@ -450,34 +443,45 @@ class HistogramObserver(MinMaxObserver):


class PassiveObserver(Observer):
r"""
This class can be set :attr:`scale` derectly.
An Observer that supports setting :attr:`scale` directly.
"""


def __init__(self, dtype: str, narrow_range: bool = False, **kwargs):
super().__init__(dtype, narrow_range, **kwargs)
self.q_dict = None
def __init__(self, dtype: Union[str, QuantDtypeMeta], **kwargs):
super().__init__(dtype, **kwargs)
self.qparams = None
self.orig_scale = None


@property
def scale(self):
return self.q_dict["scale"]
return self.qparams.scale


@scale.setter
def scale(self, value):
assert value > 0
self.q_dict["scale"][...] = Tensor(value)
def scale(self, value: np.ndarray):
assert np.all(value > 0)
self.qparams.scale[...] = Tensor(value)


def get_qparams(self):
return self.q_dict
return self.qparams


def set_qparams(self, q_dict):
self.q_dict = deepcopy(q_dict)
if "scale" not in q_dict or q_dict["scale"] is None:
def set_qparams(self, qparams: QParams):
"""
:param qparams: used to set initial scale.
"""
self.qparams = deepcopy(qparams)
if qparams.scale is None:
raise AssertionError("Can not get an initialized scale") raise AssertionError("Can not get an initialized scale")
self.orig_scale = q_dict["scale"].numpy()
if qparams.dtype_meta is None:
qparams.dtype_meta = self.dtype
else:
assert (
qparams.dtype_meta is self.dtype
), "input qparams' dtype is not equal to self.dtype.\nqparams.dtype_meta={}\nself.dtype={}".format(
qparams.dtype_meta, self.dtype
)
self.orig_scale = qparams.scale.numpy()


def forward(self, x):
r"""
Just return input because :attr:`q_dict` is set by :func:`~.apply_easy_quant`.
Just return input because :attr:`qparams` is set by :func:`~.apply_easy_quant`.
"""
return x
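Observer usage after the refactor, sketched with arbitrary data (narrow_range is now expressed through the dtype name):

import numpy as np
from megengine import Tensor
from megengine.quantization.observer import MinMaxObserver

obs = MinMaxObserver(dtype="qint8_narrow")  # replaces dtype="qint8", narrow_range=True
obs(Tensor(np.random.randn(128).astype("float32")))
qparams = obs.get_qparams()           # a QParams carrying mode, dtype_meta, scale, zero_point
np_dtype = obs.get_quantized_dtype()  # numpy dtype, provided by QParamsModuleMixin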

+44 -61 imperative/python/megengine/quantization/qconfig.py

@@ -5,6 +5,7 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from collections import namedtuple
from functools import partial


from ..module import Module
@@ -19,7 +20,13 @@ from .observer import (
)




class QConfig:
# use namedtuple to make class immutable, comparable and easy to print
class QConfig(
namedtuple(
"QConfig",
["weight_observer", "act_observer", "weight_fake_quant", "act_fake_quant"],
)
):
r""" r"""
A config class indicating how to do quantize toward :class:`~.QATModule`'s A config class indicating how to do quantize toward :class:`~.QATModule`'s
``activation`` and ``weight``. See :meth:`~.QATModule.set_qconfig` for detail usage. ``activation`` and ``weight``. See :meth:`~.QATModule.set_qconfig` for detail usage.
@@ -37,90 +44,66 @@ class QConfig:


# Default EMA QConfig for QAT.
ema_fakequant_qconfig = QConfig(
weight_observer=partial(MinMaxObserver, dtype="qint8", narrow_range=True),
act_observer=partial(ExponentialMovingAverageObserver, dtype="qint8", narrow_range=False),
weight_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=True),
act_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=False),
weight_observer=partial(MinMaxObserver, dtype="qint8_narrow"),
act_observer=partial(ExponentialMovingAverageObserver, dtype="qint8"),
weight_fake_quant=partial(FakeQuantize, dtype="qint8_narrow"),
act_fake_quant=partial(FakeQuantize, dtype="qint8"),
)


Each parameter is a ``class`` rather than an instance. And we recommend using ``functools.partial``
to add initialization parameters of the ``class``, so that you don't need to provide parameters in
:meth:`~.QATModule.set_qconfig`.


Usually we set ``narrow_range`` of weight related paramters to ``True`` and of activation related
parameters to ``False``. For the result of multiplication and addition as ``a * b + c * d``, if
four variables are all -128 of dtype ``qint8``, then the result will be ``2^15`` and cause overflow.
Weights are commonly calculated in this way, so needed to narrow the range.
Usually we choose narrow version dtype (like ``qint8_narrow``) for weight related
paramters and normal version for activation related ones. For the result of
multiplication and addition as ``a * b + c * d``, if four variables are all -128 of
dtype ``qint8``, then the result will be ``2^15`` and cause overflow.
Weights are commonly calculated in this way, so need to narrow qmin to -127.
""" """


def __init__(
self, weight_observer, act_observer, weight_fake_quant, act_fake_quant
):
def __new__(cls, weight_observer, act_observer, weight_fake_quant, act_fake_quant):
if isinstance(act_observer, Module) or isinstance(weight_observer, Module):
raise ValueError(
"QConfig must not receive observer instance, please pass observer"
" class generator using `partial(Observer, ...)` instead. Use"
" partial(MyObserver, x=1) to override arguments to constructor if needed"
)
self.weight_observer = weight_observer
self.act_observer = act_observer
self.weight_fake_quant = weight_fake_quant
self.act_fake_quant = act_fake_quant

def __eq__(self, other):
def eq(a, b):
if isinstance(a, partial) and isinstance(b, partial):
return all(
[a.func == b.func, a.args == b.args, a.keywords == b.keywords]
)
else:
return a == b

return (
eq(self.weight_observer, other.weight_observer)
and eq(self.act_observer, other.act_observer)
and eq(self.weight_fake_quant, other.weight_fake_quant)
and eq(self.act_fake_quant, other.act_fake_quant)
return super().__new__(
cls, weight_observer, act_observer, weight_fake_quant, act_fake_quant
)




min_max_fakequant_qconfig = QConfig(
weight_observer=partial(MinMaxObserver, dtype="qint8", narrow_range=True),
act_observer=partial(MinMaxObserver, dtype="qint8", narrow_range=False),
weight_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=True),
act_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=False),
weight_observer=partial(MinMaxObserver, dtype="qint8_narrow"),
act_observer=partial(MinMaxObserver, dtype="qint8"),
weight_fake_quant=partial(FakeQuantize, dtype="qint8_narrow"),
act_fake_quant=partial(FakeQuantize, dtype="qint8"),
)


ema_fakequant_qconfig = QConfig(
weight_observer=partial(MinMaxObserver, dtype="qint8", narrow_range=True),
act_observer=partial(
ExponentialMovingAverageObserver, dtype="qint8", narrow_range=False
),
weight_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=True),
act_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=False),
weight_observer=partial(MinMaxObserver, dtype="qint8_narrow"),
act_observer=partial(ExponentialMovingAverageObserver, dtype="qint8"),
weight_fake_quant=partial(FakeQuantize, dtype="qint8_narrow"),
act_fake_quant=partial(FakeQuantize, dtype="qint8"),
)


sync_ema_fakequant_qconfig = QConfig(
weight_observer=partial(SyncMinMaxObserver, dtype="qint8", narrow_range=True),
act_observer=partial(
SyncExponentialMovingAverageObserver, dtype="qint8", narrow_range=False
),
weight_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=True),
act_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=False),
weight_observer=partial(SyncMinMaxObserver, dtype="qint8_narrow"),
act_observer=partial(SyncExponentialMovingAverageObserver, dtype="qint8"),
weight_fake_quant=partial(FakeQuantize, dtype="qint8_narrow"),
act_fake_quant=partial(FakeQuantize, dtype="qint8"),
)


ema_lowbit_fakequant_qconfig = QConfig(
weight_observer=partial(MinMaxObserver, dtype="qint4", narrow_range=False),
act_observer=partial(
ExponentialMovingAverageObserver, dtype="qint4", narrow_range=False
),
weight_fake_quant=partial(FakeQuantize, dtype="qint4", narrow_range=False),
act_fake_quant=partial(FakeQuantize, dtype="qint4", narrow_range=False),
weight_observer=partial(MinMaxObserver, dtype="qint4"),
act_observer=partial(ExponentialMovingAverageObserver, dtype="qint4"),
weight_fake_quant=partial(FakeQuantize, dtype="qint4"),
act_fake_quant=partial(FakeQuantize, dtype="qint4"),
)


calibration_qconfig = QConfig(
weight_observer=partial(MinMaxObserver, dtype="qint8", narrow_range=True),
act_observer=partial(HistogramObserver, dtype="qint8", narrow_range=False),
weight_observer=partial(MinMaxObserver, dtype="qint8_narrow"),
act_observer=partial(HistogramObserver, dtype="qint8"),
weight_fake_quant=None,
act_fake_quant=None,
)
@@ -128,15 +111,15 @@ calibration_qconfig = QConfig(
tqt_qconfig = QConfig(
weight_observer=None,
act_observer=None,
weight_fake_quant=partial(TQT, dtype="qint8", narrow_range=True),
act_fake_quant=partial(TQT, dtype="qint8", narrow_range=False),
weight_fake_quant=partial(TQT, dtype="qint8_narrow"),
act_fake_quant=partial(TQT, dtype="qint8"),
)


passive_qconfig = QConfig(
weight_observer=partial(PassiveObserver, dtype="qint8", narrow_range=True),
act_observer=partial(PassiveObserver, dtype="qint8", narrow_range=False),
weight_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=True),
act_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=False),
weight_observer=partial(PassiveObserver, dtype="qint8_narrow"),
act_observer=partial(PassiveObserver, dtype="qint8"),
weight_fake_quant=partial(FakeQuantize, dtype="qint8_narrow"),
act_fake_quant=partial(FakeQuantize, dtype="qint8"),
)


easyquant_qconfig = passive_qconfig
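A custom config is built exactly like the builtins above; note that with the namedtuple base, equality is now per-field (two separately constructed partials compare by identity):

from functools import partial
from megengine.quantization.fake_quant import FakeQuantize
from megengine.quantization.observer import ExponentialMovingAverageObserver, MinMaxObserver
from megengine.quantization.qconfig import QConfig

my_qconfig = QConfig(
    weight_observer=partial(MinMaxObserver, dtype="qint8_narrow"),
    act_observer=partial(ExponentialMovingAverageObserver, dtype="qint8"),
    weight_fake_quant=partial(FakeQuantize, dtype="qint8_narrow"),
    act_fake_quant=partial(FakeQuantize, dtype="qint8"),
)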

+12 -9 imperative/python/megengine/quantization/quantize.py

@@ -18,6 +18,7 @@ from ..module import qat as QAT
from ..module import quantized as Quantized
from ..module.qat import QATModule
from ..module.quantized import QuantizedModule
from ..tensor import Tensor
from .qconfig import QConfig, ema_fakequant_qconfig




@@ -147,10 +148,10 @@ def reset_qconfig(module: Module, qconfig: QConfig, inplace: bool = True):
if not inplace:
module = deepcopy(module)


def safe_call(func, q_dict):
def safe_call(func, qparams):
inst = func() if func is not None else None
if inst is not None and getattr(inst, "set_qparams", None) is not None:
inst.set_qparams(q_dict)
inst.set_qparams(qparams)
return inst


def is_qat(mod: Module):
@@ -158,13 +159,13 @@ def reset_qconfig(module: Module, qconfig: QConfig, inplace: bool = True):


for m in list(module._flatten(predicate=is_qat)):
if m.with_weight:
weight_q_dict = m.get_weight_qparams()
m.weight_observer = safe_call(qconfig.weight_observer, weight_q_dict)
m.weight_fake_quant = safe_call(qconfig.weight_fake_quant, weight_q_dict)
weight_params = m.get_weight_qparams()
m.weight_observer = safe_call(qconfig.weight_observer, weight_params)
m.weight_fake_quant = safe_call(qconfig.weight_fake_quant, weight_params)
if m.with_act:
act_q_dict = m.get_activation_qparams()
m.act_observer = safe_call(qconfig.act_observer, act_q_dict)
m.act_fake_quant = safe_call(qconfig.act_fake_quant, act_q_dict)
act_params = m.get_activation_qparams()
m.act_observer = safe_call(qconfig.act_observer, act_params)
m.act_fake_quant = safe_call(qconfig.act_fake_quant, act_params)


return module


@@ -202,7 +203,9 @@ def hook_qat_module(module: Module, func: Callable):
return hooks




def apply_easy_quant(module, data, start=0.8, stop=1.2, num=40):
def apply_easy_quant(
module: Module, data: Tensor, start: float = 0.8, stop: float = 1.2, num: int = 40
):
r""" r"""
Implementation of ``EasyQuant``: https://arxiv.org/pdf/2006.16669. Implementation of ``EasyQuant``: https://arxiv.org/pdf/2006.16669.
Search for optimal scales. Search for optimal scales.
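A typical calibration flow with the new signature might look like this (float_net and calib_batch are placeholders for your module and data):

from megengine.quantization import apply_easy_quant, quantize_qat
from megengine.quantization.qconfig import easyquant_qconfig

qat_net = quantize_qat(float_net, qconfig=easyquant_qconfig)
apply_easy_quant(qat_net, calib_batch, start=0.8, stop=1.2, num=40)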


+111 -31 imperative/python/megengine/quantization/utils.py

@@ -5,9 +5,10 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import abc
from enum import Enum
from functools import partial, update_wrapper, wraps
from typing import Dict
from typing import Union


import numpy as np


@@ -15,7 +16,11 @@ from .. import functional as F
from ..autodiff import Function
from ..core._imperative_rt.core2 import apply
from ..core.ops import builtin
from ..core.tensor.dtype import _metadata_dict
from ..core.tensor.dtype import (
QuantDtypeMeta,
_builtin_quant_dtypes,
create_quantized_dtype,
)
from ..tensor import Tensor




@@ -61,37 +66,100 @@ class QuantMode(Enum):
ASYMMERTIC = 2




qparam_dict = {
QuantMode.SYMMERTIC: {"mode": QuantMode.SYMMERTIC, "scale": None},
QuantMode.ASYMMERTIC: {
"mode": QuantMode.ASYMMERTIC,
"scale": None,
"zero_point": None,
},
class QParams:
"""
To standardize FakeQuant, Observer and Tensor's qparams format. If custom
qparams is needed, inherit this class and add custom ``__slots__``.
"""

__slots__ = "mode", "dtype_meta", "scale", "zero_point"

def __init__(
self,
mode: QuantMode,
dtype_meta: QuantDtypeMeta,
scale: Tensor,
zero_point: Tensor,
):
self.mode = mode
self.dtype_meta = dtype_meta
self.scale = scale
self.zero_point = zero_point

def update(self, qparams: "QParams"):
for key in self.__slots__:
setattr(self, key, getattr(qparams, key))

def __eq__(self, other):
if len(self.__slots__) != len(other.__slots__):
return False
for key in self.__slots__:
if not hasattr(other, key) or getattr(self, key) != getattr(other, key):
return False
return True

def __repr__(self):
content = ", ".join(
["{}={}".format(key, getattr(self, key)) for key in self.__slots__]
)
return "QParams({})".format(content)


class QParamsModuleMixin(abc.ABC):
def get_quantized_dtype(self):
qparams = self.get_qparams()
dtype = qparams.dtype_meta
scale = float(qparams.scale.numpy()) if qparams.scale is not None else None
zero_point = (
int(qparams.zero_point.numpy()) if qparams.zero_point is not None else None
)
return create_quantized_dtype(dtype, scale, zero_point)

@abc.abstractmethod
def get_qparams(self) -> QParams:
pass


_builtin_qparams = {
QuantMode.SYMMERTIC: partial(QParams, mode=QuantMode.SYMMERTIC),
QuantMode.ASYMMERTIC: partial(QParams, mode=QuantMode.ASYMMERTIC),
}




def get_qparam_dict(mode: QuantMode):
def create_qparams(
mode: QuantMode = QuantMode.SYMMERTIC,
dtype_meta: Union[str, QuantDtypeMeta] = None,
scale: Tensor = None,
zero_point: Tensor = None,
):
""" """
Return the quantization parameters dictionary according to the mode.
Return :class:`~.QParams` according to the mode.
""" """
return qparam_dict.get(mode, None)
if isinstance(dtype_meta, str):
dtype_meta = _builtin_quant_dtypes[dtype_meta]
if mode is None:
return QParams(mode, dtype_meta, scale, zero_point)
assert isinstance(mode, QuantMode)
return _builtin_qparams[mode](
dtype_meta=dtype_meta, scale=scale, zero_point=zero_point
)
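For reference, a short sketch of the factory in use; it assumes "qint8" and "quint8" are both registered in ``_builtin_quant_dtypes``, which the tests in this diff suggest:

import megengine as mge
from megengine.quantization.utils import QuantMode, create_qparams

# string dtype names are looked up in _builtin_quant_dtypes
sym = create_qparams(QuantMode.SYMMERTIC, "qint8", scale=mge.Tensor(0.05))
asym = create_qparams(
    QuantMode.ASYMMERTIC,
    "quint8",
    scale=mge.Tensor(0.05),
    zero_point=mge.Tensor(128.0),
)
print(sym)  # QParams(mode=QuantMode.SYMMERTIC, dtype_meta=..., scale=..., zero_point=None)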




def fake_quant_tensor(inp: Tensor, qmin: int, qmax: int, q_dict: Dict) -> Tensor:
def fake_quant_tensor(inp: Tensor, qparams: QParams) -> Tensor:
""" """
Apply fake quantization to the inp tensor. Apply fake quantization to the inp tensor.


:param inp: the input tensor which need to be faked. :param inp: the input tensor which need to be faked.
:param qmin: the minimum value which the integer limit to.
:param qmax: the maximum value which the integer limit to.
:param q_dict: the quantization parameter dict.
    :param qparams: the QParams providing mode, qmin, qmax, scale and zero_point.


""" """
scale = q_dict["scale"]
zero_point = Tensor([0.0], dtype=np.float32)
if q_dict["mode"] == QuantMode.ASYMMERTIC:
zero_point = q_dict["zero_point"]
scale = qparams.scale
if qparams.mode == QuantMode.ASYMMERTIC:
zero_point = qparams.zero_point
else:
zero_point = Tensor([0.0], dtype=np.float32)
qmin = qparams.dtype_meta.qmin
qmax = qparams.dtype_meta.qmax


    op = builtin.FakeQuant(qmin=qmin, qmax=qmax)
    return apply(op, inp, scale, zero_point)[0]
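Putting the pieces together, a minimal fake-quantization round trip against the new signature might look like this sketch (the scale value is arbitrary):

import numpy as np

import megengine as mge
from megengine.quantization.utils import QuantMode, create_qparams, fake_quant_tensor

x = mge.Tensor(np.random.uniform(-1.0, 1.0, (2, 3)).astype(np.float32))
qparams = create_qparams(QuantMode.SYMMERTIC, "qint8", scale=mge.Tensor(2.0 / 255))
y = fake_quant_tensor(x, qparams)  # still float32, but snapped onto the qint8 grid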
@@ -104,22 +172,34 @@ def fake_quant_bias(bias: Tensor, inp: Tensor, w_qat: Tensor) -> Tensor:


    :param bias: the bias tensor which needs to be faked.
    :param inp: the input tensor which contains the quantization parameters.
:param qmax: the weight tensor which contain the quantization parameters.
    :param w_qat: the weight tensor which contains the quantization parameters.


    .. warning::
        Only works for the symmetric quantization method for now.


""" """
b_qat = bias b_qat = bias
if hasattr(inp, "q_dict") and b_qat is not None:
if inp.q_dict["scale"] is not None and w_qat.q_dict["scale"] is not None:
# use the same mode with weight.
b_dict = get_qparam_dict(w_qat.q_dict["mode"])
b_dict["scale"] = inp.q_dict["scale"] * w_qat.q_dict["scale"]
# TODO: add zero_point for ASYMMERTIC mode.
qmax = _metadata_dict["qint32"].qmax
qmin = _metadata_dict["qint32"].qmin
b_qat = fake_quant_tensor(b_qat, qmin, qmax, b_dict)
b_qat.q_dict.update(b_dict)
if (
getattr(inp, "qparams", None) is not None
and getattr(w_qat, "qparams", None) is not None
and bias is not None
):
inp_params = inp.qparams
w_params = w_qat.qparams
if inp_params.scale is not None and w_params.scale is not None:
assert inp_params.mode == w_params.mode, "incompatible QuantMode"
# TODO: support quint8 dtype.
assert (
inp_params.dtype_meta.np_dtype_str == "int8"
and w_params.dtype_meta.np_dtype_str == "int8"
), "fake_quant_bias only support int8 like dtype now"


# use the same mode with weight.
# TODO: avoid hardcode
b_dtype = _builtin_quant_dtypes["qint32"]
b_param = create_qparams(
w_params.mode, b_dtype, scale=inp_params.scale * w_params.scale
)
b_qat = fake_quant_tensor(bias, b_param)
b_qat.qparams.update(b_param)
    return b_qat
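The invariant worth calling out: the bias scale is derived rather than observed, i.e. scale_bias = scale_input * scale_weight, faked against qint32's range. A hedged usage sketch, with all scale values arbitrary:

import numpy as np

import megengine as mge
from megengine.quantization.utils import QuantMode, create_qparams, fake_quant_bias

inp = mge.Tensor(np.random.randn(4).astype(np.float32))
inp.qparams.update(create_qparams(QuantMode.SYMMERTIC, "qint8", mge.Tensor(0.1)))

w_qat = mge.Tensor(np.random.randn(4).astype(np.float32))
w_qat.qparams.update(create_qparams(QuantMode.SYMMERTIC, "qint8", mge.Tensor(0.02)))

bias = mge.Tensor(np.random.randn(4).astype(np.float32))
b_qat = fake_quant_bias(bias, inp, w_qat)
# b_qat.qparams.scale == 0.1 * 0.02, quantized against qint32's qmin/qmax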

+ 28
- 9
imperative/python/megengine/tensor.py View File

@@ -22,6 +22,8 @@ from .logger import get_logger
from .utils.deprecation import deprecated
from .utils.naming import auto_naming


logger = get_logger(__name__)



class Tensor(_Tensor, ArrayMethodMixin):
    r"""
@@ -30,7 +32,7 @@ class Tensor(_Tensor, ArrayMethodMixin):


    grad = None
    dmap_callback = None
_q_dict = None
_qparams = None


    def __new__(
        cls, data, dtype=None, device=None, is_const=False, no_cache=False, name=None
@@ -50,7 +52,7 @@ class Tensor(_Tensor, ArrayMethodMixin):


        if isinstance(data, _Tensor):
            if dtype is not None:
                get_logger().warning(
                logger.warning(
                    "dtype does not work when creating a new Tensor with another Tensor"
                )
            obj = _Tensor.__new__(cls, data)
@@ -101,10 +103,12 @@ class Tensor(_Tensor, ArrayMethodMixin):
        return super().dtype


    @property
def q_dict(self):
if self._q_dict is None:
self._q_dict = {"mode": None, "scale": None, "zero_point": None}
return self._q_dict
def qparams(self):
from .quantization.utils import create_qparams # pylint: disable=all

if self._qparams is None:
self._qparams = create_qparams()
return self._qparams


    def numpy(self) -> np.ndarray:
        r"""
@@ -185,14 +189,29 @@ class Tensor(_Tensor, ArrayMethodMixin):
    def __getstate__(self):
        r""" __getstate__ will be called for pickle serialization or deep copy
        """

        state = {
"qdict": self.q_dict,
"numpy": self.numpy(),
"dtype": self.dtype,
"device": self.device.logical_name,
        }
if self._qparams is not None:
state["qparams"] = self._qparams
        return state


    def __setstate__(self, state):
self._q_dict = state.pop("qdict")
from .quantization.utils import create_qparams # pylint: disable=all

if "qdict" in state:
qparams = state.pop("qdict")
            logger.warning(
                "Tensor's 'qdict' state is deprecated. Use 'qparams' instead"
            )
elif "qparams" in state:
qparams = state.pop("qparams")
else:
qparams = None
self._reset(Tensor(state.pop("numpy"), state.pop("dtype"), state.pop("device")))
self._qparams = qparams




tensor = Tensor
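The net effect on serialization: ``qparams`` now round-trips through pickle, and old pickles carrying ``qdict`` still load, with a warning. A small sketch:

import pickle

import numpy as np

import megengine as mge

a = mge.Tensor(np.zeros((2, 2), dtype=np.float32))
a.qparams.scale = mge.Tensor(0.5)

buf = pickle.dumps(a)  # __getstate__ stores "qparams" only when it was actually set
b = pickle.loads(buf)  # __setstate__ also accepts the legacy "qdict" key
assert float(b.qparams.scale.numpy()) == 0.5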


+ 3
- 3
imperative/python/test/unit/core/test_dtype_quant.py View File

@@ -14,7 +14,7 @@ import pytest
import megengine.core.tensor.megbrain_graph as G
from megengine.core.ops import builtin as ops
from megengine.core.tensor.dtype import (
_metadata_dict,
_builtin_quant_dtypes,
    convert_from_qint4,
    convert_from_qint8,
    convert_from_quint4,
@@ -76,10 +76,10 @@ def _get_compiled_result(inp, dtype, shape, device, calc_func=None):




def _check_result_attr(oup, dtype, dtype_str, is_unsigned=True):
metadata = _metadata_dict[dtype_str]
metadata = _builtin_quant_dtypes[dtype_str]
    assert "mgb_dtype" in oup.dtype.metadata
    assert is_quantize(oup.dtype)
np.testing.assert_equal(oup.dtype.metadata["mgb_dtype"]["name"], metadata.name)
np.testing.assert_equal(oup.dtype.metadata["mgb_dtype"]["name"], metadata.cname)
    np.testing.assert_allclose(get_scale(oup.dtype), get_scale(dtype))
    if is_unsigned:
        np.testing.assert_equal(get_zero_point(oup.dtype), get_zero_point(dtype))


+ 3
- 3
imperative/python/test/unit/core/test_serialization.py View File

@@ -65,9 +65,9 @@ def test_tensor_serialization():


    with TemporaryFile() as f:
        a = Tensor(0)
a.q_dict["scale"] = Tensor(1.0)
a.qparams.scale = Tensor(1.0)
        pickle.dump(a, f)
        f.seek(0)
        b = pickle.load(f)
assert isinstance(b.q_dict["scale"], Tensor)
np.testing.assert_equal(b.q_dict["scale"].numpy(), 1.0)
assert isinstance(b.qparams.scale, Tensor)
np.testing.assert_equal(b.qparams.scale.numpy(), 1.0)

+ 22
- 0
imperative/python/test/unit/core/test_tensor_wrapper.py View File

@@ -6,6 +6,8 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import copy

import numpy as np


from megengine.core.tensor.dtype import get_scale, get_zero_point, qint8, quint8
@@ -86,3 +88,23 @@ def test_as_type():
    b = a.astype(quint8(0.3, 128))
    np.testing.assert_almost_equal(get_scale(b.dtype), 0.3)
    np.testing.assert_equal(get_zero_point(b.dtype), 128)


def test_qparams():
x = Tensor(1)
assert x.qparams.scale is None
x.qparams.scale = Tensor(1.0)
assert x.qparams.scale.numpy() == 1.0
x2 = copy.copy(x)
assert x.qparams is x2.qparams and x2.qparams.scale.numpy() == 1.0
x3 = copy.deepcopy(x)
assert x.qparams is not x3.qparams and x3.qparams.scale.numpy() == 1.0


def test_name():
x = Tensor(0)
assert x.name == ""
x.name = "x"
assert x.name == "x"
x = Tensor(0, name="x")
assert x.name == "x"

+ 0
- 25
imperative/python/test/unit/functional/test_tensor.py View File

@@ -406,28 +406,3 @@ def test_copy_d2h():
def test_copy_d2d():
    copy_test("gpu0", "gpu1")
    copy_test("gpu0:0", "gpu0:1")


def test_name():
x = tensor(0)
assert x.name == ""
x.name = "x"
assert x.name == "x"
x = tensor(0, name="x")
assert x.name == "x"


def test_q_dict():
x = tensor(1)
assert x.q_dict["scale"] is None
x.q_dict["scale"] = tensor(1.0)

y = tensor(1)
assert y.q_dict["scale"] is None
y.q_dict["scale"] = tensor(2.0)

assert x.q_dict["scale"].numpy() == 1.0
assert y.q_dict["scale"].numpy() == 2.0

z = x + y
assert z.q_dict["scale"] is None

+ 11
- 7
imperative/python/test/unit/quantization/test_fake_quant.py View File

@@ -12,9 +12,15 @@ import pytest
import megengine as mge
from megengine import tensor
from megengine.core.autodiff.grad import Function, Grad
from megengine.core.tensor.dtype import QuantDtypeMeta
from megengine.core.tensor.utils import make_shape_tuple
from megengine.quantization.internal_fake_quant import *
from megengine.quantization.utils import QuantMode, fake_quant_tensor, tqt_forward
from megengine.quantization.utils import (
QuantMode,
create_qparams,
fake_quant_tensor,
tqt_forward,
)




class TQT_numpy:
@@ -111,16 +117,14 @@ def fake_quant_tensor_gt(inp, scale, zero_point, qmin, qmax):
def test_fakequant():
    qmin = -126
    qmax = 129
test_dtype = QuantDtypeMeta("test_qint8", None, "int8", qmin, qmax)


    def run(zero_point, scale):
q_dict = {}
q_dict["mode"] = QuantMode.ASYMMERTIC
q_dict["scale"] = scale
q_dict["zero_point"] = zero_point
qparams = create_qparams(QuantMode.ASYMMERTIC, test_dtype, scale, zero_point)
        inp_data = np.random.uniform(low=-512.0, high=512.0, size=(1, 32, 32, 32))
        inp = tensor(inp_data, dtype=np.float32)
        # test forward
oup = fake_quant_tensor(inp, qmin, qmax, q_dict).numpy()
oup = fake_quant_tensor(inp, qparams).numpy()
        oup_gt = fake_quant_tensor_gt(inp, scale, zero_point, qmin, qmax).numpy()
        assert np.allclose(oup, oup_gt)
        assert oup.shape == oup_gt.shape
@@ -128,7 +132,7 @@ def test_fakequant():
        # test backward
        x = tensor(inp_data, dtype=np.float32)
        grad = Grad().wrt(x, callback=_save_to(x))
y = fake_quant_tensor(x, qmin, qmax, q_dict)
y = fake_quant_tensor(x, qparams)
        grad(y, tensor(F.ones_like(x)))


        x1 = tensor(inp_data, dtype=np.float32)


+ 16
- 10
imperative/python/test/unit/quantization/test_module.py View File

@@ -10,7 +10,13 @@ import megengine.module.qat as QAT
import megengine.module.quantized as Q
from megengine import Parameter, Tensor
from megengine.core.tensor import dtype
from megengine.quantization import FakeQuantize, MinMaxObserver, QConfig
from megengine.quantization import (
FakeQuantize,
MinMaxObserver,
QConfig,
QuantMode,
create_qparams,
)
from megengine.quantization.quantize import (
    disable_fake_quant,
    disable_observer,
@@ -18,10 +24,10 @@ from megengine.quantization.quantize import (
)


min_max_fakequant_qconfig = QConfig(
weight_observer=partial(MinMaxObserver, dtype="qint8", narrow_range=True),
act_observer=partial(MinMaxObserver, dtype="qint8", narrow_range=False),
weight_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=True),
act_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=False),
weight_observer=partial(MinMaxObserver, dtype="qint8_narrow"),
act_observer=partial(MinMaxObserver, dtype="qint8"),
weight_fake_quant=partial(FakeQuantize, dtype="qint8_narrow"),
act_fake_quant=partial(FakeQuantize, dtype="qint8"),
)


inp_scale = np.float32(np.random.rand() + 1)
@@ -111,7 +117,7 @@ def test_dequant_stub():


    x = mge.tensor(np.random.normal(size=(3, 3)).astype("float32"))
    x = fake_quant_act(x, inp_scale)
x.q_dict["scale"] = inp_scale
x.qparams.scale = inp_scale


    normal = normal_net(x)
    qat_without_fakequant = qat_from_float(x)
@@ -146,12 +152,12 @@ def test_elemwise(kind):
    x1_scale = np.float32(np.random.rand() + 1)
    x1 = mge.tensor(np.random.normal(size=(3, 3)).astype("float32"))
    x1 = fake_quant_act(x1, x1_scale)
x1.q_dict["scale"] = x1_scale
x1.qparams.scale = x1_scale


    x2_scale = np.float32(np.random.rand() + 1)
    x2 = mge.tensor(np.random.normal(size=(3, 3)).astype("float32"))
    x2 = fake_quant_act(x2, x2_scale)
x2.q_dict["scale"] = x2_scale
x2.qparams.scale = x2_scale


    x1_int8 = quant(x1, x1_scale)
    x2_int8 = quant(x2, x2_scale)
@@ -187,7 +193,7 @@ def test_linear():


    x = mge.tensor(np.random.normal(size=(3, 3)).astype("float32"))
    x = fake_quant_act(x, inp_scale)
x.q_dict["scale"] = inp_scale
x.qparams.update(create_qparams(QuantMode.SYMMERTIC, "qint8", inp_scale))


    x_int8 = quant(x, inp_scale)


@@ -230,7 +236,7 @@ def test_conv(module):


    x = mge.tensor(np.random.normal(size=(1, 3, 3, 3)).astype("float32"))
    x = fake_quant_act(x, inp_scale)
x.q_dict["scale"] = inp_scale
x.qparams.update(create_qparams(QuantMode.SYMMERTIC, "qint8", inp_scale))


    x_int8 = quant(x, inp_scale)




+ 7
- 6
imperative/python/test/unit/quantization/test_observer.py View File

@@ -6,6 +6,7 @@ import pytest
import megengine as mge
import megengine.distributed as dist
from megengine.distributed.helper import get_device_count_by_fork
from megengine.quantization import QuantMode, create_qparams
from megengine.quantization.observer import (
    ExponentialMovingAverageObserver,
    HistogramObserver,
@@ -56,14 +57,14 @@ def test_histogram_observer():




def test_passive_observer():
q_dict = {"scale": mge.tensor(1.0)}
qparams = create_qparams(QuantMode.SYMMERTIC, "qint8", mge.tensor(1.0))
    m = PassiveObserver("qint8")
m.set_qparams(q_dict)
m.set_qparams(qparams)
    assert m.orig_scale == 1.0
assert m.scale == 1.0
m.scale = 2.0
assert m.scale == 2.0
assert m.get_qparams() == {"scale": mge.tensor(2.0)}
assert m.scale.numpy() == 1.0
assert m.get_qparams().dtype_meta == qparams.dtype_meta
assert m.get_qparams().scale == qparams.scale
assert m.get_qparams() == qparams




@pytest.mark.require_ngpu(2)


+ 3
- 2
imperative/python/test/unit/quantization/test_op.py View File

@@ -6,6 +6,7 @@ import megengine.functional as F
from megengine.core.tensor import dtype
from megengine.distributed.helper import get_device_count_by_fork
from megengine.functional.elemwise import _elemwise_multi_type, _elwise
from megengine.quantization import QuantMode, create_qparams




def quant(x, scale):
@@ -26,13 +27,13 @@ def test_elemwise(kind):
    x1 = mge.tensor(np.random.normal(size=(3, 3)).astype("float32"))
    x1_scale = np.float32(np.random.rand() + 1)
    x1 = fake_quant(x1, x1_scale)
x1.q_dict["scale"] = x1_scale
x1.qparams.update(create_qparams(QuantMode.SYMMERTIC, "qint8", x1_scale))
    x1_int8 = quant(x1, x1_scale)


    x2 = mge.tensor(np.random.normal(size=(3, 3)).astype("float32"))
    x2_scale = np.float32(np.random.rand() + 1)
    x2 = fake_quant(x2, x2_scale)
x2.q_dict["scale"] = x2_scale
x2.qparams.update(create_qparams(QuantMode.SYMMERTIC, "qint8", x2_scale))
    x2_int8 = quant(x2, x2_scale)


    output_scale = np.float32(np.random.rand() + 1)


+ 0
- 14
imperative/python/test/unit/quantization/test_qconfig.py View File

@@ -1,14 +0,0 @@
from functools import partial

from megengine.quantization import QConfig, tqt_qconfig
from megengine.quantization.fake_quant import TQT


def test_equal():
qconfig = QConfig(
weight_observer=None,
act_observer=None,
weight_fake_quant=partial(TQT, dtype="qint8", narrow_range=True),
act_fake_quant=partial(TQT, dtype="qint8", narrow_range=False),
)
assert qconfig == tqt_qconfig

+ 15
- 15
imperative/python/test/unit/quantization/test_quantize.py View File

@@ -33,7 +33,7 @@ from megengine.quantization.quantize import (
)




class Net(Float.Module):
class FloatNet(Float.Module):
    def __init__(self):
        super().__init__()
        self.quant = Float.QuantStub()
@@ -113,25 +113,25 @@ def test_reset_qconfig():
def test_enable_and_disable_observer():
    net = init_qat_net()
    enable_observer(net)
assert net.quant.act_observer.enabled == True
assert net.linear.weight_observer.enabled == True
assert net.linear.act_observer.enabled == True
assert net.quant.act_observer.enabled is True
assert net.linear.weight_observer.enabled is True
assert net.linear.act_observer.enabled is True
    disable_observer(net)
assert net.quant.act_observer.enabled == False
assert net.linear.weight_observer.enabled == False
assert net.linear.act_observer.enabled == False
assert net.quant.act_observer.enabled is False
assert net.linear.weight_observer.enabled is False
assert net.linear.act_observer.enabled is False




def test_enable_and_disable_fake_quant():
    net = init_qat_net()
    disable_fake_quant(net)
assert net.quant.act_fake_quant.enabled == False
assert net.linear.weight_fake_quant.enabled == False
assert net.linear.act_fake_quant.enabled == False
assert net.quant.act_fake_quant.enabled is False
assert net.linear.weight_fake_quant.enabled is False
assert net.linear.act_fake_quant.enabled is False
    enable_fake_quant(net)
assert net.quant.act_fake_quant.enabled == True
assert net.linear.weight_fake_quant.enabled == True
assert net.linear.act_fake_quant.enabled == True
assert net.quant.act_fake_quant.enabled is True
assert net.linear.weight_fake_quant.enabled is True
assert net.linear.act_fake_quant.enabled is True




def init_observer(module, data):
@@ -144,7 +144,7 @@ def init_observer(module, data):


def test_enable_and_disable_all():
    x = Tensor(np.random.randint(1, 10, size=(3, 3)).astype(np.float32))
net = Net()
net = FloatNet()
    y1 = net(x).numpy()
    net = quantize_qat(net, min_max_fakequant_qconfig)


@@ -162,7 +162,7 @@ def test_enable_and_disable_all():




def test_quantize_qat():
net = Net()
net = FloatNet()
    qat_net = quantize_qat(net, inplace=False, qconfig=min_max_fakequant_qconfig)
    assert isinstance(qat_net.quant, QAT.QuantStub)
    assert isinstance(qat_net.linear, QAT.Linear)

