You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

fake_quant.py 4.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  2. #
  3. # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  4. #
  5. # Unless required by applicable law or agreed to in writing,
  6. # software distributed under the License is distributed on an
  7. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  8. import math
  9. from typing import Union
  10. from .. import functional as F
  11. from ..core.tensor.dtype import QuantDtypeMeta, _builtin_quant_dtypes
  12. from ..logger import get_logger
  13. from ..module import Module
  14. from ..tensor import Parameter
  15. from .utils import (
  16. QParams,
  17. QParamsModuleMixin,
  18. QuantMode,
  19. create_qparams,
  20. fake_quant_tensor,
  21. tqt_forward,
  22. )
  23. logger = get_logger(__name__)
  24. class _FakeQuantize(Module):
  25. def __init__(
  26. self, dtype: Union[str, QuantDtypeMeta], enable: bool = True, **kwargs
  27. ):
  28. super().__init__()
  29. if isinstance(dtype, str):
  30. if not dtype in _builtin_quant_dtypes:
  31. raise ValueError(
  32. "unknown dtype: {}, only support {}".format(
  33. dtype, _builtin_quant_dtypes.keys()
  34. )
  35. )
  36. dtype = _builtin_quant_dtypes[dtype]
  37. if "narrow_range" in kwargs:
  38. del kwargs["narrow_range"]
  39. logger.warning(
  40. "FakeQuantize currently has no narrow_range param "
  41. "so it is ignored here",
  42. exc_info=DeprecationWarning,
  43. )
  44. self.dtype = dtype
  45. self.qmin = dtype.qmin
  46. self.qmax = dtype.qmax
  47. self.enabled = enable
  48. def enable(self):
  49. self.enabled = True
  50. def disable(self):
  51. self.enabled = False
  52. def fake_quant_forward(self, inp, qparams: QParams = None):
  53. raise NotImplementedError
  54. def normal_foward(self, inp, qparams: QParams = None):
  55. return inp
  56. def forward(self, inp, qparams: QParams = None):
  57. if self.enabled:
  58. return self.fake_quant_forward(inp, qparams=qparams)
  59. else:
  60. return self.normal_foward(inp, qparams=qparams)
  61. class TQT(_FakeQuantize, QParamsModuleMixin):
  62. r"""
  63. TQT: https://arxiv.org/abs/1903.08066 Trained Quantization Thresholds
  64. for Accurate and Efficient Fixed-Point Inference of Deep Neural Networks.
  65. :param dtype: a string or :class:`~.QuantDtypeMeta` indicating the target
  66. quantization dtype of input.
  67. :param enable: whether do ``normal_forward`` or ``fake_quant_forward``.
  68. """
  69. def __init__(
  70. self, dtype: Union[str, QuantDtypeMeta], enable: bool = True, **kwargs
  71. ):
  72. super().__init__(dtype, enable, **kwargs)
  73. self.scale = Parameter(0.0, dtype="float32")
  74. def fake_quant_forward(self, inp, qparams: QParams = None):
  75. # when enable, TQT will do fakequant forward, finetune the scale
  76. return tqt_forward(self.qmin, self.qmax, inp, self.scale)
  77. def set_qparams(self, qparams: QParams):
  78. assert (
  79. qparams.mode == QuantMode.SYMMERTIC
  80. ), "only symmetric quantization is supported by TQT"
  81. if qparams.scale is None:
  82. raise AssertionError("Can not get an initialized scale")
  83. self.scale[...] = F.log(qparams.scale) / math.log(2)
  84. def get_qparams(self):
  85. return create_qparams(QuantMode.SYMMERTIC, self.dtype, scale=2 ** self.scale)
  86. class FakeQuantize(_FakeQuantize):
  87. r"""
  88. A module to do quant and dequant according to observer's scale and zero_point.
  89. :param dtype: a string or :class:`~.QuantDtypeMeta` indicating the target
  90. quantization dtype of input.
  91. :param enable: whether do ``normal_forward`` or ``fake_quant_forward``.
  92. """
  93. def fake_quant_forward(self, inp, qparams: QParams = None):
  94. assert (
  95. qparams.dtype_meta is self.dtype
  96. ), "input qparams' dtype is not equal to self.dtype.\nqparams.dtype_meta={}\nself.dtype={}".format(
  97. qparams.dtype_meta, self.dtype
  98. )
  99. return fake_quant_tensor(inp, qparams)

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台