You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

fake_quant.py 3.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  2. #
  3. # Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  4. #
  5. # Unless required by applicable law or agreed to in writing,
  6. # software distributed under the License is distributed on an
  7. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  8. import math
  9. from typing import Iterable
  10. import numpy as np
  11. from .. import functional as F
  12. from ..core.autodiff.grad import Function
  13. from ..core.tensor.dtype import _metadata_dict, get_quantized_dtype
  14. from ..module import Module
  15. from ..tensor import Parameter, Tensor
  16. from .utils import QuantMode, fake_quant_tensor, get_qparam_dict, tqt_forward
  class _FakeQuantize(Module):
      r"""
      A Basic Fake Quant module.

      ``forward`` dispatches to ``fake_quant_forward`` while the module is
      enabled and to ``normal_forward`` otherwise. In this base class both
      paths return the input unchanged; subclasses override
      ``fake_quant_forward`` to do the actual work.

      :param dtype: a string indicating the target quantization type of input.
          Must be a key of ``_metadata_dict``.
      :param narrow_range: whether the absolute value of ``qmin`` is the same as ``qmax``,
          instead of 1 greater. Usually True for weight and False for activation.
      :param enable: whether ``forward`` initially runs ``fake_quant_forward``
          (True) or ``normal_forward`` (False).
      :param kwargs: accepted for signature compatibility and ignored here.
      :raises ValueError: if ``dtype`` is not a key of ``_metadata_dict``.
      """

      def __init__(
          self, dtype: str, narrow_range: bool = False, enable: bool = True, **kwargs
      ):
          super().__init__()
          if dtype not in _metadata_dict:
              raise ValueError(
                  "unknown dtype: {}, only support {}".format(
                      dtype, _metadata_dict.keys()
                  )
              )
          self.dtype = dtype
          self.narrow_range = narrow_range
          # With narrow_range the lower bound mirrors the upper bound (-qmax);
          # otherwise the dtype's own qmin is used.
          self.qmin = (
              -_metadata_dict[dtype].qmax if narrow_range else _metadata_dict[dtype].qmin
          )
          self.qmax = _metadata_dict[dtype].qmax
          self.enabled = enable

      def enable(self):
          """Make ``forward`` use ``fake_quant_forward``."""
          self.enabled = True

      def disable(self):
          """Make ``forward`` use the pass-through path."""
          self.enabled = False

      def fake_quant_forward(self, inp, q_dict=None):
          """Path taken when enabled; returns ``inp`` unchanged in this base class."""
          return inp

      def normal_forward(self, inp, q_dict=None):
          """Path taken when disabled; returns ``inp`` unchanged."""
          return inp

      def normal_foward(self, inp, q_dict=None):
          """Misspelled historical name of ``normal_forward``, kept for compatibility."""
          return self.normal_forward(inp, q_dict=q_dict)

      def forward(self, inp, q_dict=None):
          """Dispatch ``inp`` to the fake-quant or the pass-through path."""
          if self.enabled:
              return self.fake_quant_forward(inp, q_dict=q_dict)
          # Call through the historical name so subclasses that override
          # ``normal_foward`` keep working.
          return self.normal_foward(inp, q_dict=q_dict)
  class TQT(_FakeQuantize):
      r"""
      TQT: https://arxiv.org/abs/1903.08066 Trained Quantization Thresholds
      for Accurate and Efficient Fixed-Point Inference of Deep Neural Networks.

      The scale is stored in the log2 domain: ``self.scale`` holds
      ``log2(q_dict["scale"])`` and :meth:`get_qparams` converts it back with
      ``2 ** self.scale``.

      :param q_dict: quantization parameters; ``q_dict["mode"]`` must be
          ``QuantMode.SYMMERTIC`` and ``q_dict["scale"]`` must be present and not None.
      :param dtype: a string indicating the target quantization type of input.
      :param narrow_range: forwarded to the base class.
      :param enable: forwarded to the base class.
      :param kwargs: forwarded to the base class.
      :raises AssertionError: if the mode is not symmetric or no scale is provided.
      """

      def __init__(
          self,
          q_dict,
          dtype: str,
          narrow_range: bool = False,
          enable: bool = True,
          **kwargs
      ):
          super().__init__(dtype, narrow_range, enable, **kwargs)
          # Raise explicitly instead of using a bare ``assert`` so the check is
          # not stripped under ``python -O``; the exception type is unchanged.
          if q_dict["mode"] != QuantMode.SYMMERTIC:
              raise AssertionError("only symmetric quantization is supported by TQT")
          if "scale" not in q_dict or q_dict["scale"] is None:
              raise AssertionError("Can not get an initialized scale")
          # log2(scale), computed as ln(scale) / ln(2).
          self.scale = Tensor(F.log(q_dict["scale"]) / math.log(2))

      def fake_quant_forward(self, inp, q_dict=None):
          """Run ``tqt_forward`` with this module's bounds and log2 scale.

          The ``q_dict`` argument is not used on this path.
          """
          # when enable, TQT will do fakequant forward, finetune the scale
          return tqt_forward(self.qmin, self.qmax, inp, self.scale)

      def get_qparams(self):
          """Return a symmetric-mode qparam dict whose ``"scale"`` is ``2 ** self.scale``."""
          # NOTE(review): this writes into whatever object get_qparam_dict returns;
          # confirm that helper hands out a fresh dict rather than a shared one.
          q_dict = get_qparam_dict(QuantMode.SYMMERTIC)
          q_dict["scale"] = 2 ** self.scale
          return q_dict

      def get_dtype(self):
          """Build the quantized dtype from the current qparams.

          ``scale`` and ``zero_point`` are taken as the first element of the
          corresponding tensor's ``numpy()`` value, or None when the key is absent.
          """
          q_dict = self.get_qparams()
          scale = None if "scale" not in q_dict else q_dict["scale"].numpy()[0]
          zero_point = (
              None if "zero_point" not in q_dict else q_dict["zero_point"].numpy()[0]
          )
          return get_quantized_dtype(self.dtype, scale, zero_point)
  class FakeQuantize(_FakeQuantize):
      r"""
      A module to do quant and dequant according to observer's scale and zero_point.

      The enabled path hands the input, this module's ``qmin``/``qmax`` bounds
      and the caller-supplied ``q_dict`` to ``fake_quant_tensor``.
      """

      def fake_quant_forward(self, inp, q_dict=None):
          """Return ``fake_quant_tensor`` applied to ``inp`` with this module's bounds."""
          lower_bound, upper_bound = self.qmin, self.qmax
          return fake_quant_tensor(inp, lower_bound, upper_bound, q_dict)

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台