You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

fake_quant.py 5.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  2. #
  3. # Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  4. #
  5. # Unless required by applicable law or agreed to in writing,
  6. # software distributed under the License is distributed on an
  7. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  8. import math
  9. from typing import Iterable
  10. import numpy as np
  11. from .. import functional as F
  12. from ..core.tensor.dtype import _metadata_dict, get_quantized_dtype
  13. from ..core.tensor.function import Function
  14. from ..module import Module
  15. from ..tensor import Tensor
  16. from ..tensor_nn import Parameter
  17. from .utils import QuantMode, fake_quant_tensor, get_qparam_dict
  18. class _FakeQuantize(Module):
  19. r"""
  20. A Basic Fake Quant module.
  21. :param dtype: A string indicating the target quantization type of input.
  22. :param narrow_range: Whether the absolute value of ``qmin`` is the same as ``qmax``,
  23. instead of 1 greater. Usually True for weight and False for activation.
  24. :param enable: Whether do ``normal_forward`` or ``fake_quant_forward``.
  25. """
  26. def __init__(self, dtype: str, narrow_range: bool = False, enable: bool = True):
  27. super().__init__()
  28. if not dtype in _metadata_dict.keys():
  29. raise ValueError(
  30. "unknown dtype: {}, only support {}".format(
  31. dtype, _metadata_dict.keys()
  32. )
  33. )
  34. self.dtype = dtype
  35. self.narrow_range = narrow_range
  36. self.qmin = (
  37. -_metadata_dict[dtype].qmax if narrow_range else _metadata_dict[dtype].qmin
  38. )
  39. self.qmax = _metadata_dict[dtype].qmax
  40. self.enabled = enable
  41. def enable(self):
  42. self.enabled = True
  43. def disable(self):
  44. self.enabled = False
  45. def fake_quant_forward(self, inp, q_dict=None):
  46. return inp
  47. def normal_foward(self, inp, q_dict=None):
  48. return inp
  49. def forward(self, inp, q_dict=None):
  50. if self.enabled:
  51. return self.fake_quant_forward(inp, q_dict=q_dict)
  52. else:
  53. return self.normal_foward(inp, q_dict=q_dict)
  54. class TQT_Function(Function):
  55. def __init__(self, lowerbound, upperbound):
  56. super().__init__()
  57. self.lowerbound = lowerbound
  58. self.upperbound = upperbound
  59. self.saved_tensors = ()
  60. def save_for_backward(self, *tensors: Iterable[Tensor]):
  61. """
  62. Saves tensors needed for gradient computation. This method should be called only
  63. once in :meth:`~.function.Function.forward`, additional calls will replace values saved previously.
  64. The saved tensors can be accessed through the ``saved_tensors`` attribute.
  65. """
  66. self.saved_tensors = tensors
  67. def forward(self, inp, scale):
  68. t = 2 ** scale
  69. # t = F.maximum(t, 1e-4)
  70. inp_scaled = inp / t
  71. inp_clipped = F.maximum(F.minimum(inp_scaled, self.upperbound), self.lowerbound)
  72. inp_rounded = F.round(inp_clipped)
  73. inp_flq = inp_rounded * t
  74. self.save_for_backward(inp_scaled, inp_rounded, t)
  75. return inp_flq
  76. def backward(self, grad_inp_flq):
  77. (inp_scaled, inp_rounded, t) = self.saved_tensors
  78. mask_clip = F.logical_and(
  79. inp_scaled < -0.5 + self.lowerbound, inp_scaled > self.upperbound + 0.5
  80. ) # mask for accumulating the gradients of |data_scaled|>L
  81. mask_quant = F.logical_not(mask_clip)
  82. grad_quant = (
  83. grad_inp_flq * mask_quant * (inp_rounded - inp_scaled)
  84. ) # gradient within |data_scaled|<=L
  85. grad_clip = (
  86. grad_inp_flq * mask_clip * inp_rounded
  87. ) # gradient with | data_scaled|>L
  88. grad_s = grad_clip.sum() + grad_quant.sum()
  89. # dL/ds = dL/dt * t * ln(2)
  90. grad_s = grad_s * t * math.log(2)
  91. grad_inp = grad_inp_flq * mask_quant
  92. return grad_inp, grad_s
  93. class TQT(_FakeQuantize):
  94. r"""
  95. TQT: https://arxiv.org/abs/1903.08066 Trained Quantization Thresholds
  96. for Accurate and Efficient Fixed-Point Inference of Deep Neural Networks.
  97. """
  98. def __init__(self, dtype: str, narrow_range: bool = False, enable: bool = True):
  99. super().__init__(dtype, narrow_range, enable)
  100. self.scale = Parameter(0.0, dtype=np.float32)
  101. def fake_quant_forward(self, inp, q_dict=None):
  102. # when enable, TQT will do fakequant forward, finetune the scale
  103. return TQT_Function(self.qmin, self.qmax)(inp, self.scale)
  104. def normal_foward(self, inp, q_dict=None):
  105. if q_dict["enable_observer"]:
  106. # when disable, TQT will do normal forward, initialize scale weight
  107. tmp_scale = F.maximum(F.abs(q_dict["min_val"]), F.abs(q_dict["max_val"]))
  108. tmp_scale = F.log(tmp_scale / 127) / math.log(2)
  109. F.add_update(self.scale, tmp_scale, alpha=0.0, beta=1.0, bias=0.0)
  110. return inp
  111. def get_qparams(self):
  112. q_dict = get_qparam_dict(QuantMode.TQT)
  113. q_dict["scale"] = 2 ** self.scale
  114. return q_dict
  115. def get_dtype(self):
  116. q_dict = self.get_qparams()
  117. scale = None if "scale" not in q_dict else q_dict["scale"].numpy()[0]
  118. zero_point = (
  119. None if "zero_point" not in q_dict else q_dict["zero_point"].numpy()[0]
  120. )
  121. return get_quantized_dtype(self.dtype, scale, zero_point)
  122. class FakeQuantize(_FakeQuantize):
  123. r"""
  124. A module to do quant and dequant according to observer's scale and zero_point.
  125. """
  126. def fake_quant_forward(self, inp, q_dict=None):
  127. return fake_quant_tensor(inp, self.qmin, self.qmax, q_dict)

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台