
fake_quant.py 4.8 kB

# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import copy
import math

import numpy as np

from .. import functional as F
from .._internal.dtype import _metadata_dict, get_quantized_dtype
from ..core import Buffer, Function, Parameter
from ..jit import sideeffect
from ..module import Module
from .observer import ObserverMode, Round


class _FakeQuantize(Module):
    def __init__(self, dtype: str, enable: bool = True):
        super().__init__()
        if not dtype in _metadata_dict.keys():
            raise ValueError(
                "unknown dtype: {}, only support {}".format(
                    dtype, _metadata_dict.keys()
                )
            )
        self.dtype = dtype
        self.qmin = _metadata_dict[dtype].qmin
        self.qmax = _metadata_dict[dtype].qmax
        self.enabled = enable

    def enable(self):
        self.enabled = True

    def disable(self):
        self.enabled = False

    def fake_quant_forward(self, inp, q_dict):
        return inp

    def normal_foward(self, inp, q_dict):
        return inp

    def forward(self, inp, q_dict):
        if self.enabled:
            return self.fake_quant_forward(inp, q_dict)
        else:
            return self.normal_foward(inp, q_dict)


class TQT_Function(Function):
    def __init__(self, lowerbound, upperbound):
        super().__init__()
        self.lowerbound = lowerbound
        self.upperbound = upperbound

    def forward(self, inp, scale):
        t = 2 ** scale
        # t = F.maximum(t, 1e-4)
        inp_scaled = inp / t
        inp_clipped = F.maximum(F.minimum(inp_scaled, self.upperbound), self.lowerbound)
        inp_rounded = F.round(inp_clipped)
        inp_flq = inp_rounded * t
        self.save_for_backward(inp_scaled, inp_rounded, t)
        return inp_flq

    def backward(self, grad_inp_flq):
        (inp_scaled, inp_rounded, t) = self.saved_tensors
        mask_clip = (inp_scaled < -0.5 + self.lowerbound) + (
            inp_scaled > self.upperbound + 0.5
        )  # mask for accumulating the gradients with |data_scaled| > L
        mask_quant = F.abs(
            mask_clip - 1
        )  # mask for accumulating the gradients with |data_scaled| <= L
        grad_quant = (
            grad_inp_flq * mask_quant * (inp_rounded - inp_scaled)
        )  # gradient within |data_scaled| <= L
        grad_clip = (
            grad_inp_flq * mask_clip * inp_rounded
        )  # gradient with |data_scaled| > L
        grad_s = grad_clip.sum() + grad_quant.sum()
        # dL/ds = dL/dt * t * ln(2)
        grad_s = grad_s * t * math.log(2)
        grad_inp = grad_inp_flq * mask_quant
        return grad_inp, grad_s
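
# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original MegEngine file): a plain-numpy
# re-derivation of the TQT_Function math above, using only the `math` and
# `numpy` imports already at the top of this file. `qmin`/`qmax` play the role
# of `lowerbound`/`upperbound`. It is meant for checking the masks and the
# dL/ds = dL/dt * t * ln(2) chain rule by hand, not for use in training.
def _tqt_reference(inp, scale, qmin, qmax, grad_inp_flq):
    t = 2.0 ** scale                                  # step size t = 2^s
    inp_scaled = inp / t
    inp_rounded = np.round(np.clip(inp_scaled, qmin, qmax))
    inp_flq = inp_rounded * t                         # fake-quantized output
    # straight-through masks, mirroring TQT_Function.backward
    mask_clip = (inp_scaled < qmin - 0.5) | (inp_scaled > qmax + 0.5)
    mask_quant = ~mask_clip
    grad_quant = grad_inp_flq * mask_quant * (inp_rounded - inp_scaled)
    grad_clip = grad_inp_flq * mask_clip * inp_rounded
    grad_s = (grad_quant.sum() + grad_clip.sum()) * t * math.log(2)
    grad_inp = grad_inp_flq * mask_quant              # gradient passes through inside the range
    return inp_flq, grad_inp, grad_s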


class TQT(_FakeQuantize):
    """
    TQT: https://arxiv.org/abs/1903.08066 Trained Quantization Thresholds
    for Accurate and Efficient Fixed-Point Inference of Deep Neural Networks.
    """

    def __init__(self, dtype: str, enable: bool = True):
        super().__init__(dtype, enable)
        self.scale = Parameter(0.0, dtype=np.float32)

    def fake_quant_forward(self, inp, q_dict):
        # when enabled, TQT does the fake-quant forward pass and finetunes the scale
        return TQT_Function(self.qmin, self.qmax)(inp, self.scale)

    def normal_foward(self, inp, q_dict):
        # when disabled, TQT does a normal forward pass and initializes the scale parameter
        tmp_scale = F.maximum(F.abs(q_dict["min_val"]), F.abs(q_dict["max_val"]))
        tmp_scale = F.log(tmp_scale / 127) / F.log(2)
        F.add_update(self.scale, tmp_scale, alpha=0.0, beta=1.0, bias=0.0)
        return inp

    def get_dtype(self):
        return get_quantized_dtype(self.dtype, 2 ** self.scale.numpy()[0], None)
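
# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original MegEngine file): the scale
# initialization performed by TQT.normal_foward above, written out in plain
# Python. The hard-coded 127 matches the constant in normal_foward. For
# example, min_val = -6.0 and max_val = 2.5 give log2(6.0 / 127) ≈ -4.40,
# i.e. an initial step size of 2 ** -4.40 ≈ 0.047.
def _tqt_initial_scale(min_val, max_val):
    max_abs = max(abs(min_val), abs(max_val))
    return math.log(max_abs / 127) / math.log(2)      # log2(max_abs / 127)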


class FakeQuantize(_FakeQuantize):
    r"""
    A module to do quant and dequant according to observer's scale and zero_point.
    """

    def fake_quant_forward(self, inp, q_dict):
        if q_dict["mode"] == ObserverMode.SYMMERTIC:
            scale = q_dict["scale"]
            # quant
            oup = Round()(inp / scale)
            # clip
            oup = F.minimum(F.maximum(oup, self.qmin), self.qmax)
            # dequant
            oup = oup * scale
            return oup
        else:
            scale = q_dict["scale"]
            zero_point = q_dict["zero_point"]
            # quant
            oup = Round()(inp / scale) + zero_point
            # clip
            oup = F.minimum(F.maximum(oup, self.qmin), self.qmax)
            # dequant
            oup = (oup - zero_point) * scale
            return oup
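
The round trip performed by FakeQuantize.fake_quant_forward can be reproduced outside MegEngine with a few lines of numpy. The sketch below is illustrative only: the quant, clip, and dequant steps mirror the code above, but scale and zero_point are passed directly instead of through q_dict, and the function name, sample input, and int8/uint8-style bounds are made up for the example. In real use, scale and zero_point come from an observer rather than being chosen by hand.

import numpy as np

def fake_quant_reference(inp, scale, qmin, qmax, zero_point=None):
    """Quant -> clip -> dequant, mirroring FakeQuantize.fake_quant_forward."""
    if zero_point is None:                       # symmetric branch
        q = np.round(inp / scale)
        q = np.clip(q, qmin, qmax)
        return q * scale
    q = np.round(inp / scale) + zero_point       # asymmetric branch
    q = np.clip(q, qmin, qmax)
    return (q - zero_point) * scale

x = np.array([-1.0, -0.3, 0.02, 0.7, 3.0], dtype=np.float32)
# symmetric, int8-style bounds: 3.0 saturates at qmax * scale = 2.54
print(fake_quant_reference(x, scale=0.02, qmin=-128, qmax=127))
# asymmetric, uint8-style bounds with a zero point of 128
print(fake_quant_reference(x, scale=0.02, qmin=0, qmax=255, zero_point=128))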
