Browse Source

feat(mge/dtype): add int2 lowbit support and example

GitOrigin-RevId: 67c14ac959
tags/v1.0.0-rc1
Megvii Engine Team Xinran Xu 4 years ago
parent
commit
d4b86b844e
3 changed files with 13 additions and 0 deletions
  1. +3
    -0
      python_module/megengine/_internal/dtype.py
  2. +1
    -0
      python_module/megengine/quantization/__init__.py
  3. +9
    -0
      python_module/megengine/quantization/qconfig.py

+ 3
- 0
python_module/megengine/_internal/dtype.py View File

@@ -25,6 +25,9 @@ _metadata_dict = {
"qint32": _QuantDtypeMetadata(
"QuantizedS32", "int32", False, -(2 ** 31), 2 ** 31 - 1,
),
# NOTE: int2 is not supported for model dump yet
"quint2": _QuantDtypeMetadata(None, "uint8", True, 0, 3),
"qint2": _QuantDtypeMetadata(None, "int8", False, -2, 1),
}




+ 1
- 0
python_module/megengine/quantization/__init__.py View File

@@ -13,6 +13,7 @@ from .qconfig import (
QConfig,
calibration_qconfig,
ema_fakequant_qconfig,
ema_lowbit_fakequant_qconfig,
min_max_fakequant_qconfig,
tqt_quant_qconfig,
)


+ 9
- 0
python_module/megengine/quantization/qconfig.py View File

@@ -92,6 +92,15 @@ ema_fakequant_qconfig = QConfig(
act_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=False),
)

# Low-bit fake-quantization config: every observer and fake-quant factory
# below is bound to dtype "qint4" with narrow_range disabled.
ema_lowbit_fakequant_qconfig = QConfig(
    # Weights are observed with MinMaxObserver.
    weight_observer=partial(
        MinMaxObserver,
        dtype="qint4",
        narrow_range=False,
    ),
    # Activations are observed with ExponentialMovingAverageObserver.
    act_observer=partial(
        ExponentialMovingAverageObserver,
        dtype="qint4",
        narrow_range=False,
    ),
    # Weights and activations share the same FakeQuantize settings.
    weight_fake_quant=partial(
        FakeQuantize,
        dtype="qint4",
        narrow_range=False,
    ),
    act_fake_quant=partial(
        FakeQuantize,
        dtype="qint4",
        narrow_range=False,
    ),
)

calibration_qconfig = QConfig(
weight_observer=partial(MinMaxObserver, dtype="qint8", narrow_range=True),
act_observer=partial(HistogramObserver, dtype="qint8", narrow_range=False),


Loading…
Cancel
Save