# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import numpy as np

import megengine._internal as mgb

from ... import functional as F
from ... import module as Float
from ...core import Parameter
from ...quantization.utils import register_method_to_class
from ..module import Module


class Linear(Module):
    r"""Applies a quantized linear transformation to the input.

    The module is usually converted from a QAT module by the ``to_quantized``
    method.

    :param dtype: output data type.
    """

    def __init__(
        self, dtype: np.dtype = None,
    ):
        super().__init__()
        self.weight = None
        self.bias = None
        self.output_dtype = dtype

    def forward(self, inp):
        if self.training:
            raise ValueError("quantized module only supports inference.")
        # The bias is kept in float and cast on the fly to qint32 whose scale
        # is the product of the input and weight scales, matching the scale of
        # the integer accumulator of the quantized matmul.
        inp_scale = mgb.dtype.get_scale(inp.dtype)
        w_scale = mgb.dtype.get_scale(self.weight.dtype)
        bias_dtype = mgb.dtype.qint32(inp_scale * w_scale)
        return F.linear(
            inp,
            self.weight,
            None if self.bias is None else self.bias.astype(bias_dtype),
        ).astype(self.output_dtype)


@register_method_to_class(Float.Linear)
def to_quantized(float_module):
    r"""
    Replace :class:`~.module.QATModule`'s ``to_quantized`` method.
    Implemented here to avoid a circular import.
    """
    output_dtype = float_module.act_observer.get_dtype()
    qmod = Linear(dtype=output_dtype,)
    # Quantize the weight with the dtype (scale) decided by the weight
    # observer; the bias stays in float and is converted in ``forward``.
    weight = float_module.weight.astype(float_module.weight_observer.get_dtype())
    qmod.weight = Parameter(weight.numpy())
    if float_module.bias is not None:
        qmod.bias = Parameter(float_module.bias.numpy())
    return qmod
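

# Usage sketch (illustrative only; relies just on what is defined above:
# ``register_method_to_class`` attaches ``to_quantized`` to ``Float.Linear``,
# so a QAT float module whose ``weight_observer`` and ``act_observer`` have
# already been calibrated can be converted and run in inference mode):
#
#     qlinear = float_linear.to_quantized()  # ``float_linear``: a calibrated Float.Linear
#     out = qlinear(quantized_input)         # forward raises in training mode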
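

# Why the bias is cast to qint32 with scale ``inp_scale * w_scale``: with
# symmetric quantization q = round(x / s), the integer accumulator holds
# q_x @ q_w ~= (x @ w) / (s_x * s_w), so the bias must be quantized with the
# product scale to be added in the same integer domain. The numpy-only sanity
# check below is a minimal sketch of that identity; all values and scales are
# made up and are not tied to any MegEngine API.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    s_x, s_w = 0.05, 0.02  # hypothetical input/weight scales
    x = rng.uniform(-1, 1, (4, 8)).astype(np.float32)
    w = rng.uniform(-1, 1, (8, 3)).astype(np.float32)
    b = rng.uniform(-1, 1, (3,)).astype(np.float32)

    q_x = np.round(x / s_x).astype(np.int32)  # quantized input
    q_w = np.round(w / s_w).astype(np.int32)  # quantized weight
    q_b = np.round(b / (s_x * s_w)).astype(np.int32)  # bias in accumulator scale

    acc = q_x @ q_w + q_b  # all-integer accumulation, as a qint32 backend would do
    y_int = acc.astype(np.float32) * (s_x * s_w)  # dequantize with the product scale
    y_ref = x @ w + b
    print("max abs error:", np.abs(y_int - y_ref).max())  # small relative to the scales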