You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

linear.py 2.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  2. #
  3. # Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  4. #
  5. # Unless required by applicable law or agreed to in writing,
  6. # software distributed under the License is distributed on an
  7. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  8. import numpy as np
  9. import megengine._internal as mgb
  10. from ... import functional as F
  11. from ... import module as Float
  12. from ...core import Parameter
  13. from ...quantization.utils import register_method_to_class
  14. from ..module import Module
  15. class Linear(Module):
  16. r"""Applies a quantized linear transformation to the input. The module
  17. usually convert from QAT module by to_quantized method.
  18. :param dtype: output data type.
  19. """
  20. def __init__(
  21. self, dtype: np.dtype = None,
  22. ):
  23. super().__init__()
  24. self.weight = None
  25. self.bias = None
  26. self.output_dtype = dtype
  27. def forward(self, inp):
  28. if self.training:
  29. raise ValueError("quantized module only support inference.")
  30. inp_scale = mgb.dtype.get_scale(inp.dtype)
  31. w_scale = mgb.dtype.get_scale(self.weight.dtype)
  32. bias_dtype = mgb.dtype.qint32(inp_scale * w_scale)
  33. return F.linear(
  34. inp,
  35. self.weight,
  36. None if self.bias is None else self.bias.astype(bias_dtype),
  37. ).astype(self.output_dtype)
  38. @register_method_to_class(Float.Linear)
  39. def to_quantized(float_module):
  40. r"""
  41. Replace :class:`~.module.QATModule`'s ``to_quantized`` method.
  42. implemented here to avoid circular import.
  43. """
  44. output_dtype = float_module.act_observer.get_dtype()
  45. qmod = Linear(dtype=output_dtype,)
  46. weight = float_module.weight.astype(float_module.weight_observer.get_dtype())
  47. qmod.weight = Parameter(weight.numpy())
  48. if float_module.bias is not None:
  49. qmod.bias = Parameter(float_module.bias.numpy())
  50. return qmod

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台