# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from functools import partial
from typing import Tuple, Union

import megengine._internal as mgb

from ... import module as Float
from ...core import Parameter
from ...functional import conv_bias_activation
from ...module import Conv2d
from ...quantization.utils import register_method_to_class


class _ConvBnActivation2d(Conv2d):
    r"""Applies a 2D convolution over a quantized input tensor; inference only.

    The parameters are the same as :class:`~.Conv2d`.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, int]],
        stride: Union[int, Tuple[int, int]] = 1,
        padding: Union[int, Tuple[int, int]] = 0,
        dilation: Union[int, Tuple[int, int]] = 1,
        groups: int = 1,
        conv_mode: str = "CROSS_CORRELATION",
        compute_mode: str = "DEFAULT",
        dtype=None,
    ):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation,
            groups,
            True,
            conv_mode,
            compute_mode,
        )
        self.output_dtype = dtype

    def calc_conv_quantized(self, inp, nonlinear_mode="IDENTITY"):
        # The bias scale is the product of the input and weight scales, so
        # the qint32 bias matches the convolution accumulator's quantization.
        inp_scale = mgb.dtype.get_scale(inp.dtype)
        w_scale = mgb.dtype.get_scale(self.weight.dtype)
        bias_scale = inp_scale * w_scale
        return conv_bias_activation(
            inp,
            self.weight,
            self.bias.astype(mgb.dtype.qint32(bias_scale)),
            self.output_dtype,
            self.stride,
            self.padding,
            self.dilation,
            self.groups,
            conv_mode=self.conv_mode,
            compute_mode=self.compute_mode,
            nonlinear_mode=nonlinear_mode,
        )


class ConvBn2d(_ConvBnActivation2d):
    def forward(self, inp):
        if self.training:
            raise ValueError("quantized module only supports inference.")
        return self.calc_conv_quantized(inp, nonlinear_mode="IDENTITY")


class ConvBnRelu2d(_ConvBnActivation2d):
    def forward(self, inp):
        if self.training:
            raise ValueError("quantized module only supports inference.")
        return self.calc_conv_quantized(inp, nonlinear_mode="RELU")


def to_quantized(quantized_class, float_module):
    # Build an inference-only quantized module whose output dtype comes from
    # the float module's activation observer.
    output_dtype = float_module.act_observer.get_dtype()
    qconv = quantized_class(
        float_module.conv.in_channels,
        float_module.conv.out_channels,
        float_module.conv.kernel_size,
        float_module.conv.stride,
        float_module.conv.padding,
        float_module.conv.dilation,
        float_module.conv.groups,
        dtype=output_dtype,
    )
    # Fold the BatchNorm statistics into the convolution's weight and bias,
    # then quantize the folded weight to the weight observer's dtype.
    w_fold, b_fold = float_module.fold_weight_bias(
        float_module.bn.running_mean, float_module.bn.running_var
    )
    weight = w_fold.astype(float_module.weight_observer.get_dtype())
    qconv.weight = Parameter(weight.numpy())
    qconv.bias = Parameter(b_fold.numpy())

    return qconv


# Replace :class:`~.module.QATModule`'s ``to_quantized`` method.
# Implemented here to avoid a circular import.
register_method_to_class(Float.ConvBn2d)(partial(to_quantized, ConvBn2d))
register_method_to_class(Float.ConvBnRelu2d)(partial(to_quantized, ConvBnRelu2d))
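
# --------------------------------------------------------------------------- #
# Illustrative usage (a minimal sketch, not part of this module's API).
# It assumes a float ConvBn2d that has already been through QAT/calibration,
# so its observers (``act_observer``, ``weight_observer``) hold valid dtypes,
# and that ``register_method_to_class`` above exposes the converter as a
# ``to_quantized()`` method on the float class. Exact entry points and
# constructor arguments may differ across MegEngine versions.
#
#     float_mod = Float.ConvBn2d(in_channels=3, out_channels=8, kernel_size=3)
#     ...  # run QAT / calibration so the observers are populated
#     qmod = float_mod.to_quantized()  # inference-only quantized ConvBn2d
#     out = qmod(quantized_input)      # conv + folded BN (+ ReLU for the
#                                      # ConvBnRelu2d variant)
# --------------------------------------------------------------------------- #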