You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

conv.py 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. # -*- coding: utf-8 -*-
  2. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  3. #
  4. # Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  5. #
  6. # Unless required by applicable law or agreed to in writing,
  7. # software distributed under the License is distributed on an
  8. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. from abc import abstractmethod
  10. from typing import Tuple, Union
  11. import numpy as np
  12. import megengine._internal as mgb
  13. from ..core import Parameter
  14. from ..functional import conv2d, conv_transpose2d
  15. from ..utils.types import _pair, _pair_nonzero
  16. from . import init
  17. from .module import Module
  18. class _ConvNd(Module):
  19. """base class for convolution modules, including transposed conv"""
  20. def __init__(
  21. self,
  22. in_channels: int,
  23. out_channels: int,
  24. kernel_size: Union[int, Tuple[int, int]],
  25. stride: Union[int, Tuple[int, int]],
  26. padding: Union[int, Tuple[int, int]],
  27. dilation: Union[int, Tuple[int, int]],
  28. groups: int,
  29. bias: bool = True,
  30. ):
  31. super().__init__()
  32. if in_channels % groups != 0:
  33. raise ValueError("in_channels must be divisible by groups")
  34. if out_channels % groups != 0:
  35. raise ValueError("out_channels must be divisible by groups")
  36. self.in_channels = in_channels
  37. self.out_channels = out_channels
  38. self.kernel_size = kernel_size
  39. self.stride = stride
  40. self.padding = padding
  41. self.dilation = dilation
  42. self.groups = groups
  43. self.weight = Parameter(np.zeros(self._infer_weight_shape(), dtype=np.float32))
  44. self.bias = None
  45. if bias:
  46. self.bias = Parameter(np.zeros(self._infer_bias_shape(), dtype=np.float32))
  47. self.reset_parameters()
  48. @abstractmethod
  49. def _get_fanin(self):
  50. pass
  51. def reset_parameters(self) -> None:
  52. fanin = self._get_fanin()
  53. std = np.sqrt(1 / fanin)
  54. init.normal_(self.weight, 0.0, std)
  55. if self.bias is not None:
  56. init.zeros_(self.bias)
  57. @abstractmethod
  58. def _infer_weight_shape(self):
  59. pass
  60. @abstractmethod
  61. def _infer_bias_shape(self):
  62. pass
  63. class Conv2d(_ConvNd):
  64. r"""Applies a 2D convolution over an input tensor.
  65. For instance, given an input of the size :math:`(N, C_{\text{in}}, H, W)`,
  66. this layer generates an output of the size
  67. :math:`(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})` through the
  68. process described as below:
  69. .. math::
  70. \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
  71. \sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k)
  72. where :math:`\star` is the valid 2D cross-correlation operator,
  73. :math:`N` is a batch size, :math:`C` denotes a number of channels,
  74. :math:`H` is a height of input planes in pixels, and :math:`W` is
  75. width in pixels.
  76. When ``groups == in_channels`` and ``out_channels == K * in_channels``,
  77. where `K` is a positive integer, this operation is also known as depthwise
  78. convolution.
  79. In other words, for an input of size :math:`(N, C_{in}, H_{in}, W_{in})`,
  80. a depthwise convolution with a depthwise multiplier `K`, can be constructed
  81. by arguments :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`.
  82. :param in_channels: number of input channels.
  83. :param out_channels: number of output channels.
  84. :param kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is
  85. an :class:`int`, the actual kernel size would be
  86. ``(kernel_size, kernel_size)``. Default: 1
  87. :param stride: stride of the 2D convolution operation. Default: 1
  88. :param padding: size of the paddings added to the input on both sides of its
  89. spatial dimensions. Only zero-padding is supported. Default: 0
  90. :param dilation: dilation of the 2D convolution operation. Default: 1
  91. :param groups: number of groups to divide input and output channels into,
  92. so as to perform a "grouped convolution". When ``groups`` is not 1,
  93. ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
  94. and there would be an extra dimension at the beginning of the weight's
  95. shape. Specifically, the shape of weight would be ``(groups,
  96. out_channel // groups, in_channels // groups, *kernel_size)``.
  97. :param bias: whether to add a bias onto the result of convolution. Default:
  98. True
  99. :param conv_mode: Supports `CROSS_CORRELATION` or `CONVOLUTION`. Default:
  100. `CROSS_CORRELATION`.
  101. :param compute_mode: When set to `DEFAULT`, no special requirements will be
  102. placed on the precision of intermediate results. When set to `FLOAT32`,
  103. float32 would be used for accumulator and intermediate result, but only
  104. effective when input and output are of float16 dtype.
  105. """
  106. _conv_mode_type = mgb.opr_param_defs.Convolution.Mode
  107. _compute_mode_type = mgb.opr_param_defs.Convolution.ComputeMode
  108. def __init__(
  109. self,
  110. in_channels: int,
  111. out_channels: int,
  112. kernel_size: Union[int, Tuple[int, int]],
  113. stride: Union[int, Tuple[int, int]] = 1,
  114. padding: Union[int, Tuple[int, int]] = 0,
  115. dilation: Union[int, Tuple[int, int]] = 1,
  116. groups: int = 1,
  117. bias: bool = True,
  118. conv_mode: str = "CROSS_CORRELATION",
  119. compute_mode: str = "DEFAULT",
  120. ):
  121. kernel_size = _pair_nonzero(kernel_size)
  122. stride = _pair_nonzero(stride)
  123. padding = _pair(padding)
  124. dilation = _pair_nonzero(dilation)
  125. self.conv_mode = self._conv_mode_type.convert(conv_mode)
  126. self.compute_mode = self._compute_mode_type.convert(compute_mode)
  127. super().__init__(
  128. in_channels,
  129. out_channels,
  130. kernel_size,
  131. stride,
  132. padding,
  133. dilation,
  134. groups,
  135. bias,
  136. )
  137. def _get_fanin(self):
  138. kh, kw = self.kernel_size
  139. ic = self.in_channels
  140. return kh * kw * ic
  141. def _infer_weight_shape(self):
  142. group = self.groups
  143. ichl = self.in_channels
  144. ochl = self.out_channels
  145. kh, kw = self.kernel_size
  146. if group == 1:
  147. # Assume format is NCHW
  148. return (ochl, ichl, kh, kw)
  149. assert (
  150. ichl % group == 0 and ochl % group == 0
  151. ), "invalid config: input_channels={} output_channels={} group={}".format(
  152. ichl, ochl, group
  153. )
  154. # Assume format is NCHW
  155. return (group, ochl // group, ichl // group, kh, kw)
  156. def _infer_bias_shape(self):
  157. # Assume format is NCHW
  158. return (1, self.out_channels, 1, 1)
  159. def forward(self, inp):
  160. return conv2d(
  161. inp,
  162. self.weight,
  163. self.bias,
  164. self.stride,
  165. self.padding,
  166. self.dilation,
  167. self.groups,
  168. self.conv_mode,
  169. self.compute_mode,
  170. )
  171. class ConvTranspose2d(_ConvNd):
  172. r"""Applies a 2D transposed convolution over an input tensor.
  173. This module is also known as a deconvolution or a fractionally-strided convolution.
  174. :class:`ConvTranspose2d` can ben seen as the gradient of :class:`Conv2d` operation
  175. with respect to its input.
  176. Convolution usually reduces the size of input, while transposed convolution works
  177. the opposite way, transforming a smaller input to a larger output while preserving the
  178. connectivity pattern.
  179. :param in_channels: number of input channels.
  180. :param out_channels: number of output channels.
  181. :param kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is
  182. an :class:`int`, the actual kernel size would be
  183. ``(kernel_size, kernel_size)``. Default: 1
  184. :param stride: stride of the 2D convolution operation. Default: 1
  185. :param padding: size of the paddings added to the input on both sides of its
  186. spatial dimensions. Only zero-padding is supported. Default: 0
  187. :param dilation: dilation of the 2D convolution operation. Default: 1
  188. :param groups: number of groups to divide input and output channels into,
  189. so as to perform a "grouped convolution". When ``groups`` is not 1,
  190. ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
  191. and there would be an extra dimension at the beginning of the weight's
  192. shape. Specifically, the shape of weight would be ``(groups,
  193. out_channel // groups, in_channels // groups, *kernel_size)``. Default: 1
  194. :param bias: wether to add a bias onto the result of convolution. Default:
  195. True
  196. :param conv_mode: Supports `CROSS_CORRELATION` or `CONVOLUTION`. Default:
  197. `CROSS_CORRELATION`.
  198. :param compute_mode: When set to `DEFAULT`, no special requirements will be
  199. placed on the precision of intermediate results. When set to `FLOAT32`,
  200. float32 would be used for accumulator and intermediate result, but only
  201. effective when input and output are of float16 dtype.
  202. """
  203. _conv_mode_type = mgb.opr_param_defs.Convolution.Mode
  204. _compute_mode_type = mgb.opr_param_defs.Convolution.ComputeMode
  205. def __init__(
  206. self,
  207. in_channels: int,
  208. out_channels: int,
  209. kernel_size: Union[int, Tuple[int, int]],
  210. stride: Union[int, Tuple[int, int]] = 1,
  211. padding: Union[int, Tuple[int, int]] = 0,
  212. dilation: Union[int, Tuple[int, int]] = 1,
  213. groups: int = 1,
  214. bias: bool = True,
  215. conv_mode: str = "CROSS_CORRELATION",
  216. compute_mode: str = "DEFAULT",
  217. ):
  218. kernel_size = _pair_nonzero(kernel_size)
  219. stride = _pair_nonzero(stride)
  220. padding = _pair(padding)
  221. dilation = _pair_nonzero(dilation)
  222. self.conv_mode = self._conv_mode_type.convert(conv_mode)
  223. self.compute_mode = self._compute_mode_type.convert(compute_mode)
  224. super().__init__(
  225. in_channels,
  226. out_channels,
  227. kernel_size,
  228. stride,
  229. padding,
  230. dilation,
  231. groups,
  232. bias,
  233. )
  234. def _get_fanin(self):
  235. kh, kw = self.kernel_size
  236. oc = self.out_channels
  237. return kh * kw * oc
  238. def _infer_weight_shape(self):
  239. group = self.groups
  240. ichl = self.in_channels
  241. ochl = self.out_channels
  242. kh, kw = self.kernel_size
  243. if group == 1:
  244. # Assume format is NCHW
  245. return (ichl, ochl, kh, kw)
  246. assert (
  247. ichl % group == 0 and ochl % group == 0
  248. ), "invalid config: input_channels={} output_channels={} group={}".format(
  249. ichl, ochl, group
  250. )
  251. # Assume format is NCHW
  252. return (group, ichl // group, ochl // group, kh, kw)
  253. def _infer_bias_shape(self):
  254. # Assume format is NCHW
  255. return (1, self.out_channels, 1, 1)
  256. def forward(self, inp):
  257. return conv_transpose2d(
  258. inp,
  259. self.weight,
  260. self.bias,
  261. self.stride,
  262. self.padding,
  263. self.dilation,
  264. self.groups,
  265. self.conv_mode,
  266. self.compute_mode,
  267. )

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台