
conv.py

from abc import abstractmethod
from typing import Tuple, Union

import numpy as np

from ..functional import (
    conv1d,
    conv2d,
    conv3d,
    conv_transpose2d,
    conv_transpose3d,
    deformable_conv2d,
    local_conv2d,
    pad,
    region_restricted_conv,
    relu,
)
from ..tensor import Parameter
from ..utils.tuple_function import _pair, _pair_nonzero, _triple, _triple_nonzero
from . import init
from .module import Module

class _ConvNd(Module):
    """base class for convolution modules, including transposed conv"""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, int]],
        stride: Union[int, Tuple[int, int]],
        padding: Union[int, Tuple[int, int]],
        output_padding: Union[int, Tuple[int, int]],
        dilation: Union[int, Tuple[int, int]],
        groups: int,
        bias: bool = True,
        **kwargs
    ):
        super().__init__(**kwargs)
        if in_channels % groups != 0:
            raise ValueError("in_channels must be divisible by groups")
        if out_channels % groups != 0:
            raise ValueError("out_channels must be divisible by groups")
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.output_padding = output_padding
        self.dilation = dilation
        self.groups = groups

        self.weight = Parameter(np.zeros(self._infer_weight_shape(), dtype=np.float32))
        self.bias = None
        if bias:
            self.bias = Parameter(np.zeros(self._infer_bias_shape(), dtype=np.float32))
        self.reset_parameters()

    @abstractmethod
    def _get_fanin(self):
        pass

    def reset_parameters(self) -> None:
        fanin = self._get_fanin()
        std = np.sqrt(1 / fanin)
        init.normal_(self.weight, 0.0, std)
        if self.bias is not None:
            init.zeros_(self.bias)

    @abstractmethod
    def _infer_weight_shape(self):
        pass

    @abstractmethod
    def _infer_bias_shape(self):
        pass

    def _module_info_string(self):
        s = "{in_channels}, {out_channels}, kernel_size={kernel_size}"
        if self.stride != (1,) * len(self.stride):
            s += ", stride={stride}"
        if self.padding != (0,) * len(self.padding):
            s += ", padding={padding}"
        if self.dilation != (1,) * len(self.dilation):
            s += ", dilation={dilation}"
        if self.groups != 1:
            s += ", groups={groups}"
        if self.bias is None:
            s += ", bias=False"
        return s.format(**self.__dict__)

class Conv1d(_ConvNd):
    r"""Applies a 1D convolution over an input tensor.

    For instance, given an input of the size :math:`(N, C_{\text{in}}, H)`,
    this layer generates an output of the size
    :math:`(N, C_{\text{out}}, H_{\text{out}})` through the
    process described as below:

    .. math::
        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
        \sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k)

    where :math:`\star` is the valid 1D cross-correlation operator,
    :math:`N` is the batch size, :math:`C` denotes the number of channels, and
    :math:`H` is the length of the 1D data.

    When ``groups == in_channels`` and ``out_channels == K * in_channels``,
    where `K` is a positive integer, this operation is also known as depthwise
    convolution.

    In other words, for an input of size :math:`(N, C_{in}, H_{in})`,
    a depthwise convolution with a depthwise multiplier `K` can be constructed
    by the arguments :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: size of weight on spatial dimensions.
        stride: stride of the 1D convolution operation. Default: 1
        padding: size of the paddings added to the input on both sides of its
            spatial dimensions. Default: 0
        dilation: dilation of the 1D convolution operation. Default: 1
        groups: number of groups into which the input and output channels are divided,
            so as to perform a "grouped convolution". When ``groups`` is not 1,
            ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
            and the shape of weight should be ``(groups, out_channels // groups,
            in_channels // groups, kernel_size)``. Default: 1
        bias: whether to add a bias onto the result of convolution. Default: True
        conv_mode: supports ``cross_correlation``. Default: ``cross_correlation``
        compute_mode: when set to "default", no special requirements will be
            placed on the precision of intermediate results. When set to "float32",
            "float32" would be used for the accumulator and intermediate results, but this
            is only effective when the input and output are of float16 dtype.
        padding_mode: "zeros", "reflect" or "replicate". Default: "zeros".
            Refer to :class:`~.module.padding.Pad` for more information.

    Note:
        * ``weight`` usually has shape ``(out_channels, in_channels, kernel_size)``;
          if ``groups`` is not 1, the shape will be
          ``(groups, out_channels // groups, in_channels // groups, kernel_size)``
        * ``bias`` usually has shape ``(1, out_channels, 1)``

    Examples:
        >>> import numpy as np
        >>> m = M.Conv1d(in_channels=3, out_channels=1, kernel_size=3)
        >>> inp = mge.tensor(np.arange(0, 24).astype("float32").reshape(2, 3, 4))
        >>> oup = m(inp)
        >>> oup.numpy().shape
        (2, 1, 2)
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 1,
        padding: int = 0,
        dilation: int = 1,
        groups: int = 1,
        bias: bool = True,
        conv_mode: str = "cross_correlation",
        compute_mode: str = "default",
        padding_mode: str = "zeros",
        **kwargs
    ):
        # 1D hyper-parameters are kept as plain ints; no tuple expansion is needed.
        self.conv_mode = conv_mode
        self.compute_mode = compute_mode
        self.padding_mode = padding_mode
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            0,
            dilation,
            groups,
            bias,
            **kwargs,
        )

    def _get_fanin(self):
        kh = self.kernel_size
        ic = self.in_channels
        return kh * ic

    def _infer_weight_shape(self):
        group = self.groups
        ichl = self.in_channels
        ochl = self.out_channels
        kh = self.kernel_size
        if group == 1:
            # Assume format is NCH(W=1)
            return (ochl, ichl, kh)

        assert (
            ichl % group == 0 and ochl % group == 0
        ), "invalid config: in_channels={} out_channels={} group={}".format(
            ichl, ochl, group
        )
        # Assume format is NCH(W=1)
        return (group, ochl // group, ichl // group, kh)

    def _infer_bias_shape(self):
        # Assume format is NCH(W=1)
        return (1, self.out_channels, 1)

    def get_pad_width(self):
        return ((0, 0), (0, 0), (self.padding, self.padding))

    def calc_conv(self, inp, weight, bias):
        assert self.padding_mode in [
            "zeros",
            "reflect",
            "replicate",
        ]
        if self.padding_mode != "zeros":
            return conv1d(
                pad(inp, self.get_pad_width(), self.padding_mode),
                weight,
                bias,
                self.stride,
                0,
                self.dilation,
                self.groups,
                self.conv_mode,
                self.compute_mode,
            )
        return conv1d(
            inp,
            weight,
            bias,
            self.stride,
            self.padding,
            self.dilation,
            self.groups,
            self.conv_mode,
            self.compute_mode,
        )

    def forward(self, inp):
        return self.calc_conv(inp, self.weight, self.bias)

class Conv2d(_ConvNd):
    r"""Applies a 2D convolution over an input tensor.

    For instance, given an input of the size :math:`(N, C_{\text{in}}, H, W)`,
    this layer generates an output of the size
    :math:`(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})` through the
    process described as below:

    .. math::
        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
        \sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k)

    where :math:`\star` is the valid 2D cross-correlation operator,
    :math:`N` is the batch size, :math:`C` denotes the number of channels,
    :math:`H` is the height of input planes in pixels, and :math:`W` is
    the width in pixels.

    In general, output feature maps' shapes can be inferred as follows:

    input: :math:`(N, C_{\text{in}}, H_{\text{in}}, W_{\text{in}})`

    output: :math:`(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})` where

    .. math::
        \text{H}_{out} = \lfloor \frac{\text{H}_{in} + 2 * \text{padding[0]} -
        \text{dilation[0]} * (\text{kernel_size[0]} - 1) - 1}{\text{stride[0]}} + 1 \rfloor

    .. math::
        \text{W}_{out} = \lfloor \frac{\text{W}_{in} + 2 * \text{padding[1]} -
        \text{dilation[1]} * (\text{kernel_size[1]} - 1) - 1}{\text{stride[1]}} + 1 \rfloor

    When ``groups == in_channels`` and ``out_channels == K * in_channels``,
    where `K` is a positive integer, this operation is also known as depthwise
    convolution.

    In other words, for an input of size :math:`(N, C_{in}, H_{in}, W_{in})`,
    a depthwise convolution with a depthwise multiplier `K` can be constructed
    by the arguments :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: size of weight on spatial dimensions. If kernel_size is
            an :class:`int`, the actual kernel size would be
            ``(kernel_size, kernel_size)``.
        stride: stride of the 2D convolution operation. Default: 1
        padding: size of the paddings added to the input on both sides of its
            spatial dimensions. Default: 0
        dilation: dilation of the 2D convolution operation. Default: 1
        groups: number of groups into which the input and output channels are divided,
            so as to perform a "grouped convolution". When ``groups`` is not 1,
            ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
            and the shape of weight should be ``(groups, out_channels // groups,
            in_channels // groups, height, width)``. Default: 1
        bias: whether to add a bias onto the result of convolution. Default: True
        conv_mode: supports ``cross_correlation``. Default: ``cross_correlation``
        compute_mode: when set to "default", no special requirements will be
            placed on the precision of intermediate results. When set to "float32",
            "float32" would be used for the accumulator and intermediate results, but this
            is only effective when the input and output are of float16 dtype.
        padding_mode: "zeros", "reflect" or "replicate". Default: "zeros".
            Refer to :class:`~.module.padding.Pad` for more information.

    Note:
        * ``weight`` usually has shape ``(out_channels, in_channels, height, width)``;
          if ``groups`` is not 1, the shape will be
          ``(groups, out_channels // groups, in_channels // groups, height, width)``
        * ``bias`` usually has shape ``(1, out_channels, 1, 1)``

    Examples:
        >>> import numpy as np
        >>> m = M.Conv2d(in_channels=3, out_channels=1, kernel_size=3)
        >>> inp = mge.tensor(np.arange(0, 96).astype("float32").reshape(2, 3, 4, 4))
        >>> oup = m(inp)
        >>> oup.numpy().shape
        (2, 1, 2, 2)
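
        A sketch of the non-default ``padding_mode`` path and of a depthwise
        (grouped) layer, reusing ``inp`` from the example above; the shapes below
        follow the output-size formula and the weight layout described earlier:

        >>> m = M.Conv2d(in_channels=3, out_channels=1, kernel_size=3, padding=1, padding_mode="reflect")
        >>> m(inp).numpy().shape
        (2, 1, 4, 4)
        >>> m = M.Conv2d(in_channels=4, out_channels=4, kernel_size=3, groups=4)
        >>> m.weight.numpy().shape
        (4, 1, 1, 3, 3)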
  283. """
  284. def __init__(
  285. self,
  286. in_channels: int,
  287. out_channels: int,
  288. kernel_size: Union[int, Tuple[int, int]],
  289. stride: Union[int, Tuple[int, int]] = 1,
  290. padding: Union[int, Tuple[int, int]] = 0,
  291. dilation: Union[int, Tuple[int, int]] = 1,
  292. groups: int = 1,
  293. bias: bool = True,
  294. conv_mode: str = "cross_correlation",
  295. compute_mode: str = "default",
  296. padding_mode: str = "zeros",
  297. **kwargs
  298. ):
  299. kernel_size = _pair_nonzero(kernel_size)
  300. stride = _pair_nonzero(stride)
  301. padding = _pair(padding)
  302. dilation = _pair_nonzero(dilation)
  303. self.conv_mode = conv_mode
  304. self.compute_mode = compute_mode
  305. self.padding_mode = padding_mode
  306. super().__init__(
  307. in_channels,
  308. out_channels,
  309. kernel_size,
  310. stride,
  311. padding,
  312. 0,
  313. dilation,
  314. groups,
  315. bias,
  316. **kwargs,
  317. )
  318. def _get_fanin(self):
  319. kh, kw = self.kernel_size
  320. ic = self.in_channels
  321. return kh * kw * ic
  322. def _infer_weight_shape(self):
  323. group = self.groups
  324. ichl = self.in_channels
  325. ochl = self.out_channels
  326. kh, kw = self.kernel_size
  327. if group == 1:
  328. # Assume format is NCHW
  329. return (ochl, ichl, kh, kw)
  330. assert (
  331. ichl % group == 0 and ochl % group == 0
  332. ), "invalid config: in_channels={} out_channels={} group={}".format(
  333. ichl, ochl, group
  334. )
  335. # Assume format is NCHW
  336. return (group, ochl // group, ichl // group, kh, kw)
  337. def _infer_bias_shape(self):
  338. # Assume format is NCHW
  339. return (1, self.out_channels, 1, 1)
  340. def get_pad_witdth(self):
  341. return (
  342. (0, 0),
  343. (0, 0),
  344. (self.padding[0], self.padding[0]),
  345. (self.padding[1], self.padding[1]),
  346. )
  347. def calc_conv(self, inp, weight, bias):
  348. assert self.padding_mode in [
  349. "zeros",
  350. "reflect",
  351. "replicate",
  352. ]
  353. if self.padding_mode != "zeros":
  354. return conv2d(
  355. pad(inp, self.get_pad_witdth(), self.padding_mode),
  356. weight,
  357. bias,
  358. self.stride,
  359. 0,
  360. self.dilation,
  361. self.groups,
  362. self.conv_mode,
  363. self.compute_mode,
  364. )
  365. return conv2d(
  366. inp,
  367. weight,
  368. bias,
  369. self.stride,
  370. self.padding,
  371. self.dilation,
  372. self.groups,
  373. self.conv_mode,
  374. self.compute_mode,
  375. )
  376. def forward(self, inp):
  377. return self.calc_conv(inp, self.weight, self.bias)

class Conv3d(_ConvNd):
    r"""Applies a 3D convolution over an input tensor.

    For instance, given an input of the size :math:`(N, C_{\text{in}}, T, H, W)`,
    this layer generates an output of the size
    :math:`(N, C_{\text{out}}, T_{\text{out}}, H_{\text{out}}, W_{\text{out}})` through the
    process described as below:

    .. math::
        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
        \sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k)

    where :math:`\star` is the valid 3D cross-correlation operator,
    :math:`N` is the batch size and :math:`C` denotes the number of channels.

    When ``groups == in_channels`` and ``out_channels == K * in_channels``,
    where `K` is a positive integer, this operation is also known as depthwise
    convolution.

    In other words, for an input of size :math:`(N, C_{in}, T_{in}, H_{in}, W_{in})`,
    a depthwise convolution with a depthwise multiplier `K` can be constructed
    by the arguments :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: size of weight on spatial dimensions. If kernel_size is
            an :class:`int`, the actual kernel size would be
            ``(kernel_size, kernel_size, kernel_size)``.
        stride: stride of the 3D convolution operation. Default: 1
        padding: size of the paddings added to the input on both sides of its
            spatial dimensions. Only zero-padding is supported. Default: 0
        dilation: dilation of the 3D convolution operation. Default: 1
        groups: number of groups into which the input and output channels are divided,
            so as to perform a "grouped convolution". When ``groups`` is not 1,
            ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
            and the shape of weight should be ``(groups, out_channels // groups,
            in_channels // groups, depth, height, width)``. Default: 1
        bias: whether to add a bias onto the result of convolution. Default: True
        conv_mode: supports ``cross_correlation``. Default: ``cross_correlation``

    Note:
        * ``weight`` usually has shape ``(out_channels, in_channels, depth, height, width)``;
          if ``groups`` is not 1, the shape will be
          ``(groups, out_channels // groups, in_channels // groups, depth, height, width)``
        * ``bias`` usually has shape ``(1, out_channels, 1, 1, 1)``

    Examples:
        >>> import numpy as np
        >>> m = M.Conv3d(in_channels=3, out_channels=1, kernel_size=3)
        >>> inp = mge.tensor(np.arange(0, 384).astype("float32").reshape(2, 3, 4, 4, 4))
        >>> oup = m(inp)
        >>> oup.numpy().shape
        (2, 1, 2, 2, 2)
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, int, int]],
        stride: Union[int, Tuple[int, int, int]] = 1,
        padding: Union[int, Tuple[int, int, int]] = 0,
        dilation: Union[int, Tuple[int, int, int]] = 1,
        groups: int = 1,
        bias: bool = True,
        conv_mode: str = "cross_correlation",
    ):
        kernel_size = _triple_nonzero(kernel_size)
        stride = _triple_nonzero(stride)
        padding = _triple(padding)
        dilation = _triple_nonzero(dilation)
        self.conv_mode = conv_mode
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            0,
            dilation,
            groups,
            bias,
        )

    def _get_fanin(self):
        kt, kh, kw = self.kernel_size
        ic = self.in_channels
        return kt * kh * kw * ic

    def _infer_weight_shape(self):
        group = self.groups
        ichl = self.in_channels
        ochl = self.out_channels
        kt, kh, kw = self.kernel_size
        if group == 1:
            # Assume format is NCTHW
            return (ochl, ichl, kt, kh, kw)

        assert (
            ichl % group == 0 and ochl % group == 0
        ), "invalid config: in_channels={} out_channels={} group={}".format(
            ichl, ochl, group
        )
        # Assume format is NCTHW
        return (group, ochl // group, ichl // group, kt, kh, kw)

    def _infer_bias_shape(self):
        # Assume format is NCTHW
        return (1, self.out_channels, 1, 1, 1)

    def calc_conv(self, inp, weight, bias):
        return conv3d(
            inp,
            weight,
            bias,
            self.stride,
            self.padding,
            self.dilation,
            self.groups,
            self.conv_mode,
        )

    def forward(self, inp):
        return self.calc_conv(inp, self.weight, self.bias)

class ConvTranspose2d(_ConvNd):
    r"""Applies a 2D transposed convolution over an input tensor.

    This module is also known as a deconvolution or a fractionally-strided convolution.
    :class:`ConvTranspose2d` can be seen as the gradient of the :class:`Conv2d` operation
    with respect to its input.

    Convolution usually reduces the size of the input, while transposed convolution works
    the opposite way, transforming a smaller input into a larger output while preserving the
    connectivity pattern.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is
            an :class:`int`, the actual kernel size would be
            ``(kernel_size, kernel_size)``.
        stride: stride of the 2D convolution operation. Default: 1
        padding: size of the paddings added to the input on both sides of its
            spatial dimensions. Only zero-padding is supported. Default: 0
        output_padding: size of paddings appended to output. Default: 0
        dilation: dilation of the 2D convolution operation. Default: 1
        groups: number of groups into which the input and output channels are divided,
            so as to perform a "grouped convolution". When ``groups`` is not 1,
            ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
            and the shape of weight should be ``(groups, in_channels // groups,
            out_channels // groups, height, width)``. Default: 1
        bias: whether to add a bias onto the result of convolution. Default: True
        conv_mode: supports ``cross_correlation``. Default: ``cross_correlation``
        compute_mode: when set to "default", no special requirements will be
            placed on the precision of intermediate results. When set to "float32",
            "float32" would be used for the accumulator and intermediate results, but this
            is only effective when the input and output are of float16 dtype.

    Note:
        * ``weight`` usually has shape ``(in_channels, out_channels, height, width)``;
          if ``groups`` is not 1, the shape will be
          ``(groups, in_channels // groups, out_channels // groups, height, width)``
        * ``bias`` usually has shape ``(1, out_channels, 1, 1)``
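
    Examples:
        A minimal usage sketch; the output spatial size is assumed to follow the
        standard transposed-convolution relation, here
        ``(4 - 1) * 1 - 2 * 0 + 1 * (3 - 1) + 0 + 1 = 6`` on each spatial dimension:

        >>> import numpy as np
        >>> m = M.ConvTranspose2d(in_channels=3, out_channels=1, kernel_size=3)
        >>> inp = mge.tensor(np.arange(0, 96).astype("float32").reshape(2, 3, 4, 4))
        >>> oup = m(inp)
        >>> oup.numpy().shape
        (2, 1, 6, 6)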
  521. """
  522. output_padding = 0
  523. def __init__(
  524. self,
  525. in_channels: int,
  526. out_channels: int,
  527. kernel_size: Union[int, Tuple[int, int]],
  528. stride: Union[int, Tuple[int, int]] = 1,
  529. padding: Union[int, Tuple[int, int]] = 0,
  530. output_padding: Union[int, Tuple[int, int]] = 0,
  531. dilation: Union[int, Tuple[int, int]] = 1,
  532. groups: int = 1,
  533. bias: bool = True,
  534. conv_mode: str = "cross_correlation",
  535. compute_mode: str = "default",
  536. **kwargs
  537. ):
  538. kernel_size = _pair_nonzero(kernel_size)
  539. stride = _pair_nonzero(stride)
  540. padding = _pair(padding)
  541. output_padding = _pair(output_padding)
  542. dilation = _pair_nonzero(dilation)
  543. self.conv_mode = conv_mode
  544. self.compute_mode = compute_mode
  545. super().__init__(
  546. in_channels,
  547. out_channels,
  548. kernel_size,
  549. stride,
  550. padding,
  551. output_padding,
  552. dilation,
  553. groups,
  554. bias,
  555. **kwargs,
  556. )
  557. def _get_fanin(self):
  558. kh, kw = self.kernel_size
  559. oc = self.out_channels
  560. return kh * kw * oc
  561. def _infer_weight_shape(self):
  562. group = self.groups
  563. ichl = self.in_channels
  564. ochl = self.out_channels
  565. kh, kw = self.kernel_size
  566. if group == 1:
  567. # Assume format is NCHW
  568. return (ichl, ochl, kh, kw)
  569. assert (
  570. ichl % group == 0 and ochl % group == 0
  571. ), "invalid config: in_channels={} out_channels={} group={}".format(
  572. ichl, ochl, group
  573. )
  574. # Assume format is NCHW
  575. return (group, ichl // group, ochl // group, kh, kw)
  576. def _infer_bias_shape(self):
  577. # Assume format is NCHW
  578. return (1, self.out_channels, 1, 1)
  579. def calc_conv_transpose2d(self, inp, weight, bias):
  580. return conv_transpose2d(
  581. inp,
  582. weight,
  583. bias,
  584. self.stride,
  585. self.padding,
  586. self.output_padding,
  587. self.dilation,
  588. self.groups,
  589. self.conv_mode,
  590. self.compute_mode,
  591. )
  592. def forward(self, inp):
  593. return self.calc_conv_transpose2d(inp, self.weight, self.bias)

class LocalConv2d(Conv2d):
    r"""Applies a spatial convolution with untied kernels over a grouped, channeled 4D input tensor.
    It is also known as the locally connected layer.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        input_height: the height of the input images.
        input_width: the width of the input images.
        kernel_size: size of weight on spatial dimensions. If kernel_size is
            an :class:`int`, the actual kernel size would be
            ``(kernel_size, kernel_size)``.
        stride: stride of the 2D convolution operation. Default: 1
        padding: size of the paddings added to the input on both sides of its
            spatial dimensions. Only zero-padding is supported. Default: 0
        dilation: dilation of the 2D convolution operation. Default: 1
        groups: number of groups into which the input and output channels are divided,
            so as to perform a "grouped convolution". When ``groups`` is not 1,
            ``in_channels`` and ``out_channels`` must be divisible by ``groups``. Default: 1

    Note:
        * ``weight`` has shape ``(groups, out_height, out_width, in_channels // groups,
          kernel_height, kernel_width, out_channels // groups)``; with the default
          ``groups=1`` this is ``(1, out_height, out_width, in_channels, kernel_height,
          kernel_width, out_channels)``
        * the layer is always created with ``bias=False``, so no bias is added
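
    Examples:
        A minimal sketch; the weight shape follows the layout described above with
        ``out_height = out_width = (4 - 3) // 1 + 1 = 2``, and the result of the
        forward call is expected to be an NCHW tensor of shape ``(2, 1, 2, 2)``:

        >>> import numpy as np
        >>> m = M.LocalConv2d(in_channels=3, out_channels=1, input_height=4, input_width=4, kernel_size=3)
        >>> m.weight.numpy().shape
        (1, 2, 2, 3, 3, 3, 1)
        >>> inp = mge.tensor(np.arange(0, 96).astype("float32").reshape(2, 3, 4, 4))
        >>> oup = m(inp)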
  616. """
  617. def __init__(
  618. self,
  619. in_channels: int,
  620. out_channels: int,
  621. input_height: int,
  622. input_width: int,
  623. kernel_size: Union[int, Tuple[int, int]],
  624. stride: Union[int, Tuple[int, int]] = 1,
  625. padding: Union[int, Tuple[int, int]] = 0,
  626. dilation: Union[int, Tuple[int, int]] = 1,
  627. groups: int = 1,
  628. conv_mode: str = "cross_correlation",
  629. **kwargs
  630. ):
  631. self.input_height = input_height
  632. self.input_width = input_width
  633. super().__init__(
  634. in_channels,
  635. out_channels,
  636. kernel_size,
  637. stride,
  638. padding,
  639. dilation,
  640. groups,
  641. bias=False,
  642. **kwargs,
  643. )
  644. def _infer_weight_shape(self):
  645. group = self.groups
  646. out_height = (
  647. self.input_height + self.padding[0] * 2 - self.kernel_size[0]
  648. ) // self.stride[0] + 1
  649. out_width = (
  650. self.input_width + self.padding[1] * 2 - self.kernel_size[1]
  651. ) // self.stride[1] + 1
  652. # Assume format is NCHW
  653. return (
  654. group,
  655. out_height,
  656. out_width,
  657. self.in_channels // group,
  658. self.kernel_size[0],
  659. self.kernel_size[1],
  660. self.out_channels // group,
  661. )
  662. def forward(self, inp):
  663. return local_conv2d(
  664. inp,
  665. self.weight,
  666. None,
  667. self.stride,
  668. self.padding,
  669. self.dilation,
  670. self.conv_mode,
  671. )
  672. class ConvRelu2d(Conv2d):
  673. r"""A fused :class:`~.Module` including :class:`~.module.Conv2d` and :func:`~.relu`.
  674. Could be replaced with :class:`~.QATModule` version :class:`~.qat.ConvRelu2d` using :func:`~.quantize.quantize_qat`.
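
    Examples:
        A minimal sketch showing that the fused module behaves like a
        :class:`~.module.Conv2d` followed by :func:`~.relu`, so its output is
        non-negative and has the usual convolution output shape:

        >>> import numpy as np
        >>> m = M.ConvRelu2d(in_channels=3, out_channels=1, kernel_size=3)
        >>> inp = mge.tensor(np.random.randn(2, 3, 4, 4).astype("float32"))
        >>> oup = m(inp)
        >>> oup.numpy().shape
        (2, 1, 2, 2)
        >>> bool((oup.numpy() >= 0).all())
        True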
  675. """
  676. def forward(self, inp):
  677. return relu(self.calc_conv(inp, self.weight, self.bias))
  678. class ConvTransposeRelu2d(ConvTranspose2d):
  679. r"""A fused :class:`~.Module` including :class:`~.module.ConvTranspose2d` and :func:`~.relu`.
  680. Could be replaced with :class:`~.QATModule` version :class:`~.qat.ConvTransposeRelu2d` using :func:`~.quantize.quantize_qat`.
  681. """
  682. def forward(self, inp):
  683. return relu(self.calc_conv_transpose2d(inp, self.weight, self.bias))
  684. class DeformableConv2d(_ConvNd):
  685. r"""Deformable Convolution.
  686. Args:
  687. in_channels: number of input channels.
  688. out_channels: number of output channels.
  689. kernel_size: size of weight on spatial dimensions. If kernel_size is
  690. an :class:`int`, the actual kernel size would be
  691. ``(kernel_size, kernel_size)``.
  692. stride: stride of the 2D convolution operation. Default: 1
  693. padding: size of the paddings added to the input on both sides of its
  694. spatial dimensions. Only zero-padding is supported. Default: 0
  695. dilation: dilation of the 2D convolution operation. Default: 1
  696. groups: number of groups into which the input and output channels are divided,
  697. so as to perform a ``grouped convolution``. When ``groups`` is not 1,
  698. ``in_channels`` and ``out_channels`` must be divisible by groups,
  699. and the shape of weight should be ``(groups, out_channel // groups,
  700. in_channels // groups, height, width)``. Default: 1
  701. bias: whether to add a bias onto the result of convolution. Default: True
  702. conv_mode: Supports `cross_correlation`. Default: `cross_correlation`
  703. compute_mode: When set to "default", no special requirements will be
  704. placed on the precision of intermediate results. When set to "float32",
  705. "float32" would be used for accumulator and intermediate result, but only
  706. effective when input and output are of float16 dtype.
  707. Note:
  708. * ``weight`` usually has shape ``(out_channels, in_channels, height, width)`` ,
  709. if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, height, width)``
  710. * ``bias`` usually has shape ``(1, out_channels, *1)``
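
    Examples:
        A minimal sketch of the parameter layout described above; note that
        :meth:`forward` additionally expects ``offset`` and ``mask`` tensors
        describing the sampling locations (see :func:`~.deformable_conv2d`):

        >>> m = M.DeformableConv2d(in_channels=3, out_channels=1, kernel_size=3)
        >>> m.weight.numpy().shape
        (1, 3, 3, 3)
        >>> m.bias.numpy().shape
        (1, 1, 1, 1)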
  711. """
  712. def __init__(
  713. self,
  714. in_channels: int,
  715. out_channels: int,
  716. kernel_size: Union[int, Tuple[int, int]],
  717. stride: Union[int, Tuple[int, int]] = 1,
  718. padding: Union[int, Tuple[int, int]] = 0,
  719. dilation: Union[int, Tuple[int, int]] = 1,
  720. groups: int = 1,
  721. bias: bool = True,
  722. conv_mode: str = "cross_correlation",
  723. compute_mode: str = "default",
  724. **kwargs
  725. ):
  726. kernel_size = _pair_nonzero(kernel_size)
  727. stride = _pair_nonzero(stride)
  728. padding = _pair(padding)
  729. dilation = _pair_nonzero(dilation)
  730. self.conv_mode = conv_mode
  731. self.compute_mode = compute_mode
  732. super().__init__(
  733. in_channels,
  734. out_channels,
  735. kernel_size,
  736. stride,
  737. padding,
  738. 0,
  739. dilation,
  740. groups,
  741. bias,
  742. **kwargs,
  743. )
  744. def _get_fanin(self):
  745. kh, kw = self.kernel_size
  746. ic = self.in_channels
  747. return kh * kw * ic
  748. def _infer_weight_shape(self):
  749. group = self.groups
  750. ichl = self.in_channels
  751. ochl = self.out_channels
  752. kh, kw = self.kernel_size
  753. if group == 1:
  754. # Assume format is NCHW
  755. return (ochl, ichl, kh, kw)
  756. assert (
  757. ichl % group == 0 and ochl % group == 0
  758. ), "invalid config: in_channels={} out_channels={} group={}".format(
  759. ichl, ochl, group
  760. )
  761. # Assume format is NCHW
  762. return (group, ochl // group, ichl // group, kh, kw)
  763. def _infer_bias_shape(self):
  764. # Assume format is NCHW
  765. return (1, self.out_channels, 1, 1)
  766. def calc_conv(self, inp, weight, offset, mask, bias):
  767. return deformable_conv2d(
  768. inp,
  769. weight,
  770. offset,
  771. mask,
  772. bias,
  773. self.stride,
  774. self.padding,
  775. self.dilation,
  776. self.groups,
  777. self.conv_mode,
  778. self.compute_mode,
  779. )
  780. def forward(self, inp, offset, mask):
  781. return self.calc_conv(inp, self.weight, offset, mask, self.bias)

class ConvTranspose3d(_ConvNd):
    r"""Applies a 3D transposed convolution over an input tensor.

    Only the case where ``groups == 1`` and ``conv_mode == "cross_correlation"`` is supported.

    :class:`ConvTranspose3d` can be seen as the gradient of the :class:`Conv3d` operation
    with respect to its input.

    3D convolution usually reduces the size of the input, while 3D transposed convolution
    works the opposite way, transforming a smaller input into a larger output while
    preserving the connectivity pattern.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: size of weight on spatial dimensions. If ``kernel_size`` is
            an :class:`int`, the actual kernel size would be
            ``(kernel_size, kernel_size, kernel_size)``.
        stride: stride of the 3D convolution operation. Default: 1
        padding: size of the paddings added to the input on all sides of its
            spatial dimensions. Only zero-padding is supported. Default: 0
        output_padding: size of paddings appended to output. Default: 0
        dilation: dilation of the 3D convolution operation. Default: 1
        groups: number of groups into which the input and output channels are divided,
            so as to perform a "grouped convolution". When ``groups`` is not 1,
            ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
            and the shape of weight should be ``(groups, in_channels // groups,
            out_channels // groups, depth, height, width)``. Default: 1
        bias: whether to add a bias onto the result of convolution. Default: True

    Note:
        * ``weight`` usually has shape ``(in_channels, out_channels, depth, height, width)``
        * ``bias`` usually has shape ``(1, out_channels, 1, 1, 1)``
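
    Examples:
        A minimal usage sketch; the output spatial size is assumed to follow the
        standard transposed-convolution relation, here
        ``(4 - 1) * 1 - 2 * 0 + 1 * (3 - 1) + 0 + 1 = 6`` on each spatial dimension:

        >>> import numpy as np
        >>> m = M.ConvTranspose3d(in_channels=3, out_channels=1, kernel_size=3)
        >>> inp = mge.tensor(np.arange(0, 384).astype("float32").reshape(2, 3, 4, 4, 4))
        >>> oup = m(inp)
        >>> oup.numpy().shape
        (2, 1, 6, 6, 6)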
  810. """
  811. output_padding = 0
  812. def __init__(
  813. self,
  814. in_channels: int,
  815. out_channels: int,
  816. kernel_size: Union[int, Tuple[int, int, int]],
  817. stride: Union[int, Tuple[int, int, int]] = 1,
  818. padding: Union[int, Tuple[int, int, int]] = 0,
  819. output_padding: Union[int, Tuple[int, int, int]] = 0,
  820. dilation: Union[int, Tuple[int, int, int]] = 1,
  821. groups: int = 1,
  822. bias: bool = True,
  823. ):
  824. kernel_size = _triple_nonzero(kernel_size)
  825. stride = _triple_nonzero(stride)
  826. padding = _triple(padding)
  827. dilation = _triple_nonzero(dilation)
  828. super().__init__(
  829. in_channels=in_channels,
  830. out_channels=out_channels,
  831. kernel_size=kernel_size,
  832. stride=stride,
  833. padding=padding,
  834. output_padding=output_padding,
  835. dilation=dilation,
  836. groups=groups,
  837. bias=bias,
  838. )
  839. def _get_fanin(self):
  840. kt, kh, kw = self.kernel_size
  841. ic = self.in_channels
  842. return kt * kh * kw * ic
  843. def _infer_weight_shape(self):
  844. group = self.groups
  845. ichl = self.in_channels
  846. ochl = self.out_channels
  847. kt, kh, kw = self.kernel_size
  848. if group == 1:
  849. # Assume format is NCHW
  850. return (ichl, ochl, kt, kh, kw)
  851. assert (
  852. ichl % group == 0 and ochl % group == 0
  853. ), "invalid config: in_channels={} out_channels={} group={}".format(
  854. ichl, ochl, group
  855. )
  856. # Assume format is NCHW
  857. return (group, ichl // group, ochl // group, kt, kh, kw)
  858. def _infer_bias_shape(self):
  859. # Assume format is NCTHW
  860. return (1, self.out_channels, 1, 1, 1)
  861. def forward(self, inp):
  862. return conv_transpose3d(
  863. inp,
  864. self.weight,
  865. self.bias,
  866. self.stride,
  867. self.padding,
  868. self.output_padding,
  869. self.dilation,
  870. )

class RegionRestrictedConv(_ConvNd):
    r"""Applies a 2D region-restricted convolution over an input tensor.

    For instance, given an input of the size :math:`(N, C_{\text{in}}, H, W)`,
    this layer generates an output of the size
    :math:`(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})` through the
    process described as below:

    .. math::
        \text{out}(N_i, C_{\text{out}_j}) =
        \sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k)

    where :math:`\star` is the valid 2D cross-correlation operator,
    :math:`N` is the batch size, :math:`C` denotes the number of channels,
    :math:`H` is the height of input planes in pixels, and :math:`W` is
    the width in pixels.

    In general, output feature maps' shapes can be inferred as follows:

    input: :math:`(N, C_{\text{in}}, H_{\text{in}}, W_{\text{in}})`

    output: :math:`(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})` where

    .. math::
        \text{H}_{out} = \lfloor \frac{\text{H}_{in} + 2 * \text{padding[0]} -
        \text{dilation[0]} * (\text{kernel_size[0]} - 1) - 1}{\text{stride[0]}} + 1 \rfloor

    .. math::
        \text{W}_{out} = \lfloor \frac{\text{W}_{in} + 2 * \text{padding[1]} -
        \text{dilation[1]} * (\text{kernel_size[1]} - 1) - 1}{\text{stride[1]}} + 1 \rfloor

    When ``groups == in_channels`` and ``out_channels == K * in_channels``,
    where `K` is a positive integer, this operation is also known as depthwise
    convolution.

    In other words, for an input of size :math:`(N, C_{in}, H_{in}, W_{in})`,
    a depthwise convolution with a depthwise multiplier `K` can be constructed
    by the arguments :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: size of weight on spatial dimensions. If kernel_size is
            an :class:`int`, the actual kernel size would be
            ``(kernel_size, kernel_size)``.
        stride: stride of the 2D convolution operation. Default: 1
        padding: size of the paddings added to the input on both sides of its
            spatial dimensions. Default: 0
        dilation: dilation of the 2D convolution operation. Default: 1
        groups: number of groups into which the input and output channels are divided,
            so as to perform a "grouped convolution". When ``groups`` is not 1,
            ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
            and the shape of weight should be ``(groups, out_channels // groups,
            in_channels // groups, height, width)``. Default: 1
        bias: whether to add a bias onto the result of convolution. Default: True
        conv_mode: supports ``cross_correlation``. Default: ``cross_correlation``
        compute_mode: when set to "default", no special requirements will be
            placed on the precision of intermediate results. When set to "float32",
            "float32" would be used for the accumulator and intermediate results, but this
            is only effective when the input and output are of float16 dtype.
        padding_mode: "zeros", "reflect" or "replicate". Default: "zeros".
            Refer to :class:`~.module.padding.Pad` for more information.

    Note:
        * the weight shape is always
          ``(groups, out_channels // groups, in_channels // groups, height, width)``,
          because RegionRestrictedConv supports grouped convolution only.

    Examples:
        >>> import numpy as np
        >>> import megengine as mge
        >>> import megengine.module as M
        >>> rrconv = M.RegionRestrictedConv(in_channels=2, out_channels=2, kernel_size=2, groups=2)
        >>> inp = mge.tensor(np.random.randn(1, 2, 2, 2).astype(np.float32))
        >>> rin = mge.tensor(np.random.randn(1, 2, 2).astype(np.int32))
        >>> rout = mge.tensor(np.random.randn(1, 1, 1).astype(np.int32))
        >>> oup = rrconv(inp, rin, rout)
        >>> oup.numpy().shape
        (1, 2, 1, 1)
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, int]],
        groups: int = 1,
        bias: bool = True,
        stride: Union[int, Tuple[int, int]] = 1,
        padding: Union[int, Tuple[int, int]] = 0,
        dilation: Union[int, Tuple[int, int]] = 1,
        conv_mode: str = "cross_correlation",
        compute_mode: str = "default",
        padding_mode: str = "zeros",
        **kwargs
    ):
        kernel_size = _pair_nonzero(kernel_size)
        stride = _pair_nonzero(stride)
        padding = _pair(padding)
        dilation = _pair_nonzero(dilation)
        self.conv_mode = conv_mode
        self.compute_mode = compute_mode
        self.padding_mode = padding_mode
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            0,
            dilation,
            groups,
            bias,
            **kwargs,
        )

    def _get_fanin(self):
        kh, kw = self.kernel_size
        ic = self.in_channels
        return kh * kw * ic

    def _infer_weight_shape(self):
        group = self.groups
        ichl = self.in_channels
        ochl = self.out_channels
        kh, kw = self.kernel_size
        assert (
            ichl % group == 0 and ochl % group == 0
        ), "invalid config: in_channels={} out_channels={} group={}".format(
            ichl, ochl, group
        )
        # Assume format is NCHW
        return (group, ochl // group, ichl // group, kh, kw)

    def _infer_bias_shape(self):
        # Assume format is NCHW
        return (1, self.out_channels, 1, 1)

    def get_pad_width(self):
        return (
            (0, 0),
            (0, 0),
            (self.padding[0], self.padding[0]),
            (self.padding[1], self.padding[1]),
        )

    def calc_conv(self, inp, weight, rin, rout, bias):
        assert self.padding_mode in [
            "zeros",
            "reflect",
            "replicate",
        ]
        return region_restricted_conv(
            inp,
            weight,
            rin,
            rout,
            bias,
            self.stride,
            self.padding,
            self.dilation,
            self.groups,
            self.conv_mode,
            self.compute_mode,
        )

    def forward(self, inp, rin, rout):
        return self.calc_conv(inp, self.weight, rin, rout, self.bias)