

/**
 * \file dnn/test/naive/convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include "test/naive/fixture.h"

#include "megdnn/oprs/nn.h"
#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/convolution.h"
#include "test/common/extra_impl_helper.h"
#include "test/common/random_state.h"

using namespace megdnn;
using namespace test;

#if MEGDNN_WITH_BENCHMARK
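// Benchmark the naive ConvolutionBackwardData implementation on fp32 data:
// each workload is executed 500 times and the achieved throughput is printed
// in MFLOPS, sweeping square outputs from 2x2 up to 128x128 with a 3x3 filter.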
TEST_F(NAIVE, BENCHMARK_CONVOLUTION_BACKWARD_DATA) {
    using Param = ConvolutionBackwardData::Param;
    auto run = [&](const TensorLayoutArray& tensors, Param param) {
        Benchmarker<ConvolutionBackwardData> benchmarker_naive(handle());
        size_t RUN = 500;
        auto tfloat = benchmarker_naive.set_display(false)
                              .set_dtype(0, dtype::Float32{})
                              .set_dtype(1, dtype::Float32{})
                              .set_times(RUN)
                              .set_param(param)
                              .exec(tensors);
        size_t IC = tensors[0][1];
        size_t FH = tensors[0][2];
        size_t FW = tensors[0][3];
        printf("fp32 flops: %.3f mflops\n",
               (IC * tensors[1].total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                       size_t fh, size_t fw, size_t stride = 1,
                       size_t padding = 0) {
        Param param;
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n",
               oc, ic, ow, oh, stride, fh);
        TensorLayout diff = TensorLayout{{n, oc, oh, ow}, dtype::Float32()};
        TensorLayout filter = TensorLayout{{oc, ic, fh, fw}, dtype::Float32()};
        TensorLayout grad;
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        run(TensorLayoutArray{filter, diff, grad}, param);
    };
    profile(1, 1, 2, 2, 1, 3, 3);
    profile(1, 1, 4, 4, 1, 3, 3);
    profile(1, 1, 8, 8, 1, 3, 3);
    profile(1, 1, 16, 16, 1, 3, 3);
    profile(1, 1, 32, 32, 1, 3, 3);
    profile(1, 1, 64, 64, 1, 3, 3);
    profile(1, 1, 128, 128, 1, 3, 3);
}
#endif
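// Forward convolution on asymmetric-quantized uint8 data: the source uses
// scale 0.1 / zero point 128, the filter scale 0.2 / zero point 124, and the
// expected int32 output carries the product of the two input scales.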
TEST_F(NAIVE, CONVOLUTION_QUANTIZED8x8x32) {
    Checker<Convolution> checker(handle(), /* check_dispatch */ false);
    Convolution::Param param;
    param.format = Convolution::Param::Format::NCHW;
    checker.set_param(param).exect(
            Testcase{
                    TensorValue(
                            {1, 1, 4, 4}, dtype::Quantized8Asymm(0.1f, (uint8_t)128),
                            {90, 136, 85, 204, 48, 9, 226, 25, 118, 109, 87,
                             132, 104, 163, 25, 90}),
                    TensorValue(
                            {3, 1, 3, 3}, dtype::Quantized8Asymm(0.2f, (uint8_t)124),
                            {153, 170, 102, 103, 23, 213, 116, 195, 191,
                             44, 50, 247, 172, 42, 32, 233, 163, 247,
                             120, 241, 209, 83, 201, 115, 32, 140, 147}),
                    {}},
            Testcase{
                    {},
                    {},
                    TensorValue(
                            {1, 3, 2, 2}, dtype::QuantizedS32(0.1f * 0.2f),
                            {18617, -22475, -15694, -1920,
                             -12813, 4440, 18190, -13195,
                             -9659, 15933, -5558, -4969})});
}
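// The deconvolution counterpart of the test above: ConvolutionBackwardData on
// Quantized8Asymm filter and diff, again with the output scale equal to the
// product of the two input scales.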
TEST_F(NAIVE, DECONVOLUTION_QUANTIZED8x8x32) {
    Checker<ConvolutionBackwardData> checker(handle(), /* check_dispatch */ false);
    ConvolutionBackwardData::Param param;
    param.format = ConvolutionBackwardData::Param::Format::NCHW;
    checker.set_param(param).exect(
            Testcase{
                    TensorValue(
                            {1, 3, 3, 3}, dtype::Quantized8Asymm(0.0084f, (uint8_t)135),
                            {131, 155, 190, 255, 43, 155, 97, 238, 127,
                             157, 72, 161, 157, 0, 69, 204, 167, 180,
                             108, 47, 203, 179, 136, 83, 143, 182, 105}),
                    TensorValue(
                            {1, 1, 4, 4}, dtype::Quantized8Asymm(0.1f, (uint8_t)157),
                            {126, 49, 99, 0, 173, 19, 129, 19, 161, 180, 32,
                             255, 203, 120, 208, 96}),
                    {}},
            Testcase{
                    {},
                    {},
                    TensorValue(
                            {1, 3, 6, 6}, dtype::QuantizedS32(0.1f * 0.0084f),
                            {124, -188, -3633, -6472, -6330, -8635,
                             -3784, -9236, 588, -23262, 8984, -10730,
                             3082, -17133, 2164, -17515, -8486, 3886,
                             -312, 10352, -28728, 26413, -23921, -291,
                             5368, -9134, 17531, -29535, 17726, -2004,
                             -1748, 6144, -6117, 7867, -6691, 488,
                             -682, -423, 4722, -2608, 8383, -4082,
                             -330, -2235, 23844, 6644, 32989, 6774,
                             -1699, -13386, 4010, 2932, 3420, 4591,
                             2204, -12756, -7098, -4632, -5487, -14264,
                             1288, -5309, -4628, -1988, 2380, 8436,
                             3174, -1081, 4405, -4242, 343, -2745,
                             837, 5644, 8962, 1999, 9872, -10676,
                             -1796, -2465, 12940, -4544, 13099, -1220,
                             348, -9350, -5189, 10252, -21445, 18550,
                             -938, -2385, -7868, -646, 9788, -5104,
                             2056, -1210, -224, -6490, 5643, 232,
                             368, 1866, -2711, 3019, -4397, 1830})});
}
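// NCHW4 convolution checked against a reference that round-trips through the
// plain NCHW kernel (see extra_impl below), for both DENSE and GROUP sparsity.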
TEST_F(NAIVE, CONVOLUTION_WITH_NCHW4) {
    Checker<Convolution> checker(handle());
    Convolution::Param param;
    param.format = Convolution::Param::Format::NCHW4;
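    // Map a contiguous NCHW (or group NCHW) layout to its NCHW4 counterpart:
    // split the channel axis into C/4 x 4 and dimshuffle the 4-element pack
    // innermost.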
    auto convert_true_format = [](const TensorLayout& layout) {
        if (layout.ndim == 4)
            return layout.reshape({layout[0], layout[1] / 4, layout[2], layout[3], 4})
                    .dimshuffle({0, 1, 4, 2, 3});
        else
            return layout
                    .reshape({layout[0], layout[1], layout[2] / 4, layout[3],
                              layout[4], 4})
                    .dimshuffle({0, 1, 2, 5, 3, 4});
    };
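    // Reference implementation: relayout src and filter from NCHW4 into
    // freshly allocated NCHW buffers, run the plain NCHW kernel, then relayout
    // the result back into the NCHW4 output tensor.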
    auto extra_impl = [&, this](const TensorNDArray& tensors) {
        auto conv = handle()->create_operator<Convolution>();
        conv->param() = param;
        conv->param().format = Convolution::Param::Format::NCHW;
        TensorNDArray nchw_tensors;
        for (size_t i = 0; i < tensors.size(); ++i) {
            auto layout = tensors[i].layout;
            if (layout.ndim == 5) {
                layout = layout.reshape(
                        {layout[0], layout[1] * layout[4], layout[2], layout[3]});
            } else {
                megdnn_assert(
                        layout.ndim == 6 &&
                        param.sparse == Convolution::Param::Sparse::GROUP);
                layout = layout.reshape(
                        {layout[0], layout[1], layout[2] * layout[5], layout[3],
                         layout[4]});
            }
            nchw_tensors.emplace_back(malloc(layout.span().dist_byte()), layout);
        }
        TensorNDArray nchw4_tensors;
        for (size_t i = 0; i < tensors.size(); ++i) {
            auto layout = convert_true_format(nchw_tensors[i].layout);
            nchw4_tensors.emplace_back(tensors[i].raw_ptr(), std::move(layout));
        }
        auto workspace_size = conv->get_workspace_in_bytes(
                tensors[0].layout, tensors[1].layout, tensors[2].layout, nullptr);
        dt_byte* workspace_ptr = static_cast<dt_byte*>(malloc(workspace_size));
        Workspace workspace{workspace_ptr, workspace_size};
        auto relayout = handle()->create_operator<RelayoutForward>();
        relayout->exec(nchw4_tensors[0], nchw_tensors[0]);
        relayout->exec(nchw4_tensors[1], nchw_tensors[1]);
        conv->exec(nchw_tensors[0], nchw_tensors[1], nchw_tensors[2], nullptr,
                   workspace);
        relayout->exec(nchw_tensors[2], nchw4_tensors[2]);
        free(workspace_ptr);
        for (auto&& tensor : nchw_tensors) {
            free(tensor.raw_ptr());
        }
    };
    UniformIntRNG rng{0, 4};
    ConstValue filter_rng{1};
    checker.set_extra_opr_impl(extra_impl)
            .set_rng(0, &filter_rng)
            .set_rng(1, &filter_rng);
    checker.set_param(param)
            .execs({{1, 2, 2, 2, 4}, {4, 2, 1, 1, 4}, {}})
            .execs({{20, 3, 30, 30, 4}, {4, 3, 1, 1, 4}, {}})
            .execs({{20, 2, 30, 30, 4}, {4, 2, 3, 3, 4}, {}});
    param.sparse = Convolution::Param::Sparse::GROUP;
    checker.set_param(param)
            .execs({{20, 15, 30, 30, 4}, {5, 4, 3, 3, 3, 4}, {}})
            .execs({{20, 25, 30, 30, 4}, {5, 4, 5, 1, 1, 4}, {}})
            .execs({{20, 27, 30, 30, 4}, {3, 4, 9, 1, 1, 4}, {}});
}
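// BFloat16 forward convolution, checked against an fp32 reference built by
// extra_impl_helper; the loose epsilon (1e-1) accounts for bf16's reduced
// mantissa. Both DEFAULT and FLOAT32 compute modes are covered.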
TEST_F(NAIVE, CONVOLUTION_BFLOAT16) {
    Checker<Convolution> checker(handle(), false);
    using Param = Convolution::Param;
    Param param;
    param.sparse = param::Convolution::Sparse::DENSE;
    Param impl_param = param;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw) {
        float scale = 1.0f / sqrt(ic * fh * fw);
        UniformFloatRNG rng(scale, 2 * scale);
        param.pad_h = param.pad_w = 1;
        param.stride_h = param.stride_w = 1;
        impl_param.pad_h = impl_param.pad_w = 1;
        impl_param.stride_h = impl_param.stride_w = 1;
        auto extra_impl = extra_impl_helper<Convolution>(handle(), impl_param);
        for (auto cmode : std::vector<Param::ComputeMode>{
                     Param::ComputeMode::DEFAULT, Param::ComputeMode::FLOAT32}) {
            param.compute_mode = cmode;
            checker.set_param(param)
                    .set_dtype(0, dtype::BFloat16())
                    .set_dtype(1, dtype::BFloat16())
                    // Use inferred output dtype.
                    .set_dtype(2, {})
                    .set_rng(0, &rng)
                    .set_rng(1, &rng)
                    .set_extra_opr_impl(extra_impl)
                    .set_epsilon(1e-1)
                    .execs({{n, ic, ih, iw}, {oc, ic, fh, fw}, {}});
        }
    };
    run(1, 1, 20, 20, 5, 3, 3);
    run(1, 2, 8, 7, 11, 3, 1);
}
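// BFloat16 backward-data convolution: deduce the grad layout from filter and
// diff, then check against the fp32 reference implementation.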
TEST_F(NAIVE, CONVOLUTION_BACKWARD_DATA_BFLOAT16) {
    Checker<ConvolutionBackwardData> checker(handle(), false);
    using Param = ConvolutionBackwardData::Param;
    Param param, impl_param;
    param.sparse = Param::Sparse::DENSE;
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   const Param::ComputeMode& cmode = Param::ComputeMode::DEFAULT) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = 1;
        param.compute_mode = cmode;
        TensorLayout diff = TensorLayout{{n, oc, oh, ow}, dtype::BFloat16()};
        TensorLayout filter{{oc, ic, fh, fw}, dtype::BFloat16()};
        TensorLayout grad;
        {
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        impl_param = param;
        impl_param.compute_mode = Param::ComputeMode::DEFAULT;
        auto extra_impl =
                extra_impl_helper<ConvolutionBackwardData>(handle(), impl_param);
        checker.set_param(param)
                .set_extra_opr_impl(extra_impl)
                .set_epsilon(1e-1)
                .set_dtype(0, dtype::BFloat16())
                .set_dtype(1, dtype::BFloat16());
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    run(4, 3, 10, 13, 5, 1, 1, 1, 0);
    run(2, 1, 24, 43, 11, 3, 3, 2, 1, Param::ComputeMode::FLOAT32);
}
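// BFloat16 backward-filter convolution: the dst layout is deduced via a
// forward convolution, then the gradient w.r.t. the filter is checked against
// the fp32 reference.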
TEST_F(NAIVE, CONVOLUTION_BACKWARD_FILTER_BFLOAT16) {
    using namespace convolution;
    Checker<ConvolutionBackwardFilter> checker(handle(), false);
    using Param = ConvolutionBackwardFilter::Param;
    Param param;
    Param impl_param = param;
    auto run = [&](size_t n, size_t ic, size_t ih, size_t iw, size_t oc,
                   size_t fh, size_t fw,
                   const Param::ComputeMode& cmode = Param::ComputeMode::DEFAULT) {
        auto src = TensorLayout({n, ic, ih, iw}, dtype::BFloat16());
        auto filter = TensorLayout({oc, ic, fh, fw}, dtype::BFloat16());
        TensorLayout dst;
        {
            auto opr = handle()->create_operator<Convolution>();
            opr->param() = param;
            opr->deduce_layout(src, filter, dst);
        }
        float scale = 1.0f / sqrt(dst[2] * dst[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        src.dtype = dst.dtype = filter.dtype = dtype::BFloat16();
        param.compute_mode = cmode;
        impl_param = param;
        impl_param.compute_mode = Param::ComputeMode::DEFAULT;
        auto extra_impl =
                extra_impl_helper<ConvolutionBackwardFilter>(handle(), impl_param);
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_dtype(0, dtype::BFloat16())
                .set_dtype(1, dtype::BFloat16())
                .set_epsilon(1e-1)
                .set_extra_opr_impl(extra_impl)
                .set_param(param)
                .exec(TensorLayoutArray{src, dst, filter});
    };
    run(1, 2, 8, 7, 11, 3, 1);
    run(1, 1, 20, 20, 5, 3, 3, Param::ComputeMode::FLOAT32);
}

// vim: syntax=cpp.doxygen