You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

convolution.cpp 9.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. /**
  2. * \file dnn/test/naive/convolution.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/naive/fixture.h"
  12. #include "test/common/benchmarker.h"
  13. #include "megdnn/oprs/nn.h"
  14. #include "test/common/checker.h"
  15. #include "test/common/random_state.h"
  16. #include "test/common/convolution.h"
  17. using namespace megdnn;
  18. using namespace test;
  19. #if MEGDNN_WITH_BENCHMARK
  20. TEST_F(NAIVE, BENCHMARK_CONVOLUTION_BACKWARD_DATA) {
  21. using Param = ConvolutionBackwardData::Param;
  22. auto run = [&](const TensorLayoutArray& tensors, Param param) {
  23. Benchmarker<ConvolutionBackwardData> benchmarker_naive(handle());
  24. size_t RUN = 500;
  25. auto tfloat = benchmarker_naive.set_display(false)
  26. .set_dtype(0, dtype::Float32{})
  27. .set_dtype(1, dtype::Float32{})
  28. .set_times(RUN)
  29. .set_param(param)
  30. .exec(tensors);
  31. size_t IC = tensors[0][1];
  32. size_t FH = tensors[0][2];
  33. size_t FW = tensors[0][3];
  34. printf("fp32 flops: %.3f mflops\n",
  35. (IC * tensors[1].total_nr_elems() * FH * FW * 2) /
  36. (tfloat / RUN * 1000));
  37. };
  38. auto profile = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
  39. size_t fh, size_t fw, size_t stride = 1,
  40. size_t padding = 0) {
  41. Param param;
  42. param.pad_h = param.pad_w = padding;
  43. param.stride_h = param.stride_w = stride;
  44. printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n",
  45. oc, ic, ow, oh, stride, fh);
  46. TensorLayout diff = TensorLayout{{n, oc, oh, ow}, dtype::Float32()};
  47. TensorLayout filter = TensorLayout{{oc, ic, fh, fw}, dtype::Float32()};
  48. TensorLayout grad;
  49. {
  50. auto opr = handle()->create_operator<ConvolutionBackwardData>();
  51. opr->param() = param;
  52. opr->deduce_layout(filter, diff, grad);
  53. }
  54. run(TensorLayoutArray{filter, diff, grad}, param);
  55. };
  56. profile(1, 1, 2, 2, 1, 3, 3);
  57. profile(1, 1, 4, 4, 1, 3, 3);
  58. profile(1, 1, 8, 8, 1, 3, 3);
  59. profile(1, 1, 16, 16, 1, 3, 3);
  60. profile(1, 1, 32, 32, 1, 3, 3);
  61. profile(1, 1, 64, 64, 1, 3, 3);
  62. profile(1, 1, 128, 128, 1, 3, 3);
  63. }
  64. #endif
// Golden-value test: forward convolution on asymmetrically quantized uint8
// input/filter, accumulating into QuantizedS32 whose scale is the product of
// the two input scales (0.1f * 0.2f). check_dispatch is disabled because the
// naive handle runs the kernel inline.
TEST_F(NAIVE, CONVOLUTION_QUANTIZED8x8x32) {
    Checker<Convolution> checker(handle(), /* check_dispatch */ false);
    Convolution::Param param;
    param.format = Convolution::Param::Format::NCHW;
    // exect() takes explicit input values and expected output values; the
    // empty layouts ({}) mark tensors whose values are not specified.
    checker.set_param(param).exect(
            Testcase{
                    // src: 1x1x4x4, scale 0.1, zero point 128.
                    TensorValue({1, 1, 4, 4}, dtype::Quantized8Asymm(0.1f, (uint8_t)128),
                                {90, 136, 85, 204,
                                 48, 9, 226, 25,
                                 118, 109, 87, 132,
                                 104, 163, 25, 90}),
                    // filter: 3 output channels of 1x3x3, scale 0.2,
                    // zero point 124.
                    TensorValue({3, 1, 3, 3}, dtype::Quantized8Asymm(0.2f, (uint8_t)124),
                                {153, 170, 102,
                                 103, 23, 213,
                                 116, 195, 191,
                                 44, 50, 247,
                                 172, 42, 32,
                                 233, 163, 247,
                                 120, 241, 209,
                                 83, 201, 115,
                                 32, 140, 147}),
                    {}},
            Testcase{
                    {},
                    {},
                    // Expected dst: 1x3x2x2 int32 accumulators (valid
                    // convolution of 4x4 by 3x3 with no padding -> 2x2),
                    // quantized with scale 0.1 * 0.2.
                    TensorValue({1, 3, 2, 2}, dtype::QuantizedS32(0.1f * 0.2f),
                                {18617, -22475,
                                 -15694, -1920,
                                 -12813, 4440,
                                 18190, -13195,
                                 -9659, 15933,
                                 -5558, -4969})});
}
// Golden-value test: ConvolutionBackwardData ("deconvolution") on
// asymmetrically quantized uint8 filter/diff, accumulating into QuantizedS32
// whose scale is the product of the two input scales (0.1f * 0.0084f).
TEST_F(NAIVE, DECONVOLUTION_QUANTIZED8x8x32) {
    Checker<ConvolutionBackwardData> checker(handle(), /* check_dispatch */ false);
    ConvolutionBackwardData::Param param;
    param.format = ConvolutionBackwardData::Param::Format::NCHW;
    // Inputs for backward-data are (filter, diff); the output is grad.
    checker.set_param(param).exect(
            Testcase{
                    // filter: 1x3x3x3, scale 0.0084, zero point 135.
                    TensorValue({1, 3, 3, 3}, dtype::Quantized8Asymm(0.0084f, (uint8_t)135),
                                {131, 155, 190,
                                 255, 43, 155,
                                 97, 238, 127,
                                 157, 72, 161,
                                 157, 0, 69,
                                 204, 167, 180,
                                 108, 47, 203,
                                 179, 136, 83,
                                 143, 182, 105}),
                    // diff: 1x1x4x4, scale 0.1, zero point 157.
                    TensorValue({1, 1, 4, 4}, dtype::Quantized8Asymm(0.1f, (uint8_t)157),
                                {126, 49, 99, 0,
                                 173, 19, 129, 19,
                                 161, 180, 32, 255,
                                 203, 120, 208, 96}),
                    {}},
            Testcase{
                    {},
                    {},
                    // Expected grad: 1x3x6x6 int32 accumulators (4x4 diff
                    // expanded by a 3x3 kernel -> 6x6), quantized with scale
                    // 0.1 * 0.0084.
                    TensorValue({1, 3, 6, 6}, dtype::QuantizedS32(0.1f * 0.0084f),
                                { 124, -188, -3633, -6472, -6330, -8635,
                                 -3784, -9236, 588, -23262, 8984, -10730,
                                 3082, -17133, 2164, -17515, -8486, 3886,
                                 -312, 10352, -28728, 26413, -23921, -291,
                                 5368, -9134, 17531, -29535, 17726, -2004,
                                 -1748, 6144, -6117, 7867, -6691, 488,
                                 -682, -423, 4722, -2608, 8383, -4082,
                                 -330, -2235, 23844, 6644, 32989, 6774,
                                 -1699, -13386, 4010, 2932, 3420, 4591,
                                 2204, -12756, -7098, -4632, -5487, -14264,
                                 1288, -5309, -4628, -1988, 2380, 8436,
                                 3174, -1081, 4405, -4242, 343, -2745,
                                 837, 5644, 8962, 1999, 9872, -10676,
                                 -1796, -2465, 12940, -4544, 13099, -1220,
                                 348, -9350, -5189, 10252, -21445, 18550,
                                 -938, -2385, -7868, -646, 9788, -5104,
                                 2056, -1210, -224, -6490, 5643, 232,
                                 368, 1866, -2711, 3019, -4397, 1830})});
}
// Cross-checks NCHW4-format convolution against an NCHW reference: the
// checker runs the operator under test in NCHW4, while extra_impl reproduces
// the result by relayouting the operands to NCHW, running a plain NCHW
// convolution, and relayouting the output back.
TEST_F(NAIVE, CONVOLUTION_WITH_NCHW4) {
    Checker<Convolution> checker(handle());
    Convolution::Param param;
    param.format = Convolution::Param::Format::NCHW4;
    // Given a plain NCHW(-like) layout, build the layout that views the same
    // buffer as NCHW4: split the channel dim by 4 and move the 4-element
    // sub-channel axis innermost via dimshuffle. ndim == 4 is the dense
    // tensor case; the else branch handles the 5-dim grouped-filter case.
    auto convert_true_format = [](const TensorLayout& layout) {
        if (layout.ndim == 4)
            return layout
                    .reshape(
                            {layout[0], layout[1] / 4, layout[2], layout[3], 4})
                    .dimshuffle({0, 1, 4, 2, 3});
        else
            return layout
                    .reshape({layout[0], layout[1], layout[2] / 4, layout[3],
                              layout[4], 4})
                    .dimshuffle({0, 1, 2, 5, 3, 4});
    };
    // Reference implementation handed to the checker: computes the same
    // convolution in NCHW format. `tensors` arrive in NCHW4 layout
    // (5-dim dense, 6-dim grouped).
    auto extra_impl = [&, this](const TensorNDArray& tensors) {
        auto conv = handle()->create_operator<Convolution>();
        conv->param() = param;
        conv->param().format = Convolution::Param::Format::NCHW;
        // Allocate scratch NCHW tensors by folding the trailing 4-element
        // axis back into the channel dimension.
        TensorNDArray nchw_tensors;
        for (size_t i = 0; i < tensors.size(); ++i) {
            auto layout = tensors[i].layout;
            if (layout.ndim == 5) {
                // Dense NCHW4 {n, c/4, h, w, 4} -> NCHW {n, c, h, w}.
                layout = layout.reshape({layout[0], layout[1] * layout[4],
                                         layout[2], layout[3]});
            } else {
                // 6-dim layouts only occur for grouped filters.
                megdnn_assert(layout.ndim == 6 &&
                              param.sparse == Convolution::Param::Sparse::GROUP);
                layout = layout.reshape(
                        {layout[0], layout[1], layout[2] * layout[5],
                         layout[3], layout[4]});
            }
            // Raw malloc; released at the end of this lambda.
            nchw_tensors.emplace_back(
                    malloc(layout.span().dist_byte()), layout);
        }
        // Non-contiguous views over the ORIGINAL buffers that present the
        // NCHW4 data in NCHW element order, used as relayout endpoints.
        TensorNDArray nchw4_tensors;
        for (size_t i = 0; i < tensors.size(); ++i) {
            auto layout = convert_true_format(nchw_tensors[i].layout);
            nchw4_tensors.emplace_back(tensors[i].raw_ptr, std::move(layout));
        }
        auto workspace_size = conv->get_workspace_in_bytes(
                tensors[0].layout, tensors[1].layout, tensors[2].layout);
        dt_byte* workspace_ptr = static_cast<dt_byte*>(malloc(workspace_size));
        Workspace workspace{workspace_ptr, workspace_size};
        // src and filter: NCHW4 -> NCHW, run the NCHW conv, then write the
        // result back into the caller's NCHW4 output buffer.
        auto relayout = handle()->create_operator<RelayoutForward>();
        relayout->exec(nchw4_tensors[0], nchw_tensors[0]);
        relayout->exec(nchw4_tensors[1], nchw_tensors[1]);
        conv->exec(nchw_tensors[0], nchw_tensors[1], nchw_tensors[2],
                   workspace);
        relayout->exec(nchw_tensors[2], nchw4_tensors[2]);
        free(workspace_ptr);
        for (auto&& tensor : nchw_tensors) {
            free(tensor.raw_ptr);
        }
    };
    // NOTE(review): `rng` is declared but never wired into the checker; both
    // inputs use filter_rng (constant 1) below.
    UniformIntRNG rng{0, 4};
    ConstValue filter_rng{1};
    checker.set_extra_opr_impl(extra_impl)
            .set_rng(0, &filter_rng)
            .set_rng(1, &filter_rng);
    // Dense cases: src {n, ic/4, h, w, 4}, filter {oc, ic/4, fh, fw, 4}.
    checker.set_param(param)
            .execs({{1, 2, 2, 2, 4}, {4, 2, 1, 1, 4}, {}})
            .execs({{20, 3, 30, 30, 4}, {4, 3, 1, 1, 4}, {}})
            .execs({{20, 2, 30, 30, 4}, {4, 2, 3, 3, 4}, {}});
    // Grouped cases: filter {group, ocpg, icpg/4, fh, fw, 4}.
    param.sparse = Convolution::Param::Sparse::GROUP;
    checker.set_param(param)
            .execs({{20, 15, 30, 30, 4}, {5, 4, 3, 3, 3, 4}, {}})
            .execs({{20, 25, 30, 30, 4}, {5, 4, 5, 1, 1, 4}, {}})
            .execs({{20, 27, 30, 30, 4}, {3, 4, 9, 1, 1, 4}, {}});
}
  214. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台