You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

conv_bias.cpp 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. /**
  2. * \file dnn/test/fallback/conv_bias.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/common/conv_bias.h"
  12. #include "megdnn/opr_param_defs.h"
  13. #include "megdnn/oprs.h"
  14. #include "test/common/benchmarker.h"
  15. #include "test/common/checker.h"
  16. #include "test/common/rng.h"
  17. #include "test/common/tensor.h"
  18. #include "test/fallback/fixture.h"
  19. #if MEGDNN_X86
  20. #include "src/x86/utils.h"
  21. #endif
  22. namespace megdnn {
  23. namespace test {
  24. TEST_F(FALLBACK, CONV_BIAS_FORWARD) {
  25. using namespace conv_bias;
  26. std::vector<TestArg> args = get_args();
  27. Checker<ConvBiasForward> checker(handle());
  28. NormalRNG default_rng;
  29. UniformIntRNG int_rng{-50, 50};
  30. param::ConvBias param;
  31. {
  32. param.format = param::ConvBias::Format::NHWC;
  33. auto src_shape = TensorShape{2, 16, 32, 24};
  34. auto filter_shape = TensorShape{4, 3, 3, 24};
  35. auto bias_shape_channel = TensorShape{1, 1, 1, 4};
  36. checker.set_dtype(0, dtype::Float32())
  37. .set_dtype(1, dtype::Float32())
  38. .set_dtype(2, dtype::Float32())
  39. .set_rng(0, &default_rng)
  40. .set_rng(1, &default_rng)
  41. .set_rng(2, &default_rng)
  42. .set_param(param)
  43. .execs({src_shape, filter_shape, bias_shape_channel, {}, {}});
  44. }
  45. checker.set_before_exec_callback(
  46. conv_bias::ConvBiasAlgoChecker<ConvBias>("FALLBACK_NAIVE"));
  47. for (auto&& arg : args) {
  48. checker.set_dtype(0, dtype::Float32())
  49. .set_dtype(1, dtype::Float32())
  50. .set_dtype(2, dtype::Float32())
  51. .set_rng(0, &default_rng)
  52. .set_rng(1, &default_rng)
  53. .set_rng(2, &default_rng)
  54. .set_epsilon(1e-3)
  55. .set_param(arg.param)
  56. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  57. }
  58. {
  59. param.format = param::ConvBias::Format::NCHW;
  60. param.sparse = ConvBias::Param::Sparse::GROUP;
  61. auto src_shape = TensorShape{2, 16, 32, 24};
  62. auto filter_shape = TensorShape{4, 4, 4, 1, 1};
  63. auto bias_shape_channel = TensorShape{1, 16, 1, 1};
  64. auto bias_shape = TensorShape{2, 16, 32, 24};
  65. checker.set_dtype(0, dtype::Float32())
  66. .set_dtype(1, dtype::Float32())
  67. .set_dtype(2, dtype::Float32())
  68. .set_rng(0, &default_rng)
  69. .set_rng(1, &default_rng)
  70. .set_rng(2, &default_rng)
  71. .set_param(param)
  72. .execs({src_shape, filter_shape, bias_shape, {}, {}})
  73. .execs({src_shape, filter_shape, bias_shape_channel, {}, {}});
  74. }
  75. }
  76. std::vector<conv_bias::TestArg> get_conv_bias_args(
  77. std::vector<size_t> kernel, std::vector<size_t> padv,
  78. std::vector<param::ConvBias::NonlineMode> nlmodev,
  79. std::vector<size_t> stridev, bool no_bias, bool only_broadbias) {
  80. using namespace conv_bias;
  81. using Param = param::ConvBias;
  82. using NLMode = param::ConvBias::NonlineMode;
  83. std::vector<TestArg> args;
  84. auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
  85. size_t pad, size_t kernel, size_t stride,
  86. NLMode nonlinemode) {
  87. Param param;
  88. param.stride_h = stride;
  89. param.stride_w = stride;
  90. param.pad_h = pad;
  91. param.pad_w = pad;
  92. param.nonlineMode = nonlinemode;
  93. args.emplace_back(param, TensorShape{n, ic, h, w},
  94. TensorShape{oc, ic, kernel, kernel}, TensorShape{});
  95. if (!no_bias) {
  96. args.emplace_back(param, TensorShape{n, ic, h, w},
  97. TensorShape{oc, ic, kernel, kernel},
  98. TensorShape{1, oc, 1, 1});
  99. if (!only_broadbias) {
  100. args.emplace_back(
  101. param, TensorShape{n, ic, h, w},
  102. TensorShape{oc, ic, kernel, kernel},
  103. TensorShape{
  104. n, oc,
  105. (h + 2 * param.pad_h - kernel) / stride + 1,
  106. (w + 2 * param.pad_h - kernel) / stride + 1});
  107. }
  108. }
  109. };
  110. auto pack_group = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
  111. size_t pad, size_t kernel, size_t stride,
  112. NLMode nonlinemode) {
  113. Param param;
  114. param.stride_h = stride;
  115. param.stride_w = stride;
  116. param.pad_h = pad;
  117. param.pad_w = pad;
  118. param.nonlineMode = nonlinemode;
  119. param.sparse = param::ConvBias::Sparse::GROUP;
  120. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  121. TensorShape{2, oc, ic, kernel, kernel},
  122. TensorShape{});
  123. if (!no_bias) {
  124. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  125. TensorShape{2, oc, ic, kernel, kernel},
  126. TensorShape{1, oc * 2, 1, 1});
  127. if (!only_broadbias) {
  128. args.emplace_back(
  129. param, TensorShape{n, 2 * ic, h, w},
  130. TensorShape{2, oc, ic, kernel, kernel},
  131. TensorShape{
  132. n, 2 * oc,
  133. (h + 2 * param.pad_h - kernel) / stride + 1,
  134. (w + 2 * param.pad_h - kernel) / stride + 1});
  135. }
  136. }
  137. };
  138. for (size_t n : {1, 2}) {
  139. for (auto nlmode : nlmodev) {
  140. for (auto pad : padv) {
  141. for (auto stride : stridev) {
  142. for (size_t ic : {1, 5}) {
  143. for (size_t oc : {1, 11}) {
  144. for (size_t size : {9, 30}) {
  145. for (size_t kern : kernel) {
  146. pack(n, oc, ic, size + 4, size + 4, pad,
  147. kern, stride, nlmode);
  148. pack_group(n, oc, ic, size, size, pad, kern,
  149. stride, nlmode);
  150. }
  151. }
  152. }
  153. }
  154. }
  155. }
  156. }
  157. }
  158. return args;
  159. }
  160. void checker_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle,
  161. RNG* rng, float epsilon, DType type0, DType type1,
  162. DType type2, DType type3, const char* algo_name) {
  163. using namespace conv_bias;
  164. Checker<ConvBias> checker(handle);
  165. checker.set_before_exec_callback(
  166. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  167. checker.set_dtype(0, type0);
  168. checker.set_dtype(1, type1);
  169. checker.set_dtype(2, type2);
  170. checker.set_dtype(4, type3);
  171. checker.set_epsilon(epsilon);
  172. if (NULL != rng) {
  173. checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng).set_rng(3, rng);
  174. }
  175. for (auto&& arg : args) {
  176. checker.set_param(arg.param).execs(
  177. {arg.src, arg.filter, arg.bias, {}, {}});
  178. }
  179. }
  180. TEST_F(FALLBACK_MULTI_THREADS, CONV_BIAS_FORWARD_IM2COL_8X8X16) {
  181. using namespace conv_bias;
  182. param::ConvBias cur_param;
  183. using NLMode = param::ConvBias::NonlineMode;
  184. std::vector<conv_bias::TestArg> args = get_conv_bias_args(
  185. {1, 3}, {0}, {NLMode::IDENTITY, NLMode::RELU}, {1}, false, true);
  186. NormalRNG default_rng;
  187. Checker<ConvBias> checker(handle());
  188. checker.set_dtype(0, dtype::Int8{});
  189. checker.set_dtype(1, dtype::Int8{});
  190. checker.set_dtype(2, dtype::Int16{});
  191. checker.set_dtype(4, dtype::Int16{});
  192. for (auto&& arg : args) {
  193. checker.set_param(arg.param).execs(
  194. {arg.src, arg.filter, arg.bias, {}, {}});
  195. }
  196. }
  197. TEST_F(FALLBACK_MULTI_THREADS, CONV_BIAS_FORWARD) {
  198. using namespace conv_bias;
  199. param::ConvBias cur_param;
  200. using NLMode = param::ConvBias::NonlineMode;
  201. std::vector<conv_bias::TestArg> args = get_conv_bias_args(
  202. {1, 3, 5}, {0, 3},
  203. {NLMode::IDENTITY, NLMode::H_SWISH, NLMode::SIGMOID, NLMode::RELU},
  204. {1, 2}, false, false);
  205. NormalRNG default_rng;
  206. checker_conv_bias(args, handle(), &default_rng, 1e-3, dtype::Float32{},
  207. dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
  208. "FALLBACK_NAIVE");
  209. }
  210. TEST_F(FALLBACK_MULTI_THREADS, CONV_BIAS_FORWARD_QUANTIZED) {
  211. using namespace conv_bias;
  212. param::ConvBias cur_param;
  213. using NLMode = param::ConvBias::NonlineMode;
  214. std::vector<conv_bias::TestArg> args = get_conv_bias_args(
  215. {1, 3, 5, 7}, {0, 3},
  216. {NLMode::IDENTITY, NLMode::H_SWISH, NLMode::RELU}, {1, 2}, false,
  217. false);
  218. UniformIntRNG int_rng{-50, 50};
  219. float epsilon = 1e-3;
  220. checker_conv_bias(args, handle(), &int_rng, epsilon,
  221. dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f),
  222. dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f),
  223. "FALLBACK_NAIVE");
  224. }
  225. #if MEGDNN_WITH_BENCHMARK
  226. TEST_F(FALLBACK, BENCHMARK_CONVBIAS) {
  227. constexpr size_t RUNS = 10;
  228. param::ConvBias param;
  229. param.stride_h = 1;
  230. param.stride_w = 1;
  231. Benchmarker<ConvBias> benchmarker_int(handle());
  232. benchmarker_int.set_times(RUNS)
  233. .set_dtype(0, dtype::QuantizedS8(2.5f))
  234. .set_dtype(1, dtype::QuantizedS8(2.5f))
  235. .set_dtype(2, dtype::QuantizedS32(6.25f))
  236. .set_dtype(4, dtype::QuantizedS8(40.25f))
  237. .set_display(false);
  238. Benchmarker<ConvBias> benchmarker_float(handle());
  239. benchmarker_float.set_display(false).set_times(RUNS);
  240. auto run = [&](size_t N, size_t IC, size_t OC, size_t H, size_t W,
  241. size_t FS) {
  242. TensorShape src({N, IC, H, W}), filter({OC, IC, FS, FS}),
  243. bias({N, OC, 1, 1}), z({}), dst({N, OC, H, W});
  244. param.pad_h = FS / 2;
  245. param.pad_w = FS / 2;
  246. auto int_used = benchmarker_int.set_param(param).exec(
  247. {src, filter, bias, z, dst}) /
  248. RUNS;
  249. auto float_used = benchmarker_float.set_param(param).exec(
  250. {src, filter, bias, z, dst}) /
  251. RUNS;
  252. float computations =
  253. IC * (FS * FS + 1) * dst.total_nr_elems() * 2 * 1e-6;
  254. printf("run: %s %s %s->%s \nfloat: %f ms %f Gflops int: %f ms "
  255. "%f Gflops speedup: %f\n",
  256. src.to_string().c_str(), filter.to_string().c_str(),
  257. bias.to_string().c_str(), dst.to_string().c_str(), float_used,
  258. computations / float_used, int_used, computations / int_used,
  259. float_used / int_used);
  260. };
  261. run(1, 128, 128, 32, 32, 3);
  262. for (size_t IC : {32, 64, 128}) {
  263. for (size_t OC : {32, 64, 128}) {
  264. for (size_t size : {28, 56}) {
  265. for (size_t FS : {3, 5}) {
  266. run(1, IC, OC, size, size, FS);
  267. }
  268. }
  269. }
  270. }
  271. }
  272. #endif
  273. } // namespace test
  274. } // namespace megdnn
  275. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台