
conv_bias.cpp

/**
 * \file dnn/test/fallback/conv_bias.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "test/common/conv_bias.h"
#include "megdnn/opr_param_defs.h"
#include "megdnn/oprs.h"
#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/rng.h"
#include "test/common/task_record_check.h"
#include "test/common/tensor.h"
#include "test/fallback/fixture.h"
#if MEGDNN_X86
#include "src/x86/utils.h"
#endif

namespace megdnn {
namespace test {
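// Float32 ConvBias forward on the fallback backend: a plain NHWC case, all
// layouts from get_args() checked against the FALLBACK_NAIVE algorithm, and a
// grouped NCHW case with both full-shape and per-channel bias.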
TEST_F(FALLBACK, CONV_BIAS_FORWARD) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_args();
    Checker<ConvBiasForward> checker(handle());
    NormalRNG default_rng;
    UniformIntRNG int_rng{-50, 50};
    param::ConvBias param;
    {
        param.format = param::ConvBias::Format::NHWC;
        auto src_shape = TensorShape{2, 16, 32, 24};
        auto filter_shape = TensorShape{4, 3, 3, 24};
        auto bias_shape_channel = TensorShape{1, 1, 1, 4};
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_rng(2, &default_rng)
                .set_param(param)
                .execs({src_shape, filter_shape, bias_shape_channel, {}, {}});
    }

    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>("FALLBACK_NAIVE"));
    for (auto&& arg : args) {
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_rng(2, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, arg.bias, {}, {}});
    }
    {
        param.format = param::ConvBias::Format::NCHW;
        param.sparse = ConvBias::Param::Sparse::GROUP;
        auto src_shape = TensorShape{2, 16, 32, 24};
        auto filter_shape = TensorShape{4, 4, 4, 1, 1};
        auto bias_shape_channel = TensorShape{1, 16, 1, 1};
        auto bias_shape = TensorShape{2, 16, 32, 24};
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_rng(2, &default_rng)
                .set_param(param)
                .execs({src_shape, filter_shape, bias_shape, {}, {}})
                .execs({src_shape, filter_shape, bias_shape_channel, {}, {}});
    }
}
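// The same NHWC and grouped NCHW cases, run through TaskRecordChecker to
// exercise the task-record execution path.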
TEST_F(FALLBACK, CONV_BIAS_FORWARD_RECORD) {
    using namespace conv_bias;
    TaskRecordChecker<ConvBiasForward> checker(1);
    NormalRNG default_rng;
    UniformIntRNG int_rng{-50, 50};
    param::ConvBias param;
    {
        param.format = param::ConvBias::Format::NHWC;
        auto src_shape = TensorShape{2, 16, 32, 24};
        auto filter_shape = TensorShape{4, 3, 3, 24};
        auto bias_shape_channel = TensorShape{1, 1, 1, 4};
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_rng(2, &default_rng)
                .set_param(param)
                .execs({src_shape, filter_shape, bias_shape_channel, {}, {}});
    }
    {
        param.format = param::ConvBias::Format::NCHW;
        param.sparse = ConvBias::Param::Sparse::GROUP;
        auto src_shape = TensorShape{2, 16, 32, 24};
        auto filter_shape = TensorShape{4, 4, 4, 1, 1};
        auto bias_shape_channel = TensorShape{1, 16, 1, 1};
        auto bias_shape = TensorShape{2, 16, 32, 24};
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_rng(2, &default_rng)
                .set_param(param)
                .execs({src_shape, filter_shape, bias_shape, {}, {}})
                .execs({src_shape, filter_shape, bias_shape_channel, {}, {}});
    }
}
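// Builds the test-case list: for every combination of batch size, nonlinearity,
// padding, stride, channel count, spatial size and kernel size it emits dense
// and grouped cases; unless no_bias is set it also adds a broadcast (1xOCx1x1)
// bias case, and unless only_broadbias is set a full-output-shape bias case.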
std::vector<conv_bias::TestArg> get_conv_bias_args(
        std::vector<size_t> kernel, std::vector<size_t> padv,
        std::vector<param::ConvBias::NonlineMode> nlmodev, std::vector<size_t> stridev,
        bool no_bias, bool only_broadbias) {
    using namespace conv_bias;
    using Param = param::ConvBias;
    using NLMode = param::ConvBias::NonlineMode;

    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h, size_t pad,
                    size_t kernel, size_t stride, NLMode nonlinemode) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = pad;
        param.pad_w = pad;
        param.nonlineMode = nonlinemode;
        args.emplace_back(
                param, TensorShape{n, ic, h, w}, TensorShape{oc, ic, kernel, kernel},
                TensorShape{});
        if (!no_bias) {
            args.emplace_back(
                    param, TensorShape{n, ic, h, w},
                    TensorShape{oc, ic, kernel, kernel}, TensorShape{1, oc, 1, 1});
            if (!only_broadbias) {
                args.emplace_back(
                        param, TensorShape{n, ic, h, w},
                        TensorShape{oc, ic, kernel, kernel},
                        TensorShape{
                                n, oc, (h + 2 * param.pad_h - kernel) / stride + 1,
                                (w + 2 * param.pad_w - kernel) / stride + 1});
            }
        }
    };
    auto pack_group = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
                          size_t pad, size_t kernel, size_t stride,
                          NLMode nonlinemode) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = pad;
        param.pad_w = pad;
        param.nonlineMode = nonlinemode;
        param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                param, TensorShape{n, 2 * ic, h, w},
                TensorShape{2, oc, ic, kernel, kernel}, TensorShape{});
        if (!no_bias) {
            args.emplace_back(
                    param, TensorShape{n, 2 * ic, h, w},
                    TensorShape{2, oc, ic, kernel, kernel},
                    TensorShape{1, oc * 2, 1, 1});
            if (!only_broadbias) {
                args.emplace_back(
                        param, TensorShape{n, 2 * ic, h, w},
                        TensorShape{2, oc, ic, kernel, kernel},
                        TensorShape{
                                n, 2 * oc, (h + 2 * param.pad_h - kernel) / stride + 1,
                                (w + 2 * param.pad_w - kernel) / stride + 1});
            }
        }
    };
    for (size_t n : {1, 2}) {
        for (auto nlmode : nlmodev) {
            for (auto pad : padv) {
                for (auto stride : stridev) {
                    for (size_t ic : {1, 5}) {
                        for (size_t oc : {1, 11}) {
                            for (size_t size : {9, 30}) {
                                for (size_t kern : kernel) {
                                    pack(n, oc, ic, size + 4, size + 4, pad, kern,
                                         stride, nlmode);
                                    pack_group(
                                            n, oc, ic, size, size, pad, kern, stride,
                                            nlmode);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    return args;
}
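// Runs every test case through Checker with the given dtypes, epsilon and
// optional RNG, requiring that algo_name is the algorithm actually selected.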
void checker_conv_bias(
        std::vector<conv_bias::TestArg> args, Handle* handle, RNG* rng, float epsilon,
        DType type0, DType type1, DType type2, DType type3, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, type0);
    checker.set_dtype(1, type1);
    checker.set_dtype(2, type2);
    checker.set_dtype(4, type3);
    checker.set_epsilon(epsilon);
    if (NULL != rng) {
        checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng).set_rng(3, rng);
    }
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, arg.bias, {}, {}});
    }
}
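// int8 x int8 -> int16 ConvBias forward with broadcast bias only.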
TEST_F(FALLBACK_MULTI_THREADS, CONV_BIAS_FORWARD_IM2COL_8X8X16) {
    using namespace conv_bias;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    std::vector<conv_bias::TestArg> args = get_conv_bias_args(
            {1, 3}, {0}, {NLMode::IDENTITY, NLMode::RELU}, {1}, false, true);
    NormalRNG default_rng;
    Checker<ConvBias> checker(handle());
    checker.set_dtype(0, dtype::Int8{});
    checker.set_dtype(1, dtype::Int8{});
    checker.set_dtype(2, dtype::Int16{});
    checker.set_dtype(4, dtype::Int16{});
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, arg.bias, {}, {}});
    }
}
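// Float32 forward over a broader sweep of kernels, paddings, strides and
// nonlinearities, verified against FALLBACK_NAIVE.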
TEST_F(FALLBACK_MULTI_THREADS, CONV_BIAS_FORWARD) {
    using namespace conv_bias;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    std::vector<conv_bias::TestArg> args = get_conv_bias_args(
            {1, 3, 5}, {0, 3},
            {NLMode::IDENTITY, NLMode::H_SWISH, NLMode::SIGMOID, NLMode::RELU}, {1, 2},
            false, false);
    NormalRNG default_rng;
    checker_conv_bias(
            args, handle(), &default_rng, 1e-3, dtype::Float32{}, dtype::Float32{},
            dtype::Float32{}, dtype::Float32{}, "FALLBACK_NAIVE");
}
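// QuantizedS8 inputs, QuantizedS32 bias and QuantizedS8 output, driven by an
// integer RNG in [-50, 50] and verified against FALLBACK_NAIVE.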
TEST_F(FALLBACK_MULTI_THREADS, CONV_BIAS_FORWARD_QUANTIZED) {
    using namespace conv_bias;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    std::vector<conv_bias::TestArg> args = get_conv_bias_args(
            {1, 3, 5, 7}, {0, 3}, {NLMode::IDENTITY, NLMode::H_SWISH, NLMode::RELU},
            {1, 2}, false, false);
    UniformIntRNG int_rng{-50, 50};
    float epsilon = 1e-3;
    checker_conv_bias(
            args, handle(), &int_rng, epsilon, dtype::QuantizedS8(2.5f),
            dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
            dtype::QuantizedS8(60.25f), "FALLBACK_NAIVE");
}
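// Benchmark (built only with MEGDNN_WITH_BENCHMARK): compares quantized int8
// and float32 ConvBias over a sweep of shapes and prints GFLOPS and speedup.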
#if MEGDNN_WITH_BENCHMARK
TEST_F(FALLBACK, BENCHMARK_CONVBIAS) {
    constexpr size_t RUNS = 10;
    param::ConvBias param;
    param.stride_h = 1;
    param.stride_w = 1;
    Benchmarker<ConvBias> benchmarker_int(handle());
    benchmarker_int.set_times(RUNS)
            .set_dtype(0, dtype::QuantizedS8(2.5f))
            .set_dtype(1, dtype::QuantizedS8(2.5f))
            .set_dtype(2, dtype::QuantizedS32(6.25f))
            .set_dtype(4, dtype::QuantizedS8(40.25f))
            .set_display(false);
    Benchmarker<ConvBias> benchmarker_float(handle());
    benchmarker_float.set_display(false).set_times(RUNS);

    auto run = [&](size_t N, size_t IC, size_t OC, size_t H, size_t W, size_t FS) {
        TensorShape src({N, IC, H, W}), filter({OC, IC, FS, FS}), bias({N, OC, 1, 1}),
                z({}), dst({N, OC, H, W});
        param.pad_h = FS / 2;
        param.pad_w = FS / 2;
        auto int_used =
                benchmarker_int.set_param(param).exec({src, filter, bias, z, dst}) /
                RUNS;
        auto float_used =
                benchmarker_float.set_param(param).exec({src, filter, bias, z, dst}) /
                RUNS;
        float computations = IC * (FS * FS + 1) * dst.total_nr_elems() * 2 * 1e-6;
        printf("run: %s %s %s->%s \nfloat: %f ms %f Gflops int: %f ms "
               "%f Gflops speedup: %f\n",
               src.to_string().c_str(), filter.to_string().c_str(),
               bias.to_string().c_str(), dst.to_string().c_str(), float_used,
               computations / float_used, int_used, computations / int_used,
               float_used / int_used);
    };
    run(1, 128, 128, 32, 32, 3);
    for (size_t IC : {32, 64, 128}) {
        for (size_t OC : {32, 64, 128}) {
            for (size_t size : {28, 56}) {
                for (size_t FS : {3, 5}) {
                    run(1, IC, OC, size, size, FS);
                }
            }
        }
    }
}
#endif
}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen