You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

conv_bias_multi_thread.cpp 48 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206
  1. /**
  2. * \file dnn/test/arm_common/conv_bias_multi_thread.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
  12. #include "megdnn/dtype.h"
  13. #include "test/arm_common/fixture.h"
  14. #include "test/common/benchmarker.h"
  15. #include "test/common/conv_bias.h"
  16. #include "test/arm_common/cpuinfo_help.h"
  17. using namespace megdnn;
  18. using namespace test;
  19. using namespace conv_bias;
  20. std::vector<conv_bias::TestArg> get_int8_quint8_conv_bias_args(
  21. std::vector<size_t> kernel, size_t stride, bool no_pad, bool no_bias,
  22. bool no_nonlinemode) {
  23. using namespace conv_bias;
  24. using Param = param::ConvBias;
  25. using NLMode = param::ConvBias::NonlineMode;
  26. std::vector<TestArg> args;
  27. auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
  28. size_t kernel, size_t stride, NLMode nlmode) {
  29. Param param;
  30. param.stride_h = stride;
  31. param.stride_w = stride;
  32. if (!no_pad) {
  33. param.pad_h = kernel / 2;
  34. param.pad_w = kernel / 2;
  35. } else {
  36. param.pad_h = 0;
  37. param.pad_w = 0;
  38. }
  39. param.nonlineMode = nlmode;
  40. args.emplace_back(param, TensorShape{n, ic, h, w},
  41. TensorShape{oc, ic, kernel, kernel}, TensorShape{});
  42. if (!no_bias) {
  43. args.emplace_back(param, TensorShape{n, ic, h, w},
  44. TensorShape{oc, ic, kernel, kernel},
  45. TensorShape{1, oc, 1, 1});
  46. }
  47. };
  48. std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
  49. if (!no_nonlinemode) {
  50. nonlinemode.emplace_back(NLMode::RELU);
  51. nonlinemode.emplace_back(NLMode::H_SWISH);
  52. }
  53. for (size_t n : {1, 2}) {
  54. for (auto nlmode : nonlinemode) {
  55. for (size_t ic : {1, 3, 7}) {
  56. for (size_t oc : {1, 3, 7}) {
  57. for (size_t size : {4, 6, 8, 14, 16, 18}) {
  58. for (size_t kern : kernel) {
  59. pack(n, oc, ic, size, size, kern, stride, nlmode);
  60. }
  61. }
  62. }
  63. }
  64. }
  65. }
  66. return args;
  67. }
  68. std::vector<conv_bias::TestArg> get_nchw44_channel_wise_args(
  69. std::vector<size_t> kernel, size_t stride, bool no_bias,
  70. bool no_nonlinemode, bool no_full_bias) {
  71. using namespace conv_bias;
  72. using Param = param::ConvBias;
  73. using NLMode = param::ConvBias::NonlineMode;
  74. std::vector<TestArg> args;
  75. auto pack = [&](size_t n, size_t group, size_t w, size_t h, size_t kernel,
  76. size_t stride, NLMode nlmode, bool pad) {
  77. Param param;
  78. param.stride_h = stride;
  79. param.stride_w = stride;
  80. if (pad) {
  81. param.pad_h = kernel / 2;
  82. param.pad_w = kernel / 2;
  83. } else {
  84. param.pad_h = 0;
  85. param.pad_w = 0;
  86. }
  87. param.nonlineMode = nlmode;
  88. param.format = param::ConvBias::Format::NCHW44;
  89. param.sparse = param::ConvBias::Sparse::GROUP;
  90. args.emplace_back(param, TensorShape{n, group, h, w, 4},
  91. TensorShape{group, 1, 1, kernel, kernel, 4},
  92. TensorShape{});
  93. if (!no_bias) {
  94. args.emplace_back(param, TensorShape{n, group, h, w, 4},
  95. TensorShape{group, 1, 1, kernel, kernel, 4},
  96. TensorShape{1, group, 1, 1, 4});
  97. }
  98. if (!no_full_bias) {
  99. args.emplace_back(
  100. param, TensorShape{n, group, h, w, 4},
  101. TensorShape{group, 1, 1, kernel, kernel, 4},
  102. TensorShape{n, group,
  103. (h + 2 * param.pad_w - kernel) / stride + 1,
  104. (w + 2 * param.pad_w - kernel) / stride + 1,
  105. 4});
  106. }
  107. };
  108. std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
  109. if (!no_nonlinemode) {
  110. nonlinemode.emplace_back(NLMode::RELU);
  111. nonlinemode.emplace_back(NLMode::H_SWISH);
  112. }
  113. for (size_t n : {1, 2}) {
  114. for (auto nlmode : nonlinemode) {
  115. for (bool pad : {true}) {
  116. for (size_t group : {1, 2, 4, 7, 128}) {
  117. for (size_t size : {4, 6, 7, 9, 15, 40}) {
  118. for (size_t kern : kernel) {
  119. pack(n, group, size, size, kern, stride, nlmode,
  120. pad);
  121. }
  122. }
  123. }
  124. }
  125. for (bool pad : {false}) {
  126. for (size_t group : {1, 2, 7, 128}) {
  127. for (size_t size : {7, 9, 15, 40}) {
  128. for (size_t kern : kernel) {
  129. pack(n, group, size, size, kern, stride, nlmode,
  130. pad);
  131. }
  132. }
  133. }
  134. }
  135. }
  136. }
  137. return args;
  138. }
  139. std::vector<conv_bias::TestArg> get_nchw88_channel_wise_args(
  140. std::vector<size_t> kernel, size_t stride, bool no_bias,
  141. bool no_nonlinemode, bool no_full_bias) {
  142. using namespace conv_bias;
  143. using Param = param::ConvBias;
  144. using NLMode = param::ConvBias::NonlineMode;
  145. std::vector<TestArg> args;
  146. auto pack = [&](size_t n, size_t group, size_t w, size_t h, size_t kernel,
  147. size_t stride, NLMode nlmode, bool pad) {
  148. Param param;
  149. param.stride_h = stride;
  150. param.stride_w = stride;
  151. if (pad) {
  152. param.pad_h = kernel / 2;
  153. param.pad_w = kernel / 2;
  154. } else {
  155. param.pad_h = 0;
  156. param.pad_w = 0;
  157. }
  158. param.nonlineMode = nlmode;
  159. param.format = param::ConvBias::Format::NCHW88;
  160. param.sparse = param::ConvBias::Sparse::GROUP;
  161. args.emplace_back(param, TensorShape{n, group, h, w, 8},
  162. TensorShape{group, 1, 1, kernel, kernel, 8},
  163. TensorShape{});
  164. if (!no_bias) {
  165. args.emplace_back(param, TensorShape{n, group, h, w, 8},
  166. TensorShape{group, 1, 1, kernel, kernel, 8},
  167. TensorShape{1, group, 1, 1, 8});
  168. }
  169. if (!no_full_bias) {
  170. args.emplace_back(
  171. param, TensorShape{n, group, h, w, 8},
  172. TensorShape{group, 1, 1, kernel, kernel, 8},
  173. TensorShape{n, group,
  174. (h + 2 * param.pad_w - kernel) / stride + 1,
  175. (w + 2 * param.pad_w - kernel) / stride + 1,
  176. 8});
  177. }
  178. };
  179. std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
  180. if (!no_nonlinemode) {
  181. nonlinemode.emplace_back(NLMode::RELU);
  182. nonlinemode.emplace_back(NLMode::H_SWISH);
  183. }
  184. for (size_t n : {1, 2}) {
  185. for (auto nlmode : nonlinemode) {
  186. for (bool pad : {true}) {
  187. for (size_t group : {1, 2, 4, 7, 8, 128}) {
  188. for (size_t size : {4, 6, 7, 9, 15, 40}) {
  189. for (size_t kern : kernel) {
  190. pack(n, group, size, size, kern, stride, nlmode,
  191. pad);
  192. }
  193. }
  194. }
  195. }
  196. for (bool pad : {false}) {
  197. for (size_t group : {1, 2, 7, 128}) {
  198. for (size_t size : {7, 9, 15, 40}) {
  199. for (size_t kern : kernel) {
  200. pack(n, group, size, size, kern, stride, nlmode,
  201. pad);
  202. }
  203. }
  204. }
  205. }
  206. }
  207. }
  208. return args;
  209. }
  210. void checker_conv_bias_qint8x8x8(std::vector<conv_bias::TestArg> args,
  211. Handle* handle, const char* algo_name) {
  212. Checker<ConvBias> checker(handle);
  213. checker.set_before_exec_callback(
  214. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  215. #if MEGDNN_ARMV7
  216. checker.set_epsilon(1);
  217. #endif
  218. UniformIntRNG rng{-50, 50};
  219. checker.set_dtype(0, dtype::QuantizedS8(0.41113496f))
  220. .set_dtype(1, dtype::QuantizedS8(0.01887994f))
  221. .set_dtype(2, dtype::QuantizedS32(0.41113496f * 0.01887994f))
  222. .set_dtype(4, dtype::QuantizedS8(0.49550694f))
  223. .set_rng(0, &rng)
  224. .set_rng(1, &rng)
  225. .set_rng(2, &rng);
  226. for (auto&& arg : args) {
  227. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  228. }
  229. }
  230. void checker_conv_bias_qint8x8x32(std::vector<conv_bias::TestArg> args,
  231. Handle* handle, const char* algo_name) {
  232. Checker<ConvBias> checker(handle);
  233. UniformIntRNG rng{-50, 50};
  234. checker.set_dtype(0, dtype::QuantizedS8(2.5f))
  235. .set_dtype(1, dtype::QuantizedS8(2.5f))
  236. .set_dtype(2, dtype::QuantizedS32(6.25f))
  237. .set_dtype(4, {});
  238. checker.set_before_exec_callback(
  239. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  240. for (auto&& arg : args) {
  241. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  242. }
  243. }
  244. void checker_conv_bias_quint8x8x8(std::vector<conv_bias::TestArg> args,
  245. Handle* handle, const char* algo_name) {
  246. Checker<ConvBias> checker(handle);
  247. checker.set_before_exec_callback(
  248. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  249. UniformIntRNG rng(0, 255);
  250. checker.set_dtype(0, dtype::Quantized8Asymm(0.2f, 100))
  251. .set_dtype(1, dtype::Quantized8Asymm(0.2f, 120))
  252. .set_dtype(2, dtype::QuantizedS32(0.04f))
  253. .set_dtype(4, dtype::Quantized8Asymm(1.4f, 110))
  254. .set_rng(0, &rng)
  255. .set_rng(1, &rng)
  256. .set_rng(2, &rng);
  257. for (auto&& arg : args) {
  258. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  259. }
  260. }
  261. void checker_conv_bias_quint8x8x32(std::vector<conv_bias::TestArg> args,
  262. Handle* handle, const char* algo_name) {
  263. Checker<ConvBias> checker(handle);
  264. checker.set_before_exec_callback(
  265. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  266. NormalRNG rng(128.f);
  267. checker.set_rng(0, &rng).set_rng(1, &rng);
  268. checker.set_dtype(0, dtype::Quantized8Asymm(1.2f, (uint8_t)127))
  269. .set_dtype(1, dtype::Quantized8Asymm(1.3f, (uint8_t)129))
  270. .set_dtype(2, dtype::QuantizedS32(1.2 * 1.3))
  271. .set_dtype(4, {});
  272. for (auto&& arg : args) {
  273. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  274. }
  275. }
  276. void checker_conv_bias_int8x8x32_multi(std::vector<conv_bias::TestArg> args,
  277. Handle* handle, const char* algo_name) {
  278. Checker<ConvBias> checker(handle);
  279. checker.set_before_exec_callback(
  280. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  281. checker.set_dtype(0, dtype::Int8());
  282. checker.set_dtype(1, dtype::Int8());
  283. checker.set_dtype(2, dtype::Int32());
  284. checker.set_dtype(4, dtype::Int32());
  285. for (auto&& arg : args) {
  286. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  287. }
  288. }
/**********************************F32 direct************************/
//! fp32 direct conv: all kernel sizes 1-7, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP32) {
    check_conv_bias(
            get_conv_bias_args({1, 2, 3, 4, 5, 6, 7}, 1, false, false, false),
            handle(), "F32DIRECT");
}
//! fp32 NCHW44 direct conv, kernel 7, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP32_NCHW44_S1_K7) {
    //! k=7 s=1
    check_conv_bias(get_nchw44_conv_bias_args({7}, ONLY_IDENTITY_NLMODE,
                                              BR_AND_NO_BIASMODE, 1),
                    handle(), "F32_CONV_NCHW44_DIRECT");
}
//! fp32 NCHW44 direct conv, kernels 2/3, stride 1, all nonlinearities.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP32_NCHW44_S1_K2K3) {
    check_conv_bias(
            get_nchw44_conv_bias_args({2, 3}, FULL_NLMODE, ONLY_BR_BIASMODE, 1),
            handle(), "F32_CONV_NCHW44_DIRECT");
}
//! fp32 NCHW44 direct conv, kernel 5, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP32_NCHW44_S1_K5) {
    check_conv_bias(
            get_nchw44_conv_bias_args({5}, FULL_NLMODE, ONLY_BR_BIASMODE, 1),
            handle(), "F32_CONV_NCHW44_DIRECT");
}
//! fp32 NCHW44 direct conv, stride 2, all kernels.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP32_NCHW44_S2) {
    check_conv_bias(get_nchw44_conv_bias_args({2, 3, 5, 7}, FULL_NLMODE,
                                              ONLY_BR_BIASMODE, 2),
                    handle(), "F32_CONV_NCHW44_DIRECT");
}
//! fp32 strided direct conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP32_STR1) {
    check_conv_bias(get_conv_bias_args({2, 3, 5, 7}, 1, false, false, false),
                    handle(), "F32STRD1");
}
//! fp32 strided direct conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP32_STR2) {
    check_conv_bias(get_conv_bias_args({2, 3, 5, 7}, 2, false, false, false),
                    handle(), "F32STRD2");
}
//! fp32 NCHW -> NCHW44 layout-converting conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_NCHW_NCHW44_F32_S2) {
    check_conv_bias(
            get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
                                      ONLY_BR_BIASMODE, 2, false, true),
            handle(), "F32_CONV_NCHW_NCHW44");
}
//! fp32 NCHW -> NCHW44 layout-converting conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_NCHW_NCHW44_F32_S1) {
    check_conv_bias(
            get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
                                      ONLY_BR_BIASMODE, 1, false, true),
            handle(), "F32_CONV_NCHW_NCHW44");
}
//! fp32 channel-wise NCHW44 conv, stride 1, kernels 2/3.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_CHANNEL_WISE_STRIDE1_FP32_NCHW44_1) {
    check_conv_bias(
            get_nchw44_channel_wise_args({2, 3}, 1, false, false, false),
            handle(), "F32_CHANNEL_WISE_NCHW44");
}
//! fp32 channel-wise NCHW44 conv, stride 1, kernel 5.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_CHANNEL_WISE_STRIDE1_FP32_NCHW44_2) {
    check_conv_bias(get_nchw44_channel_wise_args({5}, 1, false, false, false),
                    handle(), "F32_CHANNEL_WISE_NCHW44");
}
//! fp32 channel-wise NCHW44 conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_CHANNEL_WISE_STRIDE2_FP32_NCHW44) {
    check_conv_bias(
            get_nchw44_channel_wise_args({2, 3, 5}, 2, false, false, false),
            handle(), "F32_CHANNEL_WISE_NCHW44");
}
/**********************************F16 direct************************/
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
//! fp16 direct conv, kernels 1-7, stride 1; epsilon 0.03 for half precision.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP16) {
    NormalRNG rng(1);
    checker_conv_bias_f16(
            get_conv_bias_args({1, 2, 3, 4, 5, 6, 7}, 1, false, false, false),
            handle(), rng, "F16DIRECT", 0.03);
}
//! fp16 strided direct conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP16_STR1) {
    NormalRNG rng(1);
    checker_conv_bias_f16(get_conv_bias_args({2, 3, 5}, 1, false, false, false),
                          handle(), rng, "F16STRD1", 0.03);
}
//! fp16 channel-wise NCHW88 conv, stride 1, kernels 2/3.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_CHANNEL_WISE_STRIDE1_FP16_NCHW88_1) {
    NormalRNG rng(1);
    checker_conv_bias_f16(
            get_nchw88_channel_wise_args({2, 3}, 1, false, false, false),
            handle(), rng, "F16_CHANNEL_WISE_NCHW88", 0.03);
}
//! fp16 channel-wise NCHW88 conv, stride 1, kernel 5.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_CHANNEL_WISE_STRIDE1_FP16_NCHW88_2) {
    NormalRNG rng(1);
    checker_conv_bias_f16(
            get_nchw88_channel_wise_args({5}, 1, false, false, false), handle(),
            rng, "F16_CHANNEL_WISE_NCHW88", 0.03);
}
//! fp16 channel-wise NCHW88 conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_CHANNEL_WISE_STRIDE2_FP16_NCHW88) {
    NormalRNG rng(1);
    checker_conv_bias_f16(
            get_nchw88_channel_wise_args({2, 3, 5}, 2, false, false, false),
            handle(), rng, "F16_CHANNEL_WISE_NCHW88", 0.03);
}
#endif
/**********************************algo 8816 direct************************/
//! int8-in / int16-out direct conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_INT8_INT16_DIRECT) {
    checker_conv_bias_int8x8x16(
            get_conv_bias_args({2, 3, 5}, 1, false, true, true), handle(),
            "I8816DIRECT");
}
//! int8-in / int16-out direct conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_INT8_INT16_STRIDE2) {
    checker_conv_bias_int8x8x16(
            get_conv_bias_args({2, 3, 5}, 2, false, true, true), handle(),
            "I8816STRD2");
}
//! int8x8x16 NCHW -> NCHW44 layout-converting conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_INT8_INT16_NCHW_NCHW44_S2) {
    checker_conv_bias_int8x8x16(
            get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
                                      ONLY_NO_BIASMODE, 2, false, true),
            handle(), "I8816_CONV_NCHW_NCHW44");
}
//! int8x8x16 NCHW -> NCHW44 layout-converting conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_INT8_INT16_NCHW_NCHW44_S1) {
    checker_conv_bias_int8x8x16(
            get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
                                      ONLY_NO_BIASMODE, 1, false, true),
            handle(), "I8816_CONV_NCHW_NCHW44");
}
/**********************************algo 8-8-32 direct************************/
//! int8-in / int32-out direct conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_INT8_INT32_STRIDE1) {
    checker_conv_bias_int8x8x32_multi(
            get_conv_bias_args({2, 3, 5, 7}, 1, false, true, true), handle(),
            "S8STRD1");
}
//! int8-in / int32-out direct conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_INT8_INT32_STRIDE2) {
    checker_conv_bias_int8x8x32_multi(
            get_conv_bias_args({2, 3, 5, 7}, 2, false, true, true), handle(),
            "S8STRD2");
}
//! int8x8x32 channel-wise NCHW44 conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_INT8_INT8_INT32_CHANNEL_WISE_DIRECT1_NCHW44) {
    checker_conv_bias_int8x8x32_multi(
            get_nchw44_channel_wise_args({2, 3, 5}, 1, false, true, true),
            handle(), "S8_CHAN_WISE_STRD1_NCHW44");
}
//! int8x8x32 channel-wise NCHW44 conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_INT8_INT8_INT32_CHANNEL_WISE_DIRECT2_NCHW44) {
    checker_conv_bias_int8x8x32_multi(
            get_nchw44_channel_wise_args({2, 3, 5}, 2, false, true, true),
            handle(), "S8_CHAN_WISE_STRD2_NCHW44");
}
  428. TEST_F(ARM_COMMON, CONV_BIAS_INT8_INT8_INT16_CHANNEL_WISE_DIRECT1_NCHW44) {
  429. Checker<ConvBias> checker(handle());
  430. checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
  431. "S8x8x16_CHAN_WISE_STRD1_STRD2_NCHW44"));
  432. checker.set_dtype(0, dtype::Int8());
  433. checker.set_dtype(1, dtype::Int8());
  434. checker.set_dtype(2, dtype::Int16());
  435. checker.set_dtype(4, dtype::Int16());
  436. auto args = get_nchw44_channel_wise_args({2, 3, 5}, 1, false, true, true);
  437. for (auto&& arg : args) {
  438. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  439. }
  440. }
  441. TEST_F(ARM_COMMON_MULTI_THREADS,
  442. CONV_BIAS_INT8_INT8_INT16_CHANNEL_WISE_DIRECT2_NCHW44) {
  443. Checker<ConvBias> checker(handle());
  444. checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
  445. "S8x8x16_CHAN_WISE_STRD1_STRD2_NCHW44"));
  446. checker.set_dtype(0, dtype::Int8());
  447. checker.set_dtype(1, dtype::Int8());
  448. checker.set_dtype(2, dtype::Int16());
  449. checker.set_dtype(4, dtype::Int16());
  450. auto args = get_nchw44_channel_wise_args({2, 3, 5}, 2, false, true, true);
  451. for (auto&& arg : args) {
  452. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  453. }
  454. }
/********************************qint8 direct******************************/
//! qint8 direct conv, stride 1, NCHW.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE1) {
    checker_conv_bias_qint8x8x8(get_int8_quint8_conv_bias_args(
                                        {2, 3, 5, 7}, 1, false, false, false),
                                handle(), "S8STRD1");
}
//! qint8 direct conv, stride 2, NCHW.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE2) {
    checker_conv_bias_qint8x8x8(get_int8_quint8_conv_bias_args(
                                        {2, 3, 5, 7}, 2, false, false, false),
                                handle(), "S8STRD2");
}
//! qint8 NCHW44 direct conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE1_NCHW44) {
    checker_conv_bias_qint8x8x8(
            get_nchw44_conv_bias_args({2, 3, 5, 7}, QUAN_NLMODE,
                                      ONLY_BR_BIASMODE, 1),
            handle(), "S8_NCHW44_DIRECT");
}
//! int8x8x16 NCHW44 direct conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE1_NCHW44_8816) {
    checker_conv_bias_int8x8x16(
            get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
                                      ONLY_BR_BIASMODE, 1),
            handle(), "S8x8x16_NCHW44_DIRECT");
}
//! int8x8x16 NCHW44 direct conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE2_NCHW44_8816) {
    checker_conv_bias_int8x8x16(
            get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
                                      ONLY_BR_BIASMODE, 2),
            handle(), "S8x8x16_NCHW44_DIRECT");
}
//! qint8x8x32 NCHW44 direct conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE1_NCHW44_8832) {
    checker_conv_bias_qint8x8x32(
            get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
                                      ONLY_BR_BIASMODE, 1),
            handle(), "S8_NCHW44_DIRECT");
}
//! qint8x8x32 NCHW44 direct conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE2_NCHW44_8832) {
    checker_conv_bias_qint8x8x32(
            get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
                                      ONLY_NO_BIASMODE, 2),
            handle(), "S8_NCHW44_DIRECT");
}
//! qint8 NCHW44 direct conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE2_NCHW44) {
    checker_conv_bias_qint8x8x8(
            get_nchw44_conv_bias_args({2, 3, 5, 7}, QUAN_NLMODE,
                                      BR_AND_NO_BIASMODE, 2),
            handle(), "S8_NCHW44_DIRECT");
}
//! qint8 stride-1: plain NCHW44 direct plus channel-wise variant.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_QS8_CHANNEL_WISE_DIRECT1_NCHW44) {
    checker_conv_bias_qint8x8x8(
            get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
                                      BR_AND_NO_BIASMODE, 1),
            handle(), "S8_NCHW44_DIRECT");
    checker_conv_bias_qint8x8x8(
            get_nchw44_channel_wise_args({2, 3, 5}, 1, false, false, true),
            handle(), "S8_CHAN_WISE_STRD1_NCHW44");
}
//! qint8 channel-wise NCHW44 conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_QS8_CHANNEL_WISE_DIRECT2_NCHW44) {
    checker_conv_bias_qint8x8x8(
            get_nchw44_channel_wise_args({2, 3, 5}, 2, false, false, true),
            handle(), "S8_CHAN_WISE_STRD2_NCHW44");
}
//! qint8 NCHW -> NCHW44 layout-converting conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_NCHW_NCHW44_S1) {
    checker_conv_bias_qint8x8x8(
            get_nchw44_conv_bias_args({2, 3, 5, 7}, QUAN_NLMODE,
                                      BR_AND_NO_BIASMODE, 1, false, true),
            handle(), "S8_CONV_NCHW_NCHW44");
}
//! qint8 NCHW -> NCHW44 layout-converting conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_NCHW_NCHW44_S2) {
    checker_conv_bias_qint8x8x8(
            get_nchw44_conv_bias_args({2, 3, 5, 7}, QUAN_NLMODE,
                                      BR_AND_NO_BIASMODE, 2, false, true),
            handle(), "S8_CONV_NCHW_NCHW44");
}
//! qint8 NCHW -> NCHW44 conv, 1x1 kernel, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_NCHW_NCHW44_S1_F1) {
    checker_conv_bias_qint8x8x8(
            get_nchw44_conv_bias_args({1}, QUAN_NLMODE, BR_AND_NO_BIASMODE, 1,
                                      false, true),
            handle(), "S8_CONV_NCHW_NCHW44");
}
/*****************************quint8 direct****************************/
//! quint8 direct conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_QUINT8_STRIDE1) {
    checker_conv_bias_quint8x8x8(get_int8_quint8_conv_bias_args(
                                         {2, 3, 5, 7}, 1, false, false, false),
                                 handle(), "QU8STRD1");
}
//! quint8 direct conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_QUINT8_STRIDE2) {
    checker_conv_bias_quint8x8x8(get_int8_quint8_conv_bias_args(
                                         {2, 3, 5, 7}, 2, false, false, false),
                                 handle(), "QU8STRD2");
}
/****************************dot qint8 direct*************************/
#if MGB_ENABLE_DOT
//! dot-product qint8 NCHW -> NCHW44_DOT conv, strides 2 and 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_DOT_NCHW_NCHW44) {
    auto args = get_nchw44_conv_bias_args({2, 3, 5, 7}, QUAN_NLMODE,
                                          BR_AND_NO_BIASMODE, 2, false, true);
    //! retarget the generated cases to the dot-product layout
    for (auto&& arg : args) {
        arg.param.format = param::ConvBias::Format::NCHW44_DOT;
    }
    checker_conv_bias_qint8x8x8(args, handle(), "ARMDOTS8_NCHW_NCHW44");
    args = get_nchw44_conv_bias_args({2, 3, 5, 7}, QUAN_NLMODE,
                                     BR_AND_NO_BIASMODE, 1, false, true);
    for (auto&& arg : args) {
        arg.param.format = param::ConvBias::Format::NCHW44_DOT;
    }
    checker_conv_bias_qint8x8x8(args, handle(), "ARMDOTS8_NCHW_NCHW44");
}
//! dot-product qint8 NCHW -> NCHW44_DOT conv, 1x1 kernel, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_DOT_NCHW_NCHW44_S1_F1) {
    auto args = get_nchw44_conv_bias_args({1}, QUAN_NLMODE, BR_AND_NO_BIASMODE,
                                          1, false, true);
    for (auto&& arg : args) {
        arg.param.format = param::ConvBias::Format::NCHW44_DOT;
    }
    checker_conv_bias_qint8x8x8(args, handle(), "ARMDOTS8_NCHW_NCHW44");
}
//! dot-product qint8 direct conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE1_WITHDOTPROD) {
    checker_conv_bias_qint8x8x8(get_int8_quint8_conv_bias_args(
                                        {2, 3, 5, 7}, 1, false, false, false),
                                handle(), "ARMDOTS8STRD1");
}
//! dot-product qint8 direct conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE2_WITHDOTPROD) {
    checker_conv_bias_qint8x8x8(get_int8_quint8_conv_bias_args(
                                        {2, 3, 5, 7}, 2, false, false, false),
                                handle(), "ARMDOTS8STRD2");
}
/****************************dot 8-8-32 direct*************************/
//! dot-product s8x8x32 direct conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_I8832STRD1_WITHDOT) {
    checker_conv_bias_qint8x8x32(
            get_conv_bias_args({2, 3, 5, 7}, 1, false, true, true), handle(),
            "ARMDOTS8STRD1");
}
//! dot-product s8x8x32 direct conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_I8832STRD2_WITHDOT) {
    checker_conv_bias_qint8x8x32(
            get_conv_bias_args({2, 3, 5, 7}, 2, false, true, true), handle(),
            "ARMDOTS8STRD2");
}
/******************************dot quint8*****************************/
//! dot-product quint8 direct conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_QUINT8_STRIDE1_WITHDOTPROD) {
    checker_conv_bias_quint8x8x8(get_int8_quint8_conv_bias_args(
                                         {2, 3, 5, 7}, 1, false, false, false),
                                 handle(), "ARMDOTU8STRD1");
}
//! TODO: kernel size 3 is deliberately skipped here; enabling it currently
//! triggers a bus error on armv7.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_QUINT8_STRIDE2_WITHDOTPROD) {
    checker_conv_bias_quint8x8x8(
            get_int8_quint8_conv_bias_args({2, 5, 7}, 2, false, false, false),
            handle(), "ARMDOTU8STRD2");
}
/******************************dot quint8x8x32***********************/
//! dot-product quint8x8x32 direct conv, stride 1.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_QUINT8_DIRECT_STRIDE1) {
    checker_conv_bias_quint8x8x32(
            get_conv_bias_args({2, 3, 5, 7}, 1, false, true, true), handle(),
            "ARMDOTU8STRD1");
}
//! dot-product quint8x8x32 direct conv, stride 2.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_QUINT8_DIRECT_STRIDE2) {
    checker_conv_bias_quint8x8x32(
            get_conv_bias_args({2, 3, 5, 7}, 2, false, true, true), handle(),
            "ARMDOTU8STRD2");
}
/******************************dot int8x8x8 nchw44 ***********************/
//! dot-product NCHW44_DOT direct conv, stride 1, qint8 output.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_DIRECT_DOT_NCHW44_S1_Q8x8x8) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_nchw44_conv_bias_args(
            {2, 3, 5, 7}, QUAN_NLMODE, ONLY_BR_BIASMODE, 1);
    for (auto&& arg : args)
        arg.param.format = param::ConvBias::Format::NCHW44_DOT;
    checker_conv_bias_qint8x8x8(args, handle(), "ARMDOTS8DIRECT_NCHW44");
}
//! dot-product NCHW44_DOT direct conv, stride 1, qint32 output.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_DIRECT_DOT_NCHW44_S1_Q8x8x32) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_nchw44_conv_bias_args(
            {2, 3, 5, 7}, ONLY_IDENTITY_NLMODE, BR_AND_NO_BIASMODE, 1);
    for (auto&& arg : args)
        arg.param.format = param::ConvBias::Format::NCHW44_DOT;
    checker_conv_bias_qint8x8x32(args, handle(), "ARMDOTS8DIRECT_NCHW44");
}
//! dot-product NCHW44_DOT direct conv, stride 1, plain int32 output.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_DIRECT_DOT_NCHW44_S1_8x8x32) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_nchw44_conv_bias_args(
            {2, 3, 5, 7}, ONLY_IDENTITY_NLMODE, BR_AND_NO_BIASMODE, 1);
    for (auto&& arg : args)
        arg.param.format = param::ConvBias::Format::NCHW44_DOT;
    checker_conv_bias_int8x8x32_multi(args, handle(), "ARMDOTS8DIRECT_NCHW44");
}
//! dot-product NCHW44_DOT direct conv, stride 2, qint8 output.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_DIRECT_DOT_NCHW44_S2_Q8x8x8) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_nchw44_conv_bias_args(
            {2, 3, 5, 7}, QUAN_NLMODE, BR_AND_NO_BIASMODE, 2);
    for (auto&& arg : args)
        arg.param.format = param::ConvBias::Format::NCHW44_DOT;
    checker_conv_bias_qint8x8x8(args, handle(), "ARMDOTS8DIRECT_NCHW44");
}
//! dot-product NCHW44_DOT direct conv, stride 2, qint32 output.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_DIRECT_DOT_NCHW44_S2_Q8x8x32) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_nchw44_conv_bias_args(
            {2, 3, 5, 7}, ONLY_IDENTITY_NLMODE, BR_AND_NO_BIASMODE, 2);
    for (auto&& arg : args)
        arg.param.format = param::ConvBias::Format::NCHW44_DOT;
    checker_conv_bias_qint8x8x32(args, handle(), "ARMDOTS8DIRECT_NCHW44");
}
//! dot-product NCHW44_DOT direct conv, stride 2, plain int32 output.
TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_DIRECT_DOT_NCHW44_S2_8x8x32) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_nchw44_conv_bias_args(
            {2, 3, 5, 7}, ONLY_IDENTITY_NLMODE, BR_AND_NO_BIASMODE, 2);
    for (auto&& arg : args)
        arg.param.format = param::ConvBias::Format::NCHW44_DOT;
    checker_conv_bias_int8x8x32_multi(args, handle(), "ARMDOTS8DIRECT_NCHW44");
}
  666. #endif
  667. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) {
  668. using namespace conv_bias;
  669. std::vector<TestArg> args = get_winograd_args(3);
  670. Checker<ConvBiasForward> checker(handle());
  671. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  672. DType B_dtype, DType C_dtype, DType D_dtype,
  673. const float eps) {
  674. for (auto&& arg : args) {
  675. checker.set_dtype(0, A_dtype)
  676. .set_dtype(1, B_dtype)
  677. .set_dtype(2, C_dtype)
  678. .set_dtype(4, D_dtype)
  679. .set_epsilon(eps)
  680. .set_param(arg.param)
  681. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  682. }
  683. };
  684. run(args, dtype::Float32(), dtype::Float32(), dtype::Float32(),
  685. dtype::Float32(), 1e-3f);
  686. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  687. Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
  688. checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng);
  689. run(args, dtype::Float16(), dtype::Float16(), dtype::Float16(),
  690. dtype::Float16(), 0.35f);
  691. #endif
  692. }
  693. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4) {
  694. using namespace conv_bias;
  695. std::vector<TestArg> args = get_winograd_mk_packed_args();
  696. Checker<ConvBiasForward> checker(handle());
  697. check_winograd("4:2:32", checker, args, param::MatrixMul::Format::MK4);
  698. }
  699. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_NCHW44) {
  700. using namespace conv_bias;
  701. std::vector<TestArg> args =
  702. get_nchw44_conv_bias_args({3}, QUAN_NLMODE, BR_AND_NO_BIASMODE, 1);
  703. Checker<ConvBiasForward> checker(handle());
  704. check_winograd("4:2:32", checker, args, param::MatrixMul::Format::MK4,
  705. param::ConvBias::Format::NCHW44);
  706. }
  707. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63) {
  708. using namespace conv_bias;
  709. std::vector<TestArg> args = get_winograd_args(3);
  710. Checker<ConvBiasForward> checker(handle());
  711. check_winograd("1:6:32", checker, args);
  712. }
  713. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4) {
  714. using namespace conv_bias;
  715. std::vector<TestArg> args = get_winograd_mk_packed_args();
  716. Checker<ConvBiasForward> checker(handle());
  717. check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4);
  718. }
  719. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_NCHW44) {
  720. using namespace conv_bias;
  721. std::vector<TestArg> args =
  722. get_nchw44_conv_bias_args({3},QUAN_NLMODE,BR_AND_NO_BIASMODE,1);
  723. Checker<ConvBiasForward> checker(handle());
  724. check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4,
  725. param::ConvBias::Format::NCHW44);
  726. }
//! F(7x7,3x3) winograd tests, compiled out with `#if 0`: re-enable them once
//! the low precision mode produces acceptable accuracy.
#if 0
//! winograd F(7x7,3x3), MK4 packed matmul, NCHW44 layout.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F73_4_NCHW44) {
    using namespace conv_bias;
    std::vector<TestArg> args =
            get_nchw44_conv_bias_args({3},QUAN_NLMODE,BR_AND_NO_BIASMODE,1);
    Checker<ConvBiasForward> checker(handle());
    check_winograd("4:7:16", checker, args, param::MatrixMul::Format::MK4,
                   param::ConvBias::Format::NCHW44);
}
//! same as above, but exercising the weight-preprocess proxy path.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F73_4_NCHW44_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args =
            get_nchw44_conv_bias_args({3},QUAN_NLMODE,BR_AND_NO_BIASMODE,1);
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    check_winograd("4:7:16", checker, args, param::MatrixMul::Format::MK4,
                   param::ConvBias::Format::NCHW44);
}
#endif
  747. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54) {
  748. using namespace conv_bias;
  749. std::vector<TestArg> args = get_winograd_args(4);
  750. Checker<ConvBiasForward> checker(handle());
  751. check_winograd("1:5:32", checker, args);
  752. }
  753. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) {
  754. using namespace conv_bias;
  755. std::vector<TestArg> args = get_winograd_args(5);
  756. Checker<ConvBiasForward> checker(handle());
  757. check_winograd("1:4:32", checker, args);
  758. }
  759. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_1) {
  760. using namespace conv_bias;
  761. Checker<ConvBiasForward> checker(handle());
  762. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  763. DType B_dtype, DType C_dtype, DType D_dtype,
  764. float eps) {
  765. for (auto&& arg : args) {
  766. checker.set_dtype(0, A_dtype)
  767. .set_dtype(1, B_dtype)
  768. .set_dtype(2, C_dtype)
  769. .set_dtype(4, D_dtype)
  770. .set_epsilon(eps)
  771. .set_param(arg.param)
  772. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  773. }
  774. };
  775. std::vector<TestArg> args = get_winograd_mk_packed_args(8);
  776. std::vector<TestArg> args_first_half(args.begin(),
  777. args.begin() + args.size() / 2);
  778. run(args_first_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
  779. dtype::Float32{}, 1e-3f);
  780. }
  781. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) {
  782. using namespace conv_bias;
  783. Checker<ConvBiasForward> checker(handle());
  784. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  785. DType B_dtype, DType C_dtype, DType D_dtype,
  786. float eps) {
  787. for (auto&& arg : args) {
  788. checker.set_dtype(0, A_dtype)
  789. .set_dtype(1, B_dtype)
  790. .set_dtype(2, C_dtype)
  791. .set_dtype(4, D_dtype)
  792. .set_epsilon(eps)
  793. .set_param(arg.param)
  794. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  795. }
  796. };
  797. std::vector<TestArg> args = get_winograd_mk_packed_args(8);
  798. std::vector<TestArg> args_second_half(args.begin() + args.size() / 2,
  799. args.end());
  800. run(args_second_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
  801. dtype::Float32{}, 1e-3f);
  802. }
  803. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  804. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) {
  805. using namespace conv_bias;
  806. Checker<ConvBiasForward> checker(handle());
  807. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  808. DType B_dtype, DType C_dtype, DType D_dtype,
  809. float eps) {
  810. for (auto&& arg : args) {
  811. checker.set_dtype(0, A_dtype)
  812. .set_dtype(1, B_dtype)
  813. .set_dtype(2, C_dtype)
  814. .set_dtype(4, D_dtype)
  815. .set_epsilon(eps)
  816. .set_param(arg.param)
  817. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  818. }
  819. };
  820. std::vector<TestArg> args = get_winograd_mk_packed_args(8);
  821. Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
  822. checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng);
  823. run(args, dtype::Float16{}, dtype::Float16{}, dtype::Float16{},
  824. dtype::Float16{}, 0.25);
  825. }
  826. #endif
  827. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_INT8) {
  828. using namespace conv_bias;
  829. Checker<ConvBiasForward> checker(handle());
  830. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  831. DType B_dtype, DType C_dtype, DType D_dtype,
  832. float eps) {
  833. for (auto&& arg : args) {
  834. checker.set_dtype(0, A_dtype)
  835. .set_dtype(1, B_dtype)
  836. .set_dtype(2, C_dtype)
  837. .set_dtype(4, D_dtype)
  838. .set_epsilon(eps)
  839. .set_param(arg.param)
  840. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  841. }
  842. };
  843. #if MEGDNN_AARCH64
  844. const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8";
  845. #else
  846. const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8";
  847. #endif
  848. checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
  849. ssprintf("WINOGRAD:%s:8:2:32", matmul_name).c_str()));
  850. std::vector<TestArg> quantized_args =
  851. get_quantized_winograd_mk_packed_args(8);
  852. UniformIntRNG int_rng{-50, 50};
  853. checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
  854. run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f),
  855. dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3);
  856. }
  857. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8) {
  858. using namespace conv_bias;
  859. Checker<ConvBiasForward> checker(handle());
  860. auto run = [&checker](const std::vector<TestArg>& args,
  861. DType A_dtype,
  862. DType B_dtype, DType C_dtype, DType D_dtype,
  863. float eps) {
  864. for (auto&& arg : args) {
  865. checker.set_dtype(0, A_dtype)
  866. .set_dtype(1, B_dtype)
  867. .set_dtype(2, C_dtype)
  868. .set_dtype(4, D_dtype)
  869. .set_epsilon(eps)
  870. .set_param(arg.param)
  871. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  872. }
  873. };
  874. #if MEGDNN_AARCH64
  875. const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8";
  876. #else
  877. const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8";
  878. #endif
  879. checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
  880. ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_name).c_str()));
  881. std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4);
  882. UniformIntRNG int_rng{-50, 50};
  883. checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
  884. run(quantized_args, dtype::QuantizedS8(2.5f),
  885. dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
  886. dtype::QuantizedS8(60.25f),1e-3);
  887. }
  888. TEST_F(ARM_COMMON_MULTI_THREADS,
  889. CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_GROUPMODE) {
  890. using namespace conv_bias;
  891. Checker<ConvBiasForward> checker(handle());
  892. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  893. DType B_dtype, DType C_dtype, DType D_dtype,
  894. float eps) {
  895. for (auto&& arg : args) {
  896. checker.set_dtype(0, A_dtype)
  897. .set_dtype(1, B_dtype)
  898. .set_dtype(2, C_dtype)
  899. .set_dtype(4, D_dtype)
  900. .set_epsilon(eps)
  901. .set_param(arg.param)
  902. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  903. }
  904. };
  905. float epsilon = 0.001;
  906. #if MEGDNN_AARCH64
  907. const char* matmul_name = "AARCH64_F32_MK4_4x16";
  908. #else
  909. const char* matmul_name = "ARMV7_F32_MK4_4x8";
  910. #endif
  911. checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
  912. ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name).c_str()));
  913. std::vector<TestArg> quantized_args =
  914. get_int8_nchw44_args(3, 4, true, true);
  915. UniformIntRNG int_rng{-50, 50};
  916. checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
  917. run(quantized_args, dtype::QuantizedS8(0.41113496f),
  918. dtype::QuantizedS8(0.01887994f),
  919. dtype::QuantizedS32(0.41113496f * 0.01887994f),
  920. dtype::QuantizedS8(0.49550694f), epsilon);
  921. }
  922. TEST_F(ARM_COMMON_MULTI_THREADS,
  923. CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_GROUPMODE) {
  924. using namespace conv_bias;
  925. Checker<ConvBiasForward> checker(handle());
  926. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  927. DType B_dtype, DType C_dtype, DType D_dtype,
  928. float eps) {
  929. for (auto&& arg : args) {
  930. checker.set_dtype(0, A_dtype)
  931. .set_dtype(1, B_dtype)
  932. .set_dtype(2, C_dtype)
  933. .set_dtype(4, D_dtype)
  934. .set_epsilon(eps)
  935. .set_param(arg.param)
  936. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  937. }
  938. };
  939. #if MEGDNN_AARCH64
  940. const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8";
  941. #else
  942. const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8";
  943. #endif
  944. checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
  945. ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_name).c_str()));
  946. std::vector<TestArg> quantized_args =
  947. get_int8_nchw44_args(3, 4, false, true);
  948. UniformIntRNG int_rng{-50, 50};
  949. checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
  950. run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f),
  951. dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3);
  952. }
  953. TEST_F(ARM_COMMON_MULTI_THREADS,
  954. CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32) {
  955. using namespace conv_bias;
  956. Checker<ConvBiasForward> checker(handle());
  957. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  958. DType B_dtype, DType C_dtype, DType D_dtype,
  959. float eps) {
  960. for (auto&& arg : args) {
  961. checker.set_dtype(0, A_dtype)
  962. .set_dtype(1, B_dtype)
  963. .set_dtype(2, C_dtype)
  964. .set_dtype(4, D_dtype)
  965. .set_epsilon(eps)
  966. .set_param(arg.param)
  967. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  968. }
  969. };
  970. float epsilon = 0.001;
  971. #if MEGDNN_AARCH64
  972. const char* matmul_name = "AARCH64_F32_MK4_4x16";
  973. #else
  974. const char* matmul_name = "ARMV7_F32_MK4_4x8";
  975. #endif
  976. checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
  977. ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name).c_str()));
  978. std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4, true);
  979. UniformIntRNG int_rng{-50, 50};
  980. checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
  981. run(quantized_args, dtype::QuantizedS8(0.41113496f),
  982. dtype::QuantizedS8(0.01887994f),
  983. dtype::QuantizedS32(0.41113496f * 0.01887994f),
  984. dtype::QuantizedS8(0.49550694f), epsilon);
  985. }
  986. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  987. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23) {
  988. using namespace conv_bias;
  989. std::vector<TestArg> args = get_winograd_mk_packed_args();
  990. Checker<ConvBiasForward> checker(handle());
  991. check_winograd_fp16("1:2:32", checker, args, NULL, 0.08);
  992. }
  993. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F45_1) {
  994. using namespace conv_bias;
  995. std::vector<TestArg> args = get_winograd_args(5);
  996. std::vector<TestArg> args_head_half(args.begin(),
  997. args.begin() + args.size() / 2);
  998. Checker<ConvBiasForward> checker(handle());
  999. //! fp16 range -1.0 ~ 1.0
  1000. Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
  1001. check_winograd_fp16("1:4:32", checker, args_head_half, rng, 0.25);
  1002. }
  1003. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F45_2) {
  1004. using namespace conv_bias;
  1005. std::vector<TestArg> args = get_winograd_args(5);
  1006. std::vector<TestArg> args_back_half(args.begin() + args.size() / 2,
  1007. args.end());
  1008. Checker<ConvBiasForward> checker(handle());
  1009. //! fp16 range -1.0 ~ 1.0
  1010. Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
  1011. check_winograd_fp16("1:4:32", checker, args_back_half, rng, 0.25);
  1012. }
  1013. //! FIXME: This test may be failed if run `ARM_COMMON.CONV_BIAS_WINOGRAD*`, but
  1014. //! it will pass when run single testcase
  1015. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F63) {
  1016. using namespace conv_bias;
  1017. std::vector<TestArg> args = get_winograd_args(3);
  1018. Checker<ConvBiasForward> checker(handle());
  1019. //! fp16 range -1.0 ~ 1.0
  1020. Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
  1021. check_winograd_fp16("1:6:32", checker, args, rng, 0.3);
  1022. }
  1023. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_8x8_1) {
  1024. using namespace conv_bias;
  1025. std::vector<TestArg> args = get_winograd_mk_packed_args(8);
  1026. std::vector<TestArg> args_head_half(args.begin(),
  1027. args.begin() + args.size() / 2);
  1028. Checker<ConvBiasForward> checker(handle());
  1029. Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
  1030. check_winograd_fp16("8:2:32", checker, args_head_half, rng, 0.25,
  1031. param::MatrixMul::Format::MK8);
  1032. }
  1033. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_8x8_2) {
  1034. using namespace conv_bias;
  1035. std::vector<TestArg> args = get_winograd_mk_packed_args(8);
  1036. std::vector<TestArg> args_back_half(args.begin() + args.size() / 2,
  1037. args.end());
  1038. Checker<ConvBiasForward> checker(handle());
  1039. Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
  1040. check_winograd_fp16("8:2:32", checker, args_back_half, rng, 0.25,
  1041. param::MatrixMul::Format::MK8);
  1042. }
  1043. #endif
  1044. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) {
  1045. using namespace conv_bias;
  1046. std::vector<TestArg> args = get_quantized_winograd_mk_packed_args(8);
  1047. Checker<ConvBiasForward> checker(handle());
  1048. UniformIntRNG rng{-50, 50};
  1049. checker.set_dtype(0, dtype::QuantizedS8(2.5f))
  1050. .set_dtype(1, dtype::QuantizedS8(2.5f))
  1051. .set_dtype(2, dtype::QuantizedS32(6.25f))
  1052. .set_dtype(4, dtype::QuantizedS8(60.25f))
  1053. .set_rng(0, &rng)
  1054. .set_rng(1, &rng)
  1055. .set_rng(2, &rng);
  1056. check_winograd("8:2:32", checker, args, param::MatrixMul::Format::MK8);
  1057. }
  1058. TEST_F(ARM_COMMON_MULTI_THREADS,
  1059. CONV_BIAS_WINOGRAD_INT8_8X8_WEIGHT_PREPROCESS) {
  1060. using namespace conv_bias;
  1061. std::vector<TestArg> args = get_quantized_winograd_mk_packed_args(8);
  1062. Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
  1063. handle());
  1064. UniformIntRNG rng{-50, 50};
  1065. checker.set_dtype(0, dtype::QuantizedS8(2.5f))
  1066. .set_dtype(1, dtype::QuantizedS8(2.5f))
  1067. .set_dtype(2, dtype::QuantizedS32(6.25f))
  1068. .set_dtype(4, dtype::QuantizedS8(60.25f))
  1069. .set_rng(0, &rng)
  1070. .set_rng(1, &rng)
  1071. .set_rng(2, &rng);
  1072. check_winograd("8:2:32", checker, args, param::MatrixMul::Format::MK8);
  1073. }
  1074. // clang-format on
  1075. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台