You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

conv_bias_multi_thread.cpp 49 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213
/**
 * \file dnn/test/arm_common/conv_bias_multi_thread.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
  12. #include "megdnn/dtype.h"
  13. #include "test/arm_common/fixture.h"
  14. #include "test/common/benchmarker.h"
  15. #include "test/common/conv_bias.h"
  16. #include "test/arm_common/cpuinfo_help.h"
  17. using namespace megdnn;
  18. using namespace test;
  19. using namespace conv_bias;
  20. std::vector<conv_bias::TestArg> get_int8_quint8_conv_bias_args(
  21. std::vector<size_t> kernel, size_t stride, bool no_pad, bool no_bias,
  22. bool no_nonlinemode) {
  23. using namespace conv_bias;
  24. using Param = param::ConvBias;
  25. using NLMode = param::ConvBias::NonlineMode;
  26. std::vector<TestArg> args;
  27. auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
  28. size_t kernel, size_t stride, NLMode nlmode) {
  29. Param param;
  30. param.stride_h = stride;
  31. param.stride_w = stride;
  32. if (!no_pad) {
  33. param.pad_h = kernel / 2;
  34. param.pad_w = kernel / 2;
  35. } else {
  36. param.pad_h = 0;
  37. param.pad_w = 0;
  38. }
  39. param.nonlineMode = nlmode;
  40. args.emplace_back(param, TensorShape{n, ic, h, w},
  41. TensorShape{oc, ic, kernel, kernel}, TensorShape{});
  42. if (!no_bias) {
  43. args.emplace_back(param, TensorShape{n, ic, h, w},
  44. TensorShape{oc, ic, kernel, kernel},
  45. TensorShape{1, oc, 1, 1});
  46. }
  47. };
  48. std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
  49. if (!no_nonlinemode) {
  50. nonlinemode.emplace_back(NLMode::RELU);
  51. nonlinemode.emplace_back(NLMode::H_SWISH);
  52. }
  53. for (size_t n : {1, 2}) {
  54. for (auto nlmode : nonlinemode) {
  55. for (size_t ic : {1, 3, 7}) {
  56. for (size_t oc : {1, 3, 7}) {
  57. for (size_t size : {4, 6, 8, 14, 16, 18}) {
  58. for (size_t kern : kernel) {
  59. pack(n, oc, ic, size, size, kern, stride, nlmode);
  60. }
  61. }
  62. }
  63. }
  64. }
  65. }
  66. return args;
  67. }
  68. std::vector<conv_bias::TestArg> get_nchw44_channel_wise_args(
  69. std::vector<size_t> kernel, size_t stride, bool no_bias,
  70. bool no_nonlinemode, bool no_full_bias) {
  71. using namespace conv_bias;
  72. using Param = param::ConvBias;
  73. using NLMode = param::ConvBias::NonlineMode;
  74. std::vector<TestArg> args;
  75. auto pack = [&](size_t n, size_t group, size_t w, size_t h, size_t kernel,
  76. size_t stride, NLMode nlmode, bool pad) {
  77. Param param;
  78. param.stride_h = stride;
  79. param.stride_w = stride;
  80. if (pad) {
  81. param.pad_h = kernel / 2;
  82. param.pad_w = kernel / 2;
  83. } else {
  84. param.pad_h = 0;
  85. param.pad_w = 0;
  86. }
  87. param.nonlineMode = nlmode;
  88. param.format = param::ConvBias::Format::NCHW44;
  89. param.sparse = param::ConvBias::Sparse::GROUP;
  90. args.emplace_back(param, TensorShape{n, group, h, w, 4},
  91. TensorShape{group, 1, 1, kernel, kernel, 4},
  92. TensorShape{});
  93. if (!no_bias) {
  94. args.emplace_back(param, TensorShape{n, group, h, w, 4},
  95. TensorShape{group, 1, 1, kernel, kernel, 4},
  96. TensorShape{1, group, 1, 1, 4});
  97. }
  98. if (!no_full_bias) {
  99. args.emplace_back(
  100. param, TensorShape{n, group, h, w, 4},
  101. TensorShape{group, 1, 1, kernel, kernel, 4},
  102. TensorShape{n, group,
  103. (h + 2 * param.pad_w - kernel) / stride + 1,
  104. (w + 2 * param.pad_w - kernel) / stride + 1,
  105. 4});
  106. }
  107. };
  108. std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
  109. if (!no_nonlinemode) {
  110. nonlinemode.emplace_back(NLMode::RELU);
  111. nonlinemode.emplace_back(NLMode::H_SWISH);
  112. }
  113. for (size_t n : {1, 2}) {
  114. for (auto nlmode : nonlinemode) {
  115. for (bool pad : {true}) {
  116. for (size_t group : {1, 2, 4, 7, 16}) {
  117. for (size_t size : {4, 6, 7, 9, 20}) {
  118. for (size_t kern : kernel) {
  119. pack(n, group, size, size, kern, stride, nlmode,
  120. pad);
  121. }
  122. }
  123. }
  124. }
  125. for (bool pad : {false}) {
  126. for (size_t group : {1, 2, 7, 16}) {
  127. for (size_t size : {7, 9, 20}) {
  128. for (size_t kern : kernel) {
  129. pack(n, group, size, size, kern, stride, nlmode,
  130. pad);
  131. }
  132. }
  133. }
  134. }
  135. }
  136. }
  137. return args;
  138. }
  139. std::vector<conv_bias::TestArg> get_nchw88_channel_wise_args(
  140. std::vector<size_t> kernel, size_t stride, bool no_bias,
  141. bool no_nonlinemode, bool no_full_bias) {
  142. using namespace conv_bias;
  143. using Param = param::ConvBias;
  144. using NLMode = param::ConvBias::NonlineMode;
  145. std::vector<TestArg> args;
  146. auto pack = [&](size_t n, size_t group, size_t w, size_t h, size_t kernel,
  147. size_t stride, NLMode nlmode, bool pad) {
  148. Param param;
  149. param.stride_h = stride;
  150. param.stride_w = stride;
  151. if (pad) {
  152. param.pad_h = kernel / 2;
  153. param.pad_w = kernel / 2;
  154. } else {
  155. param.pad_h = 0;
  156. param.pad_w = 0;
  157. }
  158. param.nonlineMode = nlmode;
  159. param.format = param::ConvBias::Format::NCHW88;
  160. param.sparse = param::ConvBias::Sparse::GROUP;
  161. args.emplace_back(param, TensorShape{n, group, h, w, 8},
  162. TensorShape{group, 1, 1, kernel, kernel, 8},
  163. TensorShape{});
  164. if (!no_bias) {
  165. args.emplace_back(param, TensorShape{n, group, h, w, 8},
  166. TensorShape{group, 1, 1, kernel, kernel, 8},
  167. TensorShape{1, group, 1, 1, 8});
  168. }
  169. if (!no_full_bias) {
  170. args.emplace_back(
  171. param, TensorShape{n, group, h, w, 8},
  172. TensorShape{group, 1, 1, kernel, kernel, 8},
  173. TensorShape{n, group,
  174. (h + 2 * param.pad_w - kernel) / stride + 1,
  175. (w + 2 * param.pad_w - kernel) / stride + 1,
  176. 8});
  177. }
  178. };
  179. std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
  180. if (!no_nonlinemode) {
  181. nonlinemode.emplace_back(NLMode::RELU);
  182. nonlinemode.emplace_back(NLMode::H_SWISH);
  183. }
  184. for (size_t n : {1, 2}) {
  185. for (auto nlmode : nonlinemode) {
  186. for (bool pad : {true}) {
  187. for (size_t group : {1, 2, 4, 7, 8, 16}) {
  188. for (size_t size : {4, 6, 7, 9, 20}) {
  189. for (size_t kern : kernel) {
  190. pack(n, group, size, size, kern, stride, nlmode,
  191. pad);
  192. }
  193. }
  194. }
  195. }
  196. for (bool pad : {false}) {
  197. for (size_t group : {1, 2, 7, 16}) {
  198. for (size_t size : {7, 9, 20}) {
  199. for (size_t kern : kernel) {
  200. pack(n, group, size, size, kern, stride, nlmode,
  201. pad);
  202. }
  203. }
  204. }
  205. }
  206. }
  207. }
  208. return args;
  209. }
  210. void checker_conv_bias_qint8x8x8(std::vector<conv_bias::TestArg> args,
  211. Handle* handle, const char* algo_name) {
  212. Checker<ConvBias> checker(handle);
  213. checker.set_before_exec_callback(
  214. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  215. #if MEGDNN_ARMV7
  216. checker.set_epsilon(1);
  217. #endif
  218. UniformIntRNG rng{-50, 50};
  219. checker.set_dtype(0, dtype::QuantizedS8(0.41113496f))
  220. .set_dtype(1, dtype::QuantizedS8(0.01887994f))
  221. .set_dtype(2, dtype::QuantizedS32(0.41113496f * 0.01887994f))
  222. .set_dtype(4, dtype::QuantizedS8(0.49550694f))
  223. .set_rng(0, &rng)
  224. .set_rng(1, &rng)
  225. .set_rng(2, &rng);
  226. for (auto&& arg : args) {
  227. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  228. }
  229. }
  230. void checker_conv_bias_qint8x8x32(std::vector<conv_bias::TestArg> args,
  231. Handle* handle, const char* algo_name) {
  232. Checker<ConvBias> checker(handle);
  233. UniformIntRNG rng{-50, 50};
  234. checker.set_dtype(0, dtype::QuantizedS8(2.5f))
  235. .set_dtype(1, dtype::QuantizedS8(2.5f))
  236. .set_dtype(2, dtype::QuantizedS32(6.25f))
  237. .set_dtype(4, {});
  238. checker.set_before_exec_callback(
  239. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  240. for (auto&& arg : args) {
  241. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  242. }
  243. }
  244. void checker_conv_bias_quint8x8x8(std::vector<conv_bias::TestArg> args,
  245. Handle* handle, const char* algo_name) {
  246. Checker<ConvBias> checker(handle);
  247. checker.set_before_exec_callback(
  248. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  249. UniformIntRNG rng(0, 255);
  250. checker.set_dtype(0, dtype::Quantized8Asymm(0.2f, 100))
  251. .set_dtype(1, dtype::Quantized8Asymm(0.2f, 120))
  252. .set_dtype(2, dtype::QuantizedS32(0.04f))
  253. .set_dtype(4, dtype::Quantized8Asymm(1.4f, 110))
  254. .set_rng(0, &rng)
  255. .set_rng(1, &rng)
  256. .set_rng(2, &rng);
  257. for (auto&& arg : args) {
  258. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  259. }
  260. }
  261. void checker_conv_bias_quint8x8x32(std::vector<conv_bias::TestArg> args,
  262. Handle* handle, const char* algo_name) {
  263. Checker<ConvBias> checker(handle);
  264. checker.set_before_exec_callback(
  265. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  266. NormalRNG rng(128.f);
  267. checker.set_rng(0, &rng).set_rng(1, &rng);
  268. checker.set_dtype(0, dtype::Quantized8Asymm(1.2f, (uint8_t)127))
  269. .set_dtype(1, dtype::Quantized8Asymm(1.3f, (uint8_t)129))
  270. .set_dtype(2, dtype::QuantizedS32(1.2 * 1.3))
  271. .set_dtype(4, {});
  272. for (auto&& arg : args) {
  273. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  274. }
  275. }
  276. void checker_conv_bias_int8x8x32_multi(std::vector<conv_bias::TestArg> args,
  277. Handle* handle, const char* algo_name) {
  278. Checker<ConvBias> checker(handle);
  279. checker.set_before_exec_callback(
  280. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  281. checker.set_dtype(0, dtype::Int8());
  282. checker.set_dtype(1, dtype::Int8());
  283. checker.set_dtype(2, dtype::Int32());
  284. checker.set_dtype(4, dtype::Int32());
  285. for (auto&& arg : args) {
  286. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  287. }
  288. }
  289. /**********************************F32 direct************************/
  290. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP32) {
  291. check_conv_bias(
  292. get_conv_bias_args({1, 2, 3, 4, 5, 6, 7}, 1, false, false, false),
  293. handle(), "F32DIRECT");
  294. }
  295. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP32_NCHW44_S1_K7) {
  296. //! k=7 s=1
  297. check_conv_bias(get_nchw44_conv_bias_args({7}, ONLY_IDENTITY_NLMODE,
  298. BR_AND_NO_BIASMODE, 1),
  299. handle(), "F32_CONV_NCHW44_DIRECT");
  300. }
  301. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP32_NCHW44_S1_K2K3) {
  302. check_conv_bias(
  303. get_nchw44_conv_bias_args({2, 3}, FULL_NLMODE, ONLY_BR_BIASMODE, 1),
  304. handle(), "F32_CONV_NCHW44_DIRECT");
  305. }
  306. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP32_NCHW44_S1_K5) {
  307. check_conv_bias(
  308. get_nchw44_conv_bias_args({5}, FULL_NLMODE, ONLY_BR_BIASMODE, 1),
  309. handle(), "F32_CONV_NCHW44_DIRECT");
  310. }
  311. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP32_NCHW44_S2) {
  312. check_conv_bias(get_nchw44_conv_bias_args({2, 3, 5, 7}, FULL_NLMODE,
  313. ONLY_BR_BIASMODE, 2),
  314. handle(), "F32_CONV_NCHW44_DIRECT");
  315. }
  316. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP32_STR1) {
  317. check_conv_bias(get_conv_bias_args({2, 3, 5, 7}, 1, false, false, false),
  318. handle(), "F32STRD1");
  319. }
  320. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP32_STR2) {
  321. check_conv_bias(get_conv_bias_args({2, 3, 5, 7}, 2, false, false, false),
  322. handle(), "F32STRD2");
  323. }
  324. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_NCHW_NCHW44_F32_S2) {
  325. check_conv_bias(
  326. get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
  327. ONLY_BR_BIASMODE, 2, false, true),
  328. handle(), "F32_CONV_NCHW_NCHW44");
  329. }
  330. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_NCHW_NCHW44_F32_S1) {
  331. check_conv_bias(
  332. get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
  333. ONLY_BR_BIASMODE, 1, false, true),
  334. handle(), "F32_CONV_NCHW_NCHW44");
  335. }
  336. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_CHANNEL_WISE_STRIDE1_FP32_NCHW44_1) {
  337. check_conv_bias(
  338. get_nchw44_channel_wise_args({2, 3}, 1, false, false, false),
  339. handle(), "F32_CHANNEL_WISE_NCHW44");
  340. }
  341. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_CHANNEL_WISE_STRIDE1_FP32_NCHW44_2) {
  342. check_conv_bias(get_nchw44_channel_wise_args({5}, 1, false, false, false),
  343. handle(), "F32_CHANNEL_WISE_NCHW44");
  344. }
  345. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_CHANNEL_WISE_STRIDE2_FP32_NCHW44) {
  346. check_conv_bias(
  347. get_nchw44_channel_wise_args({2, 3, 5}, 2, false, false, false),
  348. handle(), "F32_CHANNEL_WISE_NCHW44");
  349. }
  350. /**********************************F16 direct************************/
  351. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  352. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP16) {
  353. NormalRNG rng(1);
  354. checker_conv_bias_f16(
  355. get_conv_bias_args({1, 2, 3, 4, 5, 6, 7}, 1, false, false, false),
  356. handle(), rng, "F16DIRECT", 0.03);
  357. }
  358. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP16_STR1) {
  359. NormalRNG rng(1);
  360. checker_conv_bias_f16(get_conv_bias_args({2, 3, 5}, 1, false, false, false),
  361. handle(), rng, "F16STRD1", 0.03);
  362. }
  363. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_CHANNEL_WISE_STRIDE1_FP16_NCHW88_1) {
  364. NormalRNG rng(1);
  365. checker_conv_bias_f16(
  366. get_nchw88_channel_wise_args({2, 3}, 1, false, false, false),
  367. handle(), rng, "F16_CHANNEL_WISE_NCHW88", 0.03);
  368. }
  369. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_CHANNEL_WISE_STRIDE1_FP16_NCHW88_2) {
  370. NormalRNG rng(1);
  371. checker_conv_bias_f16(
  372. get_nchw88_channel_wise_args({5}, 1, false, false, false), handle(),
  373. rng, "F16_CHANNEL_WISE_NCHW88", 0.03);
  374. }
  375. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_CHANNEL_WISE_STRIDE2_FP16_NCHW88) {
  376. NormalRNG rng(1);
  377. checker_conv_bias_f16(
  378. get_nchw88_channel_wise_args({2, 3, 5}, 2, false, false, false),
  379. handle(), rng, "F16_CHANNEL_WISE_NCHW88", 0.03);
  380. }
  381. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP16_NCHW88_S1) {
  382. NormalRNG rng(1);
  383. checker_conv_bias_f16(
  384. get_nchw88_conv_bias_args({1, 2, 3, 5, 7}, FULL_NLMODE,
  385. ALL_BIASMODE, 1),
  386. handle(), rng, "F16_CONV_NCHW88_DIRECT", 0.03);
  387. }
  388. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_DIRECT_FP16_NCHW88_S2) {
  389. NormalRNG rng(1);
  390. checker_conv_bias_f16(
  391. get_nchw88_conv_bias_args({1, 2, 3, 5, 7}, FULL_NLMODE,
  392. ALL_BIASMODE, 2),
  393. handle(), rng, "F16_CONV_NCHW88_DIRECT", 0.03);
  394. }
  395. #endif
  396. /**********************************algo 8816 direct************************/
  397. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_INT8_INT16_DIRECT) {
  398. checker_conv_bias_int8x8x16(
  399. get_conv_bias_args({2, 3, 5}, 1, false, true, true), handle(),
  400. "I8816DIRECT");
  401. }
  402. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_INT8_INT16_STRIDE2) {
  403. checker_conv_bias_int8x8x16(
  404. get_conv_bias_args({2, 3, 5}, 2, false, true, true), handle(),
  405. "I8816STRD2");
  406. }
  407. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_INT8_INT16_NCHW_NCHW44_S2) {
  408. checker_conv_bias_int8x8x16(
  409. get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
  410. ONLY_NO_BIASMODE, 2, false, true),
  411. handle(), "I8816_CONV_NCHW_NCHW44");
  412. }
  413. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_INT8_INT16_NCHW_NCHW44_S1) {
  414. checker_conv_bias_int8x8x16(
  415. get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
  416. ONLY_NO_BIASMODE, 1, false, true),
  417. handle(), "I8816_CONV_NCHW_NCHW44");
  418. }
  419. /**********************************algo 8-8-32 direct************************/
  420. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_INT8_INT32_STRIDE1) {
  421. checker_conv_bias_int8x8x32_multi(
  422. get_conv_bias_args({2, 3, 5, 7}, 1, false, true, true), handle(),
  423. "S8STRD1");
  424. }
  425. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_INT8_INT32_STRIDE2) {
  426. checker_conv_bias_int8x8x32_multi(
  427. get_conv_bias_args({2, 3, 5, 7}, 2, false, true, true), handle(),
  428. "S8STRD2");
  429. }
  430. TEST_F(ARM_COMMON_MULTI_THREADS,
  431. CONV_BIAS_INT8_INT8_INT32_CHANNEL_WISE_DIRECT1_NCHW44) {
  432. checker_conv_bias_int8x8x32_multi(
  433. get_nchw44_channel_wise_args({2, 3, 5}, 1, false, true, true),
  434. handle(), "S8_CHAN_WISE_STRD1_NCHW44");
  435. }
  436. TEST_F(ARM_COMMON_MULTI_THREADS,
  437. CONV_BIAS_INT8_INT8_INT32_CHANNEL_WISE_DIRECT2_NCHW44) {
  438. checker_conv_bias_int8x8x32_multi(
  439. get_nchw44_channel_wise_args({2, 3, 5}, 2, false, true, true),
  440. handle(), "S8_CHAN_WISE_STRD2_NCHW44");
  441. }
  442. TEST_F(ARM_COMMON, CONV_BIAS_INT8_INT8_INT16_CHANNEL_WISE_DIRECT1_NCHW44) {
  443. Checker<ConvBias> checker(handle());
  444. checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
  445. "S8x8x16_CHAN_WISE_STRD1_STRD2_NCHW44"));
  446. checker.set_dtype(0, dtype::Int8());
  447. checker.set_dtype(1, dtype::Int8());
  448. checker.set_dtype(2, dtype::Int16());
  449. checker.set_dtype(4, dtype::Int16());
  450. auto args = get_nchw44_channel_wise_args({2, 3, 5}, 1, false, true, true);
  451. for (auto&& arg : args) {
  452. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  453. }
  454. }
  455. TEST_F(ARM_COMMON_MULTI_THREADS,
  456. CONV_BIAS_INT8_INT8_INT16_CHANNEL_WISE_DIRECT2_NCHW44) {
  457. Checker<ConvBias> checker(handle());
  458. checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
  459. "S8x8x16_CHAN_WISE_STRD1_STRD2_NCHW44"));
  460. checker.set_dtype(0, dtype::Int8());
  461. checker.set_dtype(1, dtype::Int8());
  462. checker.set_dtype(2, dtype::Int16());
  463. checker.set_dtype(4, dtype::Int16());
  464. auto args = get_nchw44_channel_wise_args({2, 3, 5}, 2, false, true, true);
  465. for (auto&& arg : args) {
  466. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  467. }
  468. }
  469. /********************************qint8 direct******************************/
  470. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE1) {
  471. checker_conv_bias_qint8x8x8(get_int8_quint8_conv_bias_args(
  472. {2, 3, 5, 7}, 1, false, false, false),
  473. handle(), "S8STRD1");
  474. }
  475. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE2) {
  476. checker_conv_bias_qint8x8x8(get_int8_quint8_conv_bias_args(
  477. {2, 3, 5, 7}, 2, false, false, false),
  478. handle(), "S8STRD2");
  479. }
  480. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE1_NCHW44) {
  481. checker_conv_bias_qint8x8x8(
  482. get_nchw44_conv_bias_args({2, 3, 5, 7}, QUAN_NLMODE,
  483. ONLY_BR_BIASMODE, 1),
  484. handle(), "S8_NCHW44_DIRECT");
  485. }
  486. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE1_NCHW44_8816) {
  487. checker_conv_bias_int8x8x16(
  488. get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
  489. ONLY_BR_BIASMODE, 1),
  490. handle(), "S8x8x16_NCHW44_DIRECT");
  491. }
  492. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE2_NCHW44_8816) {
  493. checker_conv_bias_int8x8x16(
  494. get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
  495. ONLY_BR_BIASMODE, 2),
  496. handle(), "S8x8x16_NCHW44_DIRECT");
  497. }
  498. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE1_NCHW44_8832) {
  499. checker_conv_bias_qint8x8x32(
  500. get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
  501. ONLY_BR_BIASMODE, 1),
  502. handle(), "S8_NCHW44_DIRECT");
  503. }
  504. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE2_NCHW44_8832) {
  505. checker_conv_bias_qint8x8x32(
  506. get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
  507. ONLY_NO_BIASMODE, 2),
  508. handle(), "S8_NCHW44_DIRECT");
  509. }
  510. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE2_NCHW44) {
  511. checker_conv_bias_qint8x8x8(
  512. get_nchw44_conv_bias_args({2, 3, 5, 7}, QUAN_NLMODE,
  513. BR_AND_NO_BIASMODE, 2),
  514. handle(), "S8_NCHW44_DIRECT");
  515. }
  516. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_QS8_CHANNEL_WISE_DIRECT1_NCHW44) {
  517. checker_conv_bias_qint8x8x8(
  518. get_nchw44_conv_bias_args({2, 3, 5, 7}, ONLY_IDENTITY_NLMODE,
  519. BR_AND_NO_BIASMODE, 1),
  520. handle(), "S8_NCHW44_DIRECT");
  521. checker_conv_bias_qint8x8x8(
  522. get_nchw44_channel_wise_args({2, 3, 5}, 1, false, false, true),
  523. handle(), "S8_CHAN_WISE_STRD1_NCHW44");
  524. }
  525. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_QS8_CHANNEL_WISE_DIRECT2_NCHW44) {
  526. checker_conv_bias_qint8x8x8(
  527. get_nchw44_channel_wise_args({2, 3, 5}, 2, false, false, true),
  528. handle(), "S8_CHAN_WISE_STRD2_NCHW44");
  529. }
  530. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_NCHW_NCHW44_S1) {
  531. checker_conv_bias_qint8x8x8(
  532. get_nchw44_conv_bias_args({2, 3, 5, 7}, QUAN_NLMODE,
  533. BR_AND_NO_BIASMODE, 1, false, true),
  534. handle(), "S8_CONV_NCHW_NCHW44");
  535. }
  536. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_NCHW_NCHW44_S2) {
  537. checker_conv_bias_qint8x8x8(
  538. get_nchw44_conv_bias_args({2, 3, 5, 7}, QUAN_NLMODE,
  539. BR_AND_NO_BIASMODE, 2, false, true),
  540. handle(), "S8_CONV_NCHW_NCHW44");
  541. }
  542. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_NCHW_NCHW44_S1_F1) {
  543. checker_conv_bias_qint8x8x8(
  544. get_nchw44_conv_bias_args({1}, QUAN_NLMODE, BR_AND_NO_BIASMODE, 1,
  545. false, true),
  546. handle(), "S8_CONV_NCHW_NCHW44");
  547. }
  548. /*****************************quint8 direct****************************/
  549. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_QUINT8_STRIDE1) {
  550. checker_conv_bias_quint8x8x8(get_int8_quint8_conv_bias_args(
  551. {2, 3, 5, 7}, 1, false, false, false),
  552. handle(), "QU8STRD1");
  553. }
  554. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_QUINT8_STRIDE2) {
  555. checker_conv_bias_quint8x8x8(get_int8_quint8_conv_bias_args(
  556. {2, 3, 5, 7}, 2, false, false, false),
  557. handle(), "QU8STRD2");
  558. }
  559. /****************************dot qint8 direct*************************/
  560. #if MGB_ENABLE_DOT
  561. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_DOT_NCHW_NCHW44) {
  562. auto args = get_nchw44_conv_bias_args({2, 3, 5, 7}, QUAN_NLMODE,
  563. BR_AND_NO_BIASMODE, 2, false, true);
  564. for (auto&& arg : args) {
  565. arg.param.format = param::ConvBias::Format::NCHW44_DOT;
  566. }
  567. checker_conv_bias_qint8x8x8(args, handle(), "ARMDOTS8_NCHW_NCHW44");
  568. args = get_nchw44_conv_bias_args({2, 3, 5, 7}, QUAN_NLMODE,
  569. BR_AND_NO_BIASMODE, 1, false, true);
  570. for (auto&& arg : args) {
  571. arg.param.format = param::ConvBias::Format::NCHW44_DOT;
  572. }
  573. checker_conv_bias_qint8x8x8(args, handle(), "ARMDOTS8_NCHW_NCHW44");
  574. }
  575. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_DOT_NCHW_NCHW44_S1_F1) {
  576. auto args = get_nchw44_conv_bias_args({1}, QUAN_NLMODE, BR_AND_NO_BIASMODE,
  577. 1, false, true);
  578. for (auto&& arg : args) {
  579. arg.param.format = param::ConvBias::Format::NCHW44_DOT;
  580. }
  581. checker_conv_bias_qint8x8x8(args, handle(), "ARMDOTS8_NCHW_NCHW44");
  582. }
  583. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE1_WITHDOTPROD) {
  584. checker_conv_bias_qint8x8x8(get_int8_quint8_conv_bias_args(
  585. {2, 3, 5, 7}, 1, false, false, false),
  586. handle(), "ARMDOTS8STRD1");
  587. }
  588. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_INT8_STRIDE2_WITHDOTPROD) {
  589. checker_conv_bias_qint8x8x8(get_int8_quint8_conv_bias_args(
  590. {2, 3, 5, 7}, 2, false, false, false),
  591. handle(), "ARMDOTS8STRD2");
  592. }
  593. /****************************dot 8-8-32 direct*************************/
  594. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_I8832STRD1_WITHDOT) {
  595. checker_conv_bias_qint8x8x32(
  596. get_conv_bias_args({2, 3, 5, 7}, 1, false, true, true), handle(),
  597. "ARMDOTS8STRD1");
  598. }
  599. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_I8832STRD2_WITHDOT) {
  600. checker_conv_bias_qint8x8x32(
  601. get_conv_bias_args({2, 3, 5, 7}, 2, false, true, true), handle(),
  602. "ARMDOTS8STRD2");
  603. }
  604. /******************************dot quint8*****************************/
  605. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_QUINT8_STRIDE1_WITHDOTPROD) {
  606. checker_conv_bias_quint8x8x8(get_int8_quint8_conv_bias_args(
  607. {2, 3, 5, 7}, 1, false, false, false),
  608. handle(), "ARMDOTU8STRD1");
  609. }
  610. //! TODO: this test without test kernel size=3, add it will case buss error now
  611. //! in armv7
  612. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_QUINT8_STRIDE2_WITHDOTPROD) {
  613. checker_conv_bias_quint8x8x8(
  614. get_int8_quint8_conv_bias_args({2, 5, 7}, 2, false, false, false),
  615. handle(), "ARMDOTU8STRD2");
  616. }
  617. /******************************dot quint8x8x32***********************/
  618. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_QUINT8_DIRECT_STRIDE1) {
  619. checker_conv_bias_quint8x8x32(
  620. get_conv_bias_args({2, 3, 5, 7}, 1, false, true, true), handle(),
  621. "ARMDOTU8STRD1");
  622. }
  623. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_QUINT8_DIRECT_STRIDE2) {
  624. checker_conv_bias_quint8x8x32(
  625. get_conv_bias_args({2, 3, 5, 7}, 2, false, true, true), handle(),
  626. "ARMDOTU8STRD2");
  627. }
  628. /******************************dot int8x8x8 nchw44 ***********************/
  629. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_DIRECT_DOT_NCHW44_S1_Q8x8x8) {
  630. using namespace conv_bias;
  631. std::vector<TestArg> args = get_nchw44_conv_bias_args(
  632. {2, 3, 5, 7}, QUAN_NLMODE, ONLY_BR_BIASMODE, 1);
  633. for (auto&& arg : args)
  634. arg.param.format = param::ConvBias::Format::NCHW44_DOT;
  635. checker_conv_bias_qint8x8x8(args, handle(), "ARMDOTS8DIRECT_NCHW44");
  636. }
  637. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_DIRECT_DOT_NCHW44_S1_Q8x8x32) {
  638. using namespace conv_bias;
  639. std::vector<TestArg> args = get_nchw44_conv_bias_args(
  640. {2, 3, 5, 7}, ONLY_IDENTITY_NLMODE, BR_AND_NO_BIASMODE, 1);
  641. for (auto&& arg : args)
  642. arg.param.format = param::ConvBias::Format::NCHW44_DOT;
  643. checker_conv_bias_qint8x8x32(args, handle(), "ARMDOTS8DIRECT_NCHW44");
  644. }
  645. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_DIRECT_DOT_NCHW44_S1_8x8x32) {
  646. using namespace conv_bias;
  647. std::vector<TestArg> args = get_nchw44_conv_bias_args(
  648. {2, 3, 5, 7}, ONLY_IDENTITY_NLMODE, BR_AND_NO_BIASMODE, 1);
  649. for (auto&& arg : args)
  650. arg.param.format = param::ConvBias::Format::NCHW44_DOT;
  651. checker_conv_bias_int8x8x32_multi(args, handle(), "ARMDOTS8DIRECT_NCHW44");
  652. }
  653. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_DIRECT_DOT_NCHW44_S2_Q8x8x8) {
  654. using namespace conv_bias;
  655. //! test qint8x8x8
  656. std::vector<TestArg> args = get_nchw44_conv_bias_args(
  657. {2, 3, 5, 7}, QUAN_NLMODE, BR_AND_NO_BIASMODE, 2);
  658. for (auto&& arg : args)
  659. arg.param.format = param::ConvBias::Format::NCHW44_DOT;
  660. checker_conv_bias_qint8x8x8(args, handle(), "ARMDOTS8DIRECT_NCHW44");
  661. }
  662. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_DIRECT_DOT_NCHW44_S2_Q8x8x32) {
  663. using namespace conv_bias;
  664. //! test qint8x8x8
  665. std::vector<TestArg> args = get_nchw44_conv_bias_args(
  666. {2, 3, 5, 7}, ONLY_IDENTITY_NLMODE, BR_AND_NO_BIASMODE, 2);
  667. for (auto&& arg : args)
  668. arg.param.format = param::ConvBias::Format::NCHW44_DOT;
  669. checker_conv_bias_qint8x8x32(args, handle(), "ARMDOTS8DIRECT_NCHW44");
  670. }
  671. TEST_F(ARM_COMMON_MULTI_THREADS, CONVBIAS_INT8_DIRECT_DOT_NCHW44_S2_8x8x32) {
  672. using namespace conv_bias;
  673. //! test qint8x8x8
  674. std::vector<TestArg> args = get_nchw44_conv_bias_args(
  675. {2, 3, 5, 7}, ONLY_IDENTITY_NLMODE, BR_AND_NO_BIASMODE, 2);
  676. for (auto&& arg : args)
  677. arg.param.format = param::ConvBias::Format::NCHW44_DOT;
  678. checker_conv_bias_int8x8x32_multi(args, handle(), "ARMDOTS8DIRECT_NCHW44");
  679. }
  680. #endif
  681. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) {
  682. using namespace conv_bias;
  683. std::vector<TestArg> args = get_winograd_args(3);
  684. Checker<ConvBiasForward> checker(handle());
  685. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  686. DType B_dtype, DType C_dtype, DType D_dtype,
  687. const float eps) {
  688. for (auto&& arg : args) {
  689. checker.set_dtype(0, A_dtype)
  690. .set_dtype(1, B_dtype)
  691. .set_dtype(2, C_dtype)
  692. .set_dtype(4, D_dtype)
  693. .set_epsilon(eps)
  694. .set_param(arg.param)
  695. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  696. }
  697. };
  698. run(args, dtype::Float32(), dtype::Float32(), dtype::Float32(),
  699. dtype::Float32(), 1e-3f);
  700. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  701. Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
  702. checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng);
  703. run(args, dtype::Float16(), dtype::Float16(), dtype::Float16(),
  704. dtype::Float16(), 0.35f);
  705. #endif
  706. }
  707. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4) {
  708. using namespace conv_bias;
  709. std::vector<TestArg> args = get_winograd_mk_packed_args();
  710. Checker<ConvBiasForward> checker(handle());
  711. check_winograd("4:2:32", checker, args, param::MatrixMul::Format::MK4);
  712. }
  713. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_NCHW44) {
  714. using namespace conv_bias;
  715. std::vector<TestArg> args =
  716. get_nchw44_conv_bias_args({3}, QUAN_NLMODE, BR_AND_NO_BIASMODE, 1);
  717. Checker<ConvBiasForward> checker(handle());
  718. check_winograd("4:2:32", checker, args, param::MatrixMul::Format::MK4,
  719. param::ConvBias::Format::NCHW44);
  720. }
  721. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63) {
  722. using namespace conv_bias;
  723. std::vector<TestArg> args = get_winograd_args(3);
  724. Checker<ConvBiasForward> checker(handle());
  725. check_winograd("1:6:32", checker, args);
  726. }
  727. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4) {
  728. using namespace conv_bias;
  729. std::vector<TestArg> args = get_winograd_mk_packed_args();
  730. Checker<ConvBiasForward> checker(handle());
  731. check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4);
  732. }
  733. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_NCHW44) {
  734. using namespace conv_bias;
  735. std::vector<TestArg> args =
  736. get_nchw44_conv_bias_args({3}, QUAN_NLMODE, BR_AND_NO_BIASMODE, 1);
  737. Checker<ConvBiasForward> checker(handle());
  738. check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4,
  739. param::ConvBias::Format::NCHW44);
  740. }
  741. //! uncomment it when low precision mode is ok
  742. #if 0
  743. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F73_4_NCHW44) {
  744. using namespace conv_bias;
  745. std::vector<TestArg> args =
  746. get_nchw44_conv_bias_args({3},QUAN_NLMODE,BR_AND_NO_BIASMODE,1);
  747. Checker<ConvBiasForward> checker(handle());
  748. check_winograd("4:7:16", checker, args, param::MatrixMul::Format::MK4,
  749. param::ConvBias::Format::NCHW44);
  750. }
  751. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F73_4_NCHW44_WEIGHT_PREPROCESS) {
  752. using namespace conv_bias;
  753. std::vector<TestArg> args =
  754. get_nchw44_conv_bias_args({3},QUAN_NLMODE,BR_AND_NO_BIASMODE,1);
  755. Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
  756. handle());
  757. check_winograd("4:7:16", checker, args, param::MatrixMul::Format::MK4,
  758. param::ConvBias::Format::NCHW44);
  759. }
  760. #endif
  761. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54) {
  762. using namespace conv_bias;
  763. std::vector<TestArg> args = get_winograd_args(4);
  764. Checker<ConvBiasForward> checker(handle());
  765. check_winograd("1:5:32", checker, args);
  766. }
  767. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) {
  768. using namespace conv_bias;
  769. std::vector<TestArg> args = get_winograd_args(5);
  770. Checker<ConvBiasForward> checker(handle());
  771. check_winograd("1:4:32", checker, args);
  772. }
  773. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_1) {
  774. using namespace conv_bias;
  775. Checker<ConvBiasForward> checker(handle());
  776. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  777. DType B_dtype, DType C_dtype, DType D_dtype,
  778. float eps) {
  779. for (auto&& arg : args) {
  780. checker.set_dtype(0, A_dtype)
  781. .set_dtype(1, B_dtype)
  782. .set_dtype(2, C_dtype)
  783. .set_dtype(4, D_dtype)
  784. .set_epsilon(eps)
  785. .set_param(arg.param)
  786. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  787. }
  788. };
  789. std::vector<TestArg> args = get_winograd_mk_packed_args(8);
  790. std::vector<TestArg> args_first_half(args.begin(),
  791. args.begin() + args.size() / 2);
  792. run(args_first_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
  793. dtype::Float32{}, 1e-3f);
  794. }
  795. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) {
  796. using namespace conv_bias;
  797. Checker<ConvBiasForward> checker(handle());
  798. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  799. DType B_dtype, DType C_dtype, DType D_dtype,
  800. float eps) {
  801. for (auto&& arg : args) {
  802. checker.set_dtype(0, A_dtype)
  803. .set_dtype(1, B_dtype)
  804. .set_dtype(2, C_dtype)
  805. .set_dtype(4, D_dtype)
  806. .set_epsilon(eps)
  807. .set_param(arg.param)
  808. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  809. }
  810. };
  811. std::vector<TestArg> args = get_winograd_mk_packed_args(8);
  812. std::vector<TestArg> args_second_half(args.begin() + args.size() / 2,
  813. args.end());
  814. run(args_second_half, dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
  815. dtype::Float32{}, 1e-3f);
  816. }
  817. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  818. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) {
  819. using namespace conv_bias;
  820. Checker<ConvBiasForward> checker(handle());
  821. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  822. DType B_dtype, DType C_dtype, DType D_dtype,
  823. float eps) {
  824. for (auto&& arg : args) {
  825. checker.set_dtype(0, A_dtype)
  826. .set_dtype(1, B_dtype)
  827. .set_dtype(2, C_dtype)
  828. .set_dtype(4, D_dtype)
  829. .set_epsilon(eps)
  830. .set_param(arg.param)
  831. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  832. }
  833. };
  834. std::vector<TestArg> args = get_winograd_mk_packed_args(8);
  835. Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
  836. checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng);
  837. run(args, dtype::Float16{}, dtype::Float16{}, dtype::Float16{},
  838. dtype::Float16{}, 0.25);
  839. }
  840. #endif
  841. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_INT8) {
  842. using namespace conv_bias;
  843. Checker<ConvBiasForward> checker(handle());
  844. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  845. DType B_dtype, DType C_dtype, DType D_dtype,
  846. float eps) {
  847. for (auto&& arg : args) {
  848. checker.set_dtype(0, A_dtype)
  849. .set_dtype(1, B_dtype)
  850. .set_dtype(2, C_dtype)
  851. .set_dtype(4, D_dtype)
  852. .set_epsilon(eps)
  853. .set_param(arg.param)
  854. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  855. }
  856. };
  857. #if MEGDNN_AARCH64
  858. const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8";
  859. #else
  860. const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8";
  861. #endif
  862. checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
  863. ssprintf("WINOGRAD:%s:8:2:32", matmul_name).c_str()));
  864. std::vector<TestArg> quantized_args =
  865. get_quantized_winograd_mk_packed_args(8);
  866. UniformIntRNG int_rng{-50, 50};
  867. checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
  868. run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f),
  869. dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3);
  870. }
  871. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8) {
  872. using namespace conv_bias;
  873. Checker<ConvBiasForward> checker(handle());
  874. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  875. DType B_dtype, DType C_dtype, DType D_dtype,
  876. float eps) {
  877. for (auto&& arg : args) {
  878. checker.set_dtype(0, A_dtype)
  879. .set_dtype(1, B_dtype)
  880. .set_dtype(2, C_dtype)
  881. .set_dtype(4, D_dtype)
  882. .set_epsilon(eps)
  883. .set_param(arg.param)
  884. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  885. }
  886. };
  887. #if MEGDNN_AARCH64
  888. const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8";
  889. #else
  890. const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8";
  891. #endif
  892. checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
  893. ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_name).c_str()));
  894. std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4);
  895. UniformIntRNG int_rng{-50, 50};
  896. checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
  897. run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f),
  898. dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3);
  899. }
  900. TEST_F(ARM_COMMON_MULTI_THREADS,
  901. CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_GROUPMODE) {
  902. using namespace conv_bias;
  903. Checker<ConvBiasForward> checker(handle());
  904. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  905. DType B_dtype, DType C_dtype, DType D_dtype,
  906. float eps) {
  907. for (auto&& arg : args) {
  908. checker.set_dtype(0, A_dtype)
  909. .set_dtype(1, B_dtype)
  910. .set_dtype(2, C_dtype)
  911. .set_dtype(4, D_dtype)
  912. .set_epsilon(eps)
  913. .set_param(arg.param)
  914. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  915. }
  916. };
  917. float epsilon = 0.001;
  918. #if MEGDNN_AARCH64
  919. const char* matmul_name = "AARCH64_F32_MK4_4x16";
  920. #else
  921. const char* matmul_name = "ARMV7_F32_MK4_4x8";
  922. #endif
  923. checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
  924. ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name).c_str()));
  925. std::vector<TestArg> quantized_args =
  926. get_int8_nchw44_args(3, 4, true, true);
  927. UniformIntRNG int_rng{-50, 50};
  928. checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
  929. run(quantized_args, dtype::QuantizedS8(0.41113496f),
  930. dtype::QuantizedS8(0.01887994f),
  931. dtype::QuantizedS32(0.41113496f * 0.01887994f),
  932. dtype::QuantizedS8(0.49550694f), epsilon);
  933. }
  934. TEST_F(ARM_COMMON_MULTI_THREADS,
  935. CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_GROUPMODE) {
  936. using namespace conv_bias;
  937. Checker<ConvBiasForward> checker(handle());
  938. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  939. DType B_dtype, DType C_dtype, DType D_dtype,
  940. float eps) {
  941. for (auto&& arg : args) {
  942. checker.set_dtype(0, A_dtype)
  943. .set_dtype(1, B_dtype)
  944. .set_dtype(2, C_dtype)
  945. .set_dtype(4, D_dtype)
  946. .set_epsilon(eps)
  947. .set_param(arg.param)
  948. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  949. }
  950. };
  951. #if MEGDNN_AARCH64
  952. const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8";
  953. #else
  954. const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8";
  955. #endif
  956. checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
  957. ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_name).c_str()));
  958. std::vector<TestArg> quantized_args =
  959. get_int8_nchw44_args(3, 4, false, true);
  960. UniformIntRNG int_rng{-50, 50};
  961. checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
  962. run(quantized_args, dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f),
  963. dtype::QuantizedS32(6.25f), dtype::QuantizedS8(60.25f), 1e-3);
  964. }
  965. TEST_F(ARM_COMMON_MULTI_THREADS,
  966. CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32) {
  967. using namespace conv_bias;
  968. Checker<ConvBiasForward> checker(handle());
  969. auto run = [&checker](const std::vector<TestArg>& args, DType A_dtype,
  970. DType B_dtype, DType C_dtype, DType D_dtype,
  971. float eps) {
  972. for (auto&& arg : args) {
  973. checker.set_dtype(0, A_dtype)
  974. .set_dtype(1, B_dtype)
  975. .set_dtype(2, C_dtype)
  976. .set_dtype(4, D_dtype)
  977. .set_epsilon(eps)
  978. .set_param(arg.param)
  979. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  980. }
  981. };
  982. float epsilon = 0.001;
  983. #if MEGDNN_AARCH64
  984. const char* matmul_name = "AARCH64_F32_MK4_4x16";
  985. #else
  986. const char* matmul_name = "ARMV7_F32_MK4_4x8";
  987. #endif
  988. checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
  989. ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name).c_str()));
  990. std::vector<TestArg> quantized_args = get_int8_nchw44_args(3, 4, true);
  991. UniformIntRNG int_rng{-50, 50};
  992. checker.set_rng(0, &int_rng).set_rng(1, &int_rng).set_rng(2, &int_rng);
  993. run(quantized_args, dtype::QuantizedS8(0.41113496f),
  994. dtype::QuantizedS8(0.01887994f),
  995. dtype::QuantizedS32(0.41113496f * 0.01887994f),
  996. dtype::QuantizedS8(0.49550694f), epsilon);
  997. }
  998. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  999. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23) {
  1000. using namespace conv_bias;
  1001. std::vector<TestArg> args = get_winograd_mk_packed_args();
  1002. Checker<ConvBiasForward> checker(handle());
  1003. check_winograd_fp16("1:2:32", checker, args, NULL, 0.08);
  1004. }
  1005. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F45_1) {
  1006. using namespace conv_bias;
  1007. std::vector<TestArg> args = get_winograd_args(5);
  1008. std::vector<TestArg> args_head_half(args.begin(),
  1009. args.begin() + args.size() / 2);
  1010. Checker<ConvBiasForward> checker(handle());
  1011. //! fp16 range -1.0 ~ 1.0
  1012. Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
  1013. check_winograd_fp16("1:4:32", checker, args_head_half, rng, 0.25);
  1014. }
  1015. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F45_2) {
  1016. using namespace conv_bias;
  1017. std::vector<TestArg> args = get_winograd_args(5);
  1018. std::vector<TestArg> args_back_half(args.begin() + args.size() / 2,
  1019. args.end());
  1020. Checker<ConvBiasForward> checker(handle());
  1021. //! fp16 range -1.0 ~ 1.0
  1022. Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
  1023. check_winograd_fp16("1:4:32", checker, args_back_half, rng, 0.25);
  1024. }
  1025. //! FIXME: This test may be failed if run `ARM_COMMON.CONV_BIAS_WINOGRAD*`, but
  1026. //! it will pass when run single testcase
  1027. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F63) {
  1028. using namespace conv_bias;
  1029. std::vector<TestArg> args = get_winograd_args(3);
  1030. Checker<ConvBiasForward> checker(handle());
  1031. //! fp16 range -1.0 ~ 1.0
  1032. Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
  1033. check_winograd_fp16("1:6:32", checker, args, rng, 0.3);
  1034. }
  1035. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_8x8_1) {
  1036. using namespace conv_bias;
  1037. std::vector<TestArg> args = get_winograd_mk_packed_args(8);
  1038. std::vector<TestArg> args_head_half(args.begin(),
  1039. args.begin() + args.size() / 2);
  1040. Checker<ConvBiasForward> checker(handle());
  1041. Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
  1042. check_winograd_fp16("8:2:32", checker, args_head_half, rng, 0.25,
  1043. param::MatrixMul::Format::MK8);
  1044. }
  1045. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_8x8_2) {
  1046. using namespace conv_bias;
  1047. std::vector<TestArg> args = get_winograd_mk_packed_args(8);
  1048. std::vector<TestArg> args_back_half(args.begin() + args.size() / 2,
  1049. args.end());
  1050. Checker<ConvBiasForward> checker(handle());
  1051. Float16PeriodicalRNG* rng = new Float16PeriodicalRNG(0x3c00);
  1052. check_winograd_fp16("8:2:32", checker, args_back_half, rng, 0.25,
  1053. param::MatrixMul::Format::MK8);
  1054. }
  1055. #endif
  1056. TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) {
  1057. using namespace conv_bias;
  1058. std::vector<TestArg> args = get_quantized_winograd_mk_packed_args(8);
  1059. Checker<ConvBiasForward> checker(handle());
  1060. UniformIntRNG rng{-50, 50};
  1061. checker.set_dtype(0, dtype::QuantizedS8(2.5f))
  1062. .set_dtype(1, dtype::QuantizedS8(2.5f))
  1063. .set_dtype(2, dtype::QuantizedS32(6.25f))
  1064. .set_dtype(4, dtype::QuantizedS8(60.25f))
  1065. .set_rng(0, &rng)
  1066. .set_rng(1, &rng)
  1067. .set_rng(2, &rng);
  1068. check_winograd("8:2:32", checker, args, param::MatrixMul::Format::MK8);
  1069. }
  1070. TEST_F(ARM_COMMON_MULTI_THREADS,
  1071. CONV_BIAS_WINOGRAD_INT8_8X8_WEIGHT_PREPROCESS) {
  1072. using namespace conv_bias;
  1073. std::vector<TestArg> args = get_quantized_winograd_mk_packed_args(8);
  1074. Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
  1075. handle());
  1076. UniformIntRNG rng{-50, 50};
  1077. checker.set_dtype(0, dtype::QuantizedS8(2.5f))
  1078. .set_dtype(1, dtype::QuantizedS8(2.5f))
  1079. .set_dtype(2, dtype::QuantizedS32(6.25f))
  1080. .set_dtype(4, dtype::QuantizedS8(60.25f))
  1081. .set_rng(0, &rng)
  1082. .set_rng(1, &rng)
  1083. .set_rng(2, &rng);
  1084. check_winograd("8:2:32", checker, args, param::MatrixMul::Format::MK8);
  1085. }
  1086. // clang-format on
  1087. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台