You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

conv_bias.cpp 54 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384
  1. /**
  2. * \file dnn/test/common/conv_bias.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
  12. #include "test/common/conv_bias.h"
  13. #include "megdnn/opr_param_defs.h"
  14. #include "src/common/utils.h"
  15. #include "test/common/benchmarker.h"
  16. namespace megdnn {
  17. namespace test {
  18. namespace conv_bias {
  19. namespace {
  20. void convert_arg_from_nchw4_to_chwn4(TestArg& arg) {
  21. arg.param.format = param::ConvBias::Format::CHWN4;
  22. arg.src = TensorShape{arg.src[1], arg.src[2], arg.src[3], arg.src[0], 4};
  23. arg.filter = TensorShape{arg.filter[1], arg.filter[2], arg.filter[3],
  24. arg.filter[0], 4};
  25. arg.bias =
  26. TensorShape{arg.bias[1], arg.bias[2], arg.bias[3], arg.bias[0], 4};
  27. }
  28. } // namespace
  29. std::vector<TestArg> get_args() {
  30. std::vector<TestArg> args;
  31. param::ConvBias cur_param;
  32. using NLMode = param::ConvBias::NonlineMode;
  33. // clang-format off
  34. for (auto nlmode :
  35. {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
  36. for (size_t i : {9, 63}) {
  37. cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
  38. cur_param.nonlineMode = nlmode;
  39. // fallback case
  40. args.emplace_back(cur_param, TensorShape{10, 1, i, i},
  41. TensorShape{1, 1, 8, 8}, TensorShape{1, 1, 1, 1});
  42. args.emplace_back(cur_param, TensorShape{10, 4, i, i},
  43. TensorShape{3, 4, 4, 4}, TensorShape{1, 3, 1, 1});
  44. cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
  45. args.emplace_back(cur_param, TensorShape{10, 4, i, i},
  46. TensorShape{1, 4, 3, 3}, TensorShape{1, 1, 1, 1});
  47. args.emplace_back(cur_param, TensorShape{1, 4, i, i},
  48. TensorShape{5, 4, 3, 3}, TensorShape{1, 5, 1, 1});
  49. } }
  50. // clang-format on
  51. return args;
  52. }
  53. std::vector<TestArg> get_chanwise_args() {
  54. std::vector<TestArg> args;
  55. param::ConvBias cur_param;
  56. using NLMode = param::ConvBias::NonlineMode;
  57. cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
  58. cur_param.sparse = ConvBias::Param::Sparse::GROUP;
  59. for (auto nlmode :
  60. {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
  61. cur_param.nonlineMode = nlmode;
  62. // simple case
  63. for (uint32_t s : {1, 2})
  64. for (uint32_t p : {0, 1, 2, 3})
  65. for (size_t f : {2, 3, 5, 7})
  66. for (size_t ocpg : {1, 3}) {
  67. cur_param.pad_h = cur_param.pad_w = p;
  68. cur_param.stride_h = cur_param.stride_w = s;
  69. args.emplace_back(cur_param, TensorShape{2, 3, 16, 16},
  70. TensorShape{3, ocpg, 1, f, f},
  71. TensorShape{1, 3 * ocpg, 1, 1});
  72. }
  73. args.emplace_back(cur_param, TensorShape{32, 12, 20, 10},
  74. TensorShape{12, 2, 1, 4, 5},
  75. TensorShape{1, 24, 1, 1});
  76. // padding larger than kern
  77. args.emplace_back(cur_param, TensorShape{32, 12, 20, 10},
  78. TensorShape{12, 2, 1, 4, 5},
  79. TensorShape{1, 24, 1, 1});
  80. }
  81. return args;
  82. }
  83. std::vector<TestArg> get_args_1x1() {
  84. std::vector<TestArg> args;
  85. param::ConvBias cur_param;
  86. using NLMode = param::ConvBias::NonlineMode;
  87. for (auto nlmode :
  88. {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
  89. cur_param.nonlineMode = nlmode;
  90. for (size_t i : {16, 19}) {
  91. cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
  92. args.emplace_back(cur_param, TensorShape{2, 20, i, i + 1},
  93. TensorShape{30, 20, 1, 1},
  94. TensorShape{1, 30, 1, 1});
  95. cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
  96. args.emplace_back(cur_param, TensorShape{2, 20, i, i + 1},
  97. TensorShape{30, 20, 1, 1},
  98. TensorShape{1, 30, 1, 1});
  99. }
  100. }
  101. return args;
  102. }
  103. std::vector<TestArg> get_winograd_args(size_t kernel_size) {
  104. std::vector<TestArg> args;
  105. param::ConvBias cur_param;
  106. using NLMode = param::ConvBias::NonlineMode;
  107. // clang-format off
  108. for (auto nlmode :
  109. {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
  110. for (size_t ic : {1, 3, 4, 7}) {
  111. for (size_t oc : {1, 3, 4, 7}) {
  112. for (size_t i : {9, 63}) {
  113. cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
  114. cur_param.nonlineMode = nlmode;
  115. cur_param.sparse = param::ConvBias::Sparse::DENSE;
  116. cur_param.pad_h = cur_param.pad_w = 0;
  117. //! no bias
  118. args.emplace_back(cur_param, TensorShape{1, ic, i, i},
  119. TensorShape{oc, ic, kernel_size, kernel_size},
  120. TensorShape{});
  121. //! bias
  122. args.emplace_back(
  123. cur_param, TensorShape{2, ic, i, i},
  124. TensorShape{oc, ic, kernel_size, kernel_size},
  125. TensorShape{2, oc, (i + cur_param.pad_h * 2 - kernel_size) + 1,
  126. (i + cur_param.pad_w * 2 - kernel_size) + 1});
  127. //! bias channel
  128. args.emplace_back(cur_param, TensorShape{2, ic, i, i},
  129. TensorShape{oc, ic, kernel_size, kernel_size},
  130. TensorShape{1, oc, 1, 1});
  131. cur_param.sparse = param::ConvBias::Sparse::GROUP;
  132. args.emplace_back(
  133. cur_param, TensorShape{2, 2 * ic, i, i},
  134. TensorShape{2, oc, ic, kernel_size, kernel_size},
  135. TensorShape{2, 2 * oc,
  136. (i + cur_param.pad_h * 2 - kernel_size) + 1,
  137. (i + cur_param.pad_w * 2 - kernel_size) + 1});
  138. args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
  139. TensorShape{2, oc, ic, kernel_size, kernel_size},
  140. TensorShape{1, 2 * oc, 1, 1});
  141. } } } }
  142. // clang-format on
  143. //! test for multi-thread OC parallel
  144. for (size_t i : {9, 63}) {
  145. cur_param.sparse = param::ConvBias::Sparse::DENSE;
  146. cur_param.pad_h = cur_param.pad_w = 1;
  147. args.emplace_back(cur_param, TensorShape{1, 8, i, i},
  148. TensorShape{128, 8, kernel_size, kernel_size},
  149. TensorShape{1, 128, 1, 1});
  150. args.emplace_back(cur_param, TensorShape{2, 8, i, i},
  151. TensorShape{128, 8, kernel_size, kernel_size},
  152. TensorShape{1, 128, 1, 1});
  153. cur_param.sparse = param::ConvBias::Sparse::GROUP;
  154. args.emplace_back(cur_param, TensorShape{2, 2 * 8, i, i},
  155. TensorShape{2, 128, 8, kernel_size, kernel_size},
  156. TensorShape{1, 2 * 128, 1, 1});
  157. }
  158. return args;
  159. }
  160. std::vector<TestArg> get_winograd_mk_packed_args(size_t pack_size) {
  161. std::vector<TestArg> args;
  162. param::ConvBias cur_param;
  163. using NLMode = param::ConvBias::NonlineMode;
  164. // clang-format off
  165. for (auto nlmode :
  166. {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
  167. for (size_t ic : {pack_size, 2 * pack_size}) {
  168. for (size_t oc : {pack_size, 2 * pack_size}) {
  169. for (size_t i : {9, 63}) {
  170. cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
  171. cur_param.nonlineMode = nlmode;
  172. cur_param.sparse = param::ConvBias::Sparse::DENSE;
  173. cur_param.pad_h = cur_param.pad_w = 1;
  174. args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
  175. TensorShape{pack_size, pack_size, 3, 3},
  176. TensorShape{1, pack_size, 1, 1});
  177. //! no bias
  178. args.emplace_back(cur_param, TensorShape{2, ic, i, i},
  179. TensorShape{oc, ic, 3, 3}, TensorShape{});
  180. //! bias
  181. args.emplace_back(cur_param, TensorShape{2, ic, i, i},
  182. TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
  183. //! bias channel
  184. args.emplace_back(cur_param, TensorShape{2, ic, i, i},
  185. TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
  186. cur_param.sparse = param::ConvBias::Sparse::GROUP;
  187. args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
  188. TensorShape{2, oc, ic, 3, 3},
  189. TensorShape{2, 2 * oc, i, i});
  190. args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
  191. TensorShape{2, oc, ic, 3, 3},
  192. TensorShape{1, 2 * oc, 1, 1});
  193. } } } }
  194. // clang-format on
  195. //! test for multi-thread OC parallel
  196. for (size_t i : {9, 63}) {
  197. cur_param.sparse = param::ConvBias::Sparse::DENSE;
  198. cur_param.pad_h = cur_param.pad_w = 1;
  199. args.emplace_back(cur_param, TensorShape{1, 8, i, i},
  200. TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
  201. args.emplace_back(cur_param, TensorShape{2, 8, i, i},
  202. TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
  203. cur_param.sparse = param::ConvBias::Sparse::GROUP;
  204. args.emplace_back(cur_param, TensorShape{2, 2 * 8, i, i},
  205. TensorShape{2, 128, 8, 3, 3},
  206. TensorShape{1, 2 * 128, 1, 1});
  207. }
  208. return args;
  209. }
  210. std::vector<TestArg> get_quantized_winograd_mk_packed_args(
  211. size_t pack_size, bool compute_float32) {
  212. std::vector<TestArg> args;
  213. param::ConvBias cur_param;
  214. using NLMode = param::ConvBias::NonlineMode;
  215. // clang-format off
  216. for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
  217. for (size_t ic : {pack_size, 2 * pack_size}) {
  218. for (size_t oc : {pack_size, 2 * pack_size}) {
  219. for (size_t i : {9, 63}) {
  220. cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
  221. cur_param.nonlineMode = nlmode;
  222. cur_param.sparse = param::ConvBias::Sparse::DENSE;
  223. cur_param.pad_h = cur_param.pad_w = 1;
  224. if(compute_float32){
  225. cur_param.compute_mode = param::ConvBias::ComputeMode::FLOAT32;
  226. }
  227. args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
  228. TensorShape{pack_size, pack_size, 3, 3},
  229. TensorShape{1, pack_size, 1, 1});
  230. //! no bias
  231. args.emplace_back(cur_param, TensorShape{2, ic, i, i},
  232. TensorShape{oc, ic, 3, 3}, TensorShape{});
  233. //! bias
  234. args.emplace_back(cur_param, TensorShape{2, ic, i, i},
  235. TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
  236. //! bias channel
  237. args.emplace_back(cur_param, TensorShape{2, ic, i, i},
  238. TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
  239. cur_param.sparse = param::ConvBias::Sparse::GROUP;
  240. args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
  241. TensorShape{2, oc, ic, 3, 3},
  242. TensorShape{2, 2 * oc, i, i});
  243. args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
  244. TensorShape{2, oc, ic, 3, 3},
  245. TensorShape{1, 2 * oc, 1, 1});
  246. } } } }
  247. // clang-format on
  248. //! test for multi-thread OC parallel
  249. for (size_t i : {9, 63}) {
  250. cur_param.sparse = param::ConvBias::Sparse::DENSE;
  251. cur_param.pad_h = cur_param.pad_w = 1;
  252. args.emplace_back(cur_param, TensorShape{1, 8, i, i},
  253. TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
  254. args.emplace_back(cur_param, TensorShape{2, 8, i, i},
  255. TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
  256. cur_param.sparse = param::ConvBias::Sparse::GROUP;
  257. args.emplace_back(cur_param, TensorShape{2, 2 * 8, i, i},
  258. TensorShape{2, 128, 8, 3, 3},
  259. TensorShape{1, 2 * 128, 1, 1});
  260. }
  261. return args;
  262. }
  263. std::vector<TestArg> get_quantized_args_with_nlmode(
  264. param::ConvBias::NonlineMode nlmode) {
  265. std::vector<TestArg> args;
  266. param::ConvBias cur_param;
  267. // clang-format off
  268. for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION,
  269. param::ConvBias::Mode::CONVOLUTION}) {
  270. for (size_t ic : {1, 2, 3, 4, 5, 7}) {
  271. for (size_t oc : {1, 2, 3, 4, 5, 7}) {
  272. for (size_t i : {9, 63}) {
  273. cur_param.mode = mode;
  274. cur_param.nonlineMode = nlmode;
  275. cur_param.sparse = param::ConvBias::Sparse::DENSE;
  276. cur_param.pad_h = cur_param.pad_w = 1;
  277. //! no bias
  278. args.emplace_back(cur_param, TensorShape{2, ic, i, i},
  279. TensorShape{oc, ic, 3, 3}, TensorShape{});
  280. //! bias
  281. args.emplace_back(cur_param, TensorShape{2, ic, i, i},
  282. TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
  283. //! bias channel
  284. args.emplace_back(cur_param, TensorShape{2, ic, i, i},
  285. TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
  286. cur_param.sparse = param::ConvBias::Sparse::GROUP;
  287. args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
  288. TensorShape{2, oc, ic, 3, 3},
  289. TensorShape{2, 2 * oc, i, i});
  290. args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
  291. TensorShape{2, oc, ic, 3, 3},
  292. TensorShape{1, 2 * oc, 1, 1});
  293. cur_param.sparse = param::ConvBias::Sparse::DENSE;
  294. cur_param.pad_h = cur_param.pad_w = 0;
  295. args.emplace_back(cur_param, TensorShape{2, ic, i, i},
  296. TensorShape{oc, ic, 1, 1}, TensorShape{});
  297. } } } }
  298. // clang-format on
  299. return args;
  300. }
  301. std::vector<TestArg> get_quantized_args() {
  302. using NLMode = param::ConvBias::NonlineMode;
  303. auto arg_p1 = get_quantized_args_with_nlmode(NLMode::IDENTITY),
  304. arg_p2 = get_quantized_args_with_nlmode(NLMode::RELU),
  305. arg_p3 = get_quantized_args_with_nlmode(NLMode::H_SWISH);
  306. std::vector<TestArg> args;
  307. args.insert(args.end(), arg_p1.begin(), arg_p1.end());
  308. args.insert(args.end(), arg_p2.begin(), arg_p2.end());
  309. args.insert(args.end(), arg_p3.begin(), arg_p3.end());
  310. return args;
  311. }
  312. std::vector<TestArg> get_int8_nchw4_args(size_t kernel_size) {
  313. std::vector<TestArg> args;
  314. param::ConvBias cur_param;
  315. using NLMode = param::ConvBias::NonlineMode;
  316. // clang-format off
  317. for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
  318. for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
  319. for (size_t b : {64, 16}) {
  320. for (size_t ic : {16, 32}) {
  321. for (size_t oc : {16, 32}) {
  322. for (size_t h : {8}) {
  323. for (size_t w : {8, 11}) {
  324. for (int p : {0, static_cast<int>(kernel_size / 2)}) {
  325. for (size_t s : {2, 1}) {
  326. if (kernel_size == 7) {
  327. b = std::min(b, 32_z);
  328. }
  329. size_t f = kernel_size;
  330. cur_param.mode = mode;
  331. cur_param.nonlineMode = nlmode;
  332. cur_param.format = param::ConvBias::Format::NCHW4;
  333. cur_param.sparse = param::ConvBias::Sparse::DENSE;
  334. cur_param.pad_h = cur_param.pad_w = p;
  335. cur_param.stride_h = cur_param.stride_w = s;
  336. //! bias channel
  337. args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
  338. TensorShape{oc, ic / 4, f, f, 4},
  339. TensorShape{1, oc / 4, 1, 1, 4});
  340. } } } } } } } } }
  341. // clang-format on
  342. return args;
  343. }
  344. std::vector<TestArg> get_int8_nchw44_args(size_t kernel_size, size_t pack_size,
  345. bool compute_float32,
  346. bool group_mode) {
  347. std::vector<TestArg> args;
  348. param::ConvBias cur_param;
  349. megdnn_assert(pack_size > 0, "not support pack_size");
  350. megdnn_assert(kernel_size > 0, "not support kernel_size");
  351. using NLMode = param::ConvBias::NonlineMode;
  352. // clang-format off
  353. for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
  354. for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
  355. for (size_t b : {1,2}) {
  356. for (size_t ic : {8,16}) {
  357. for (size_t oc : {8,16}) {
  358. for (size_t h : {9,23}) {
  359. for (size_t w : {9,23}) {
  360. for (int p : {0, static_cast<int>(kernel_size / 2)}) {
  361. for (size_t s : {1}) {
  362. if (kernel_size == 7) {
  363. b = std::min(b, 32_z);
  364. }
  365. size_t f = kernel_size;
  366. cur_param.mode = mode;
  367. cur_param.nonlineMode = nlmode;
  368. if (pack_size == 4){
  369. cur_param.format = param::ConvBias::Format::NCHW44;
  370. } else if(pack_size == 8){
  371. cur_param.format = param::ConvBias::Format::NCHW88;
  372. }
  373. if(compute_float32){
  374. cur_param.compute_mode =
  375. param::ConvBias::ComputeMode::FLOAT32;
  376. }
  377. cur_param.sparse = param::ConvBias::Sparse::DENSE;
  378. cur_param.pad_h = cur_param.pad_w = p;
  379. cur_param.stride_h = cur_param.stride_w = s;
  380. if (!group_mode) {
  381. //! no bias
  382. args.emplace_back(cur_param,
  383. TensorShape{b, ic / pack_size, h, w, pack_size},
  384. TensorShape{oc / pack_size, ic / pack_size, f, f,
  385. pack_size, pack_size},
  386. TensorShape{});
  387. //! bias channel
  388. args.emplace_back(cur_param,
  389. TensorShape{b, ic / pack_size, h, w, pack_size},
  390. TensorShape{oc / pack_size, ic / pack_size, f, f,
  391. pack_size, pack_size},
  392. TensorShape{1, oc / pack_size, 1, 1, pack_size});
  393. //! bias
  394. args.emplace_back(
  395. cur_param, TensorShape{b, ic / pack_size, h, w, pack_size},
  396. TensorShape{oc / pack_size, ic / pack_size, f, f, pack_size,
  397. pack_size},
  398. TensorShape{b, oc / pack_size, (h - f + 2 * p) / s + 1,
  399. (w - f + 2 * p) / s + 1, pack_size});
  400. } else {
  401. cur_param.sparse = param::ConvBias::Sparse::GROUP;
  402. args.emplace_back(
  403. cur_param,
  404. TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
  405. TensorShape{2, oc / pack_size, ic / pack_size, 3, 3,
  406. pack_size, pack_size},
  407. TensorShape{2, 2 * oc / pack_size, (h - f + 2 * p) / s + 1,
  408. (w - f + 2 * p) / s + 1, pack_size});
  409. args.emplace_back(
  410. cur_param,
  411. TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
  412. TensorShape{2, oc / pack_size, ic / pack_size, f, f,
  413. pack_size, pack_size},
  414. TensorShape{1, 2 * oc / pack_size, 1, 1, pack_size});
  415. args.emplace_back(
  416. cur_param,
  417. TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
  418. TensorShape{2, oc / pack_size, ic / pack_size, f, f,
  419. pack_size, pack_size},
  420. TensorShape{});
  421. }
  422. } } } } } } } } }
  423. // clang-format on
  424. return args;
  425. }
  426. std::vector<TestArg> get_int8_nchw4_args_check_bounds(size_t kernel_size) {
  427. std::vector<TestArg> args;
  428. param::ConvBias cur_param;
  429. using NLMode = param::ConvBias::NonlineMode;
  430. // clang-format off
  431. for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
  432. for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
  433. for (size_t b : {7, 8, 4, 1}) {
  434. for (size_t ic : {16, 32}) {
  435. for (size_t oc : {16, 8, 4}) {
  436. for (size_t h : {8}) {
  437. for (size_t w : {8, 11}) {
  438. for (int p : {static_cast<int>(kernel_size / 2), 0}) {
  439. for (size_t s : {1, 2}) {
  440. size_t f = kernel_size;
  441. cur_param.mode = mode;
  442. cur_param.nonlineMode = nlmode;
  443. cur_param.format = param::ConvBias::Format::NCHW4;
  444. cur_param.sparse = param::ConvBias::Sparse::DENSE;
  445. cur_param.pad_h = cur_param.pad_w = p;
  446. cur_param.stride_h = cur_param.stride_w = s;
  447. //! bias channel
  448. args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
  449. TensorShape{oc, ic / 4, f, f, 4},
  450. TensorShape{1, oc / 4, 1, 1, 4});
  451. } } } } } } } } }
  452. // clang-format on
  453. return args;
  454. }
  455. std::vector<TestArg> get_int8_nchw4_args_small_batch(size_t kernel_size) {
  456. std::vector<TestArg> args;
  457. param::ConvBias cur_param;
  458. using NLMode = param::ConvBias::NonlineMode;
  459. // clang-format off
  460. for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
  461. for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
  462. for (size_t b : {12, 8, 4}) {
  463. for (size_t ic : {16, 32}) {
  464. for (size_t oc : {16, 8, 4}) {
  465. for (size_t h : {8}) {
  466. for (size_t w : {8, 9, 10, 11, 12, 13, 14, 15, 16}) {
  467. for (int p : {static_cast<int>(kernel_size / 2), 0}) {
  468. for (size_t s : {1, 2}) {
  469. size_t f = kernel_size;
  470. cur_param.mode = mode;
  471. cur_param.nonlineMode = nlmode;
  472. cur_param.format = param::ConvBias::Format::NCHW4;
  473. cur_param.sparse = param::ConvBias::Sparse::DENSE;
  474. cur_param.pad_h = cur_param.pad_w = p;
  475. cur_param.stride_h = cur_param.stride_w = s;
  476. //! bias channel
  477. args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
  478. TensorShape{oc, ic / 4, f, f, 4},
  479. TensorShape{1, oc / 4, 1, 1, 4});
  480. } } } } } } } } }
  481. // clang-format on
  482. return args;
  483. }
  484. std::vector<TestArg> get_int8_nchw4_small_channel_args(size_t kernel_size) {
  485. std::vector<TestArg> args;
  486. param::ConvBias cur_param;
  487. using NLMode = param::ConvBias::NonlineMode;
  488. // clang-format off
  489. for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
  490. for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
  491. for (size_t b : {64, 16}) {
  492. for (size_t ic : {4, 12}) {
  493. for (size_t oc : {128, 32}) {
  494. for (size_t h : {8}) {
  495. for (size_t w : {8, 11}) {
  496. for (int p : {static_cast<int>(kernel_size / 2), 0}) {
  497. for (size_t s : {1, 2}) {
  498. size_t f = kernel_size;
  499. cur_param.mode = mode;
  500. cur_param.nonlineMode = nlmode;
  501. cur_param.format =
  502. param::ConvBias::Format::NCHW4;
  503. cur_param.sparse =
  504. param::ConvBias::Sparse::DENSE;
  505. cur_param.pad_h = cur_param.pad_w = p;
  506. cur_param.stride_h =
  507. cur_param.stride_w = s;
  508. //! bias channel
  509. args.emplace_back(
  510. cur_param,
  511. TensorShape{b, ic / 4, h, w, 4},
  512. TensorShape{oc, ic / 4, f, f,
  513. 4},
  514. TensorShape{1, oc / 4, 1, 1,
  515. 4});
  516. } } } } } } } } }
  517. // clang-format on
  518. return args;
  519. }
  520. std::vector<TestArg> get_int8_nchw4_small_channel_args_check_bounds(
  521. size_t kernel_size) {
  522. std::vector<TestArg> args;
  523. param::ConvBias cur_param;
  524. using NLMode = param::ConvBias::NonlineMode;
  525. // clang-format off
  526. for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
  527. for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
  528. for (size_t b : {8, 7, 4, 1}) {
  529. for (size_t ic : {4, 12}) {
  530. for (size_t oc : {16, 8, 12, 4}) {
  531. for (size_t h : {8}) {
  532. for (size_t w : {8, 11}) {
  533. for (int p : {static_cast<int>(kernel_size / 2), 0}) {
  534. for (size_t s : {1, 2}) {
  535. size_t f = kernel_size;
  536. cur_param.mode = mode;
  537. cur_param.nonlineMode = nlmode;
  538. cur_param.format = param::ConvBias::Format::NCHW4;
  539. cur_param.sparse = param::ConvBias::Sparse::DENSE;
  540. cur_param.pad_h = cur_param.pad_w = p;
  541. cur_param.stride_h = cur_param.stride_w = s;
  542. //! bias channel
  543. args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
  544. TensorShape{oc, ic / 4, f, f, 4},
  545. TensorShape{1, oc / 4, 1, 1, 4});
  546. } } } } } } } } }
  547. // clang-format on
  548. return args;
  549. }
  550. std::vector<TestArg> get_int8_chwn4_args(size_t kernel_size) {
  551. auto args = get_int8_nchw4_args(kernel_size);
  552. for (auto& arg : args) {
  553. convert_arg_from_nchw4_to_chwn4(arg);
  554. }
  555. return args;
  556. }
  557. std::vector<TestArg> get_int8_chwn4_args_check_bounds(size_t kernel_size) {
  558. auto args = get_int8_nchw4_args_check_bounds(kernel_size);
  559. for (auto& arg : args) {
  560. convert_arg_from_nchw4_to_chwn4(arg);
  561. }
  562. return args;
  563. }
  564. std::vector<TestArg> get_int8_chwn4_small_channel_args(size_t kernel_size) {
  565. auto args = get_int8_nchw4_small_channel_args(kernel_size);
  566. for (auto& arg : args) {
  567. convert_arg_from_nchw4_to_chwn4(arg);
  568. }
  569. return args;
  570. }
  571. std::vector<TestArg> get_int8_chwn4_small_channel_args_check_bounds(
  572. size_t kernel_size) {
  573. auto args = get_int8_nchw4_small_channel_args_check_bounds(kernel_size);
  574. for (auto& arg : args) {
  575. convert_arg_from_nchw4_to_chwn4(arg);
  576. }
  577. return args;
  578. }
  579. std::vector<TestArg> get_int8_chwn4_args_small_batch(size_t kernel_size) {
  580. auto args = get_int8_nchw4_args_small_batch(kernel_size);
  581. for (auto& arg : args) {
  582. convert_arg_from_nchw4_to_chwn4(arg);
  583. }
  584. return args;
  585. }
  586. std::vector<TestArg> get_int8_nchw4_tensorcore_args(size_t kernel_size) {
  587. std::vector<TestArg> args;
  588. param::ConvBias cur_param;
  589. using NLMode = param::ConvBias::NonlineMode;
  590. // clang-format off
  591. for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
  592. for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
  593. size_t b = 64, oc = 128;
  594. for (size_t ic : {32, 64}) {
  595. for (size_t h : {8}) {
  596. for (size_t w : {11}) {
  597. for (int p : {static_cast<int>(kernel_size / 2), 0}) {
  598. for (size_t s : {1, 2}) {
  599. size_t f = kernel_size;
  600. cur_param.mode = mode;
  601. cur_param.nonlineMode = nlmode;
  602. cur_param.format = param::ConvBias::Format::NCHW4;
  603. cur_param.sparse = param::ConvBias::Sparse::DENSE;
  604. cur_param.pad_h = cur_param.pad_w = p;
  605. cur_param.stride_h = cur_param.stride_w = s;
  606. //! bias channel
  607. args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
  608. TensorShape{oc, ic / 4, f, f, 4},
  609. TensorShape{1, oc / 4, 1, 1, 4});
  610. } } } } }
  611. } }
  612. // clang-format on
  613. return args;
  614. }
  615. std::vector<TestArg> get_int8_chwn4_tensorcore_args(size_t kernel_size) {
  616. auto args = get_int8_nchw4_tensorcore_args(kernel_size);
  617. for (auto& arg : args) {
  618. convert_arg_from_nchw4_to_chwn4(arg);
  619. }
  620. return args;
  621. }
  622. void check_conv_bias(DType src_dtype, DType filter_dtype, DType bias_dtype,
  623. DType dst_dtype, Handle* handle, const char* algo,
  624. param::ConvBias::Format format,
  625. const std::vector<TestArg>& args, bool fuse_z,
  626. bool stable_test) {
  627. megdnn_assert((src_dtype.enumv() == filter_dtype.enumv()) ||
  628. (src_dtype.enumv() == DTypeEnum::Quantized4Asymm &&
  629. filter_dtype.enumv() == DTypeEnum::QuantizedS4));
  630. Checker<ConvBiasForward> checker(handle, !stable_test);
  631. if (algo) {
  632. checker.set_before_exec_callback(
  633. ConvBiasAlgoChecker<ConvBiasForward>(algo));
  634. }
  635. std::unique_ptr<RNG> rng;
  636. std::unique_ptr<RNG> flt_rng;
  637. std::unique_ptr<RNG> bias_rng;
  638. std::unique_ptr<RNG> const_rng;
  639. std::unique_ptr<RNG> zero_rng;
  640. // TODO: check range of rng
  641. if (src_dtype.enumv() == DTypeEnum::QuantizedS8) {
  642. rng = std::make_unique<UniformIntRNG>(-3, 3);
  643. flt_rng = std::make_unique<UniformIntRNG>(-3, 3);
  644. const_rng = std::make_unique<UniformIntRNG>(1, 1);
  645. zero_rng = std::make_unique<UniformIntRNG>(0, 0);
  646. megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
  647. bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
  648. checker.set_epsilon(1 + 1e-3)
  649. .set_max_avg_error(1e-1)
  650. .set_max_avg_biased_error(1e-3);
  651. } else if (src_dtype.enumv() == DTypeEnum::Quantized4Asymm) {
  652. rng = std::make_unique<UniformIntRNG>(0, 6);
  653. flt_rng = std::make_unique<UniformIntRNG>(-3, 3);
  654. const_rng = std::make_unique<UniformIntRNG>(1, 1);
  655. zero_rng = std::make_unique<UniformIntRNG>(0, 0);
  656. megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
  657. bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
  658. checker.set_epsilon(1 + 1e-3)
  659. .set_max_avg_error(1e-1)
  660. .set_max_avg_biased_error(1e-3);
  661. } else if (src_dtype.enumv() == DTypeEnum::QuantizedS4) {
  662. rng = std::make_unique<UniformIntRNG>(-3, 3);
  663. flt_rng = std::make_unique<UniformIntRNG>(-3, 3);
  664. const_rng = std::make_unique<UniformIntRNG>(1, 1);
  665. zero_rng = std::make_unique<UniformIntRNG>(0, 0);
  666. megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
  667. bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
  668. checker.set_epsilon(1 + 1e-3)
  669. .set_max_avg_error(1e-1)
  670. .set_max_avg_biased_error(1e-3);
  671. } else if (src_dtype.enumv() == DTypeEnum::Float16) {
  672. rng = std::make_unique<NormalRNG>(2.f);
  673. flt_rng = std::make_unique<NormalRNG>(2.f);
  674. megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float16);
  675. bias_rng = std::make_unique<NormalRNG>(2.f);
  676. checker.set_epsilon(1e-2);
  677. } else if (src_dtype.enumv() == DTypeEnum::Float32) {
  678. rng = std::make_unique<NormalRNG>(2.f);
  679. flt_rng = std::make_unique<NormalRNG>(2.f);
  680. megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float32);
  681. bias_rng = std::make_unique<NormalRNG>(2.f);
  682. }
  683. using Param = param::ConvBias;
  684. using Format = Param::Format;
  685. auto get_z_shape = [&fuse_z, &format](TestArg arg) -> TensorShape {
  686. TensorShape z{};
  687. if (fuse_z) {
  688. size_t hi, wi, sh, sw, ph, pw, fh, fw;
  689. z = arg.src;
  690. size_t spatial_idx = 2;
  691. if (format == Format::NCHW4) {
  692. hi = arg.src[2];
  693. wi = arg.src[3];
  694. fh = arg.filter[2];
  695. fw = arg.filter[3];
  696. z[1] = arg.filter[0] / 4;
  697. } else if (format == Format::NCHW32) {
  698. hi = arg.src[2];
  699. wi = arg.src[3];
  700. fh = arg.filter[2];
  701. fw = arg.filter[3];
  702. z[1] = arg.filter[0] / 32;
  703. } else if (format == Format::NCHW64) {
  704. hi = arg.src[2];
  705. wi = arg.src[3];
  706. fh = arg.filter[2];
  707. fw = arg.filter[3];
  708. z[1] = arg.filter[0] / 64;
  709. } else {
  710. megdnn_assert(format == Format::CHWN4);
  711. hi = arg.src[1];
  712. wi = arg.src[2];
  713. fh = arg.filter[1];
  714. fw = arg.filter[2];
  715. z[0] = arg.filter[3] / 4;
  716. spatial_idx = 1;
  717. }
  718. sh = arg.param.stride_h;
  719. sw = arg.param.stride_w;
  720. ph = arg.param.pad_h;
  721. pw = arg.param.pad_w;
  722. size_t ho = infer_conv_shape(hi, fh, sh, ph);
  723. size_t wo = infer_conv_shape(wi, fw, sw, pw);
  724. z[spatial_idx] = ho;
  725. z[spatial_idx + 1] = wo;
  726. }
  727. return z;
  728. };
  729. megdnn_assert(rng != nullptr && flt_rng != nullptr && bias_rng != nullptr);
  730. checker.set_rng(0, rng.get())
  731. .set_rng(1, flt_rng.get())
  732. .set_rng(2, bias_rng.get())
  733. .set_rng(3, rng.get());
  734. if (stable_test) {
  735. checker.set_stable_check(true);
  736. checker.set_no_naive_check(true);
  737. }
  738. if (args.empty()) {
  739. std::vector<TestArg> default_args;
  740. if (format == Format::NCHW4) {
  741. default_args = get_int8_nchw4_args(3);
  742. } else if (format == Format::CHWN4) {
  743. default_args = get_int8_chwn4_args(3);
  744. }
  745. for (auto&& arg : default_args) {
  746. auto z = get_z_shape(arg);
  747. checker.set_dtype(0, src_dtype)
  748. .set_dtype(1, filter_dtype)
  749. .set_dtype(2, bias_dtype)
  750. .set_dtype(3, dst_dtype)
  751. .set_dtype(4, dst_dtype)
  752. .set_param(arg.param)
  753. .execs({arg.src, arg.filter, arg.bias, z, {}});
  754. }
  755. } else {
  756. for (auto&& arg : args) {
  757. auto z = get_z_shape(arg);
  758. checker.set_dtype(0, src_dtype)
  759. .set_dtype(1, filter_dtype)
  760. .set_dtype(2, bias_dtype)
  761. .set_dtype(3, dst_dtype)
  762. .set_dtype(4, dst_dtype)
  763. .set_param(arg.param)
  764. .execs({arg.src, arg.filter, arg.bias, z, {}});
  765. }
  766. }
  767. }
  768. #if MEGDNN_WITH_BENCHMARK
  769. std::vector<conv_bias::TestArg> get_winograd_benchmark_args(size_t kernel,
  770. size_t pack_size) {
  771. std::vector<conv_bias::TestArg> args;
  772. auto pack = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
  773. size_t p) {
  774. if (ic % pack_size != 0 || oc % pack_size != 0)
  775. return;
  776. if (w + 2 * p < kernel || h + 2 * p < kernel)
  777. return;
  778. param::ConvBias param;
  779. param.stride_h = 1;
  780. param.stride_w = 1;
  781. param.pad_h = p;
  782. param.pad_w = p;
  783. args.push_back(conv_bias::TestArg{param,
  784. TensorShape{1, ic, h, w},
  785. TensorShape{oc, ic, kernel, kernel},
  786. {1, oc, 1, 1}});
  787. };
  788. for (size_t ic : {8, 16, 32, 64}) {
  789. for (size_t oc : {8, 16, 32, 64}) {
  790. pack(oc, ic, 56, 56, kernel, kernel / 2);
  791. pack(oc, ic, 128, 128, kernel, kernel / 2);
  792. pack(oc, ic, 256, 256, kernel, kernel / 2);
  793. }
  794. }
  795. //! conv in vgg16
  796. pack(512, 512, 15, 15, kernel, kernel / 2);
  797. pack(512, 256, 15, 15, kernel, kernel / 2);
  798. pack(256, 256, 29, 29, kernel, kernel / 2);
  799. pack(256, 128, 29, 29, kernel, kernel / 2);
  800. pack(128, 128, 57, 57, kernel, kernel / 2);
  801. pack(128, 64, 57, 57, kernel, kernel / 2);
  802. pack(64, 64, 123, 123, kernel, kernel / 2);
  803. pack(64, 24, 123, 123, kernel, kernel / 2);
  804. pack(24, 24, 224, 224, kernel, kernel / 2);
  805. //! conv in resnet18
  806. pack(64, 64, 56, 56, kernel, kernel / 2);
  807. pack(128, 128, 28, 28, kernel, kernel / 2);
  808. pack(256, 256, 14, 14, kernel, kernel / 2);
  809. pack(512, 512, 7, 7, kernel, kernel / 2);
  810. return args;
  811. }
  812. void benchmark_winograd(const char* algo_name, Handle* handle, size_t kernel,
  813. size_t pack_size) {
  814. auto&& args = get_winograd_benchmark_args(kernel, pack_size);
  815. using namespace conv_bias;
  816. constexpr size_t RUN = 10;
  817. Benchmarker<Convolution> benchmark(handle);
  818. benchmark.set_display(false);
  819. benchmark.set_times(RUN);
  820. Benchmarker<ConvBias> benchmark_winograd(handle);
  821. benchmark_winograd.set_display(false);
  822. benchmark_winograd.set_times(RUN);
  823. for (auto&& arg : args) {
  824. TensorLayout dst_layout;
  825. auto opr = handle->create_operator<ConvBias>();
  826. opr->param() = arg.param;
  827. opr->deduce_layout({arg.src, dtype::Float32()},
  828. {arg.filter, dtype::Float32()},
  829. {arg.bias, dtype::Float32()}, {}, dst_layout);
  830. //! dst.nr_elems * IC * FH * FW * 2
  831. float computations = dst_layout.total_nr_elems() * arg.filter[1] *
  832. arg.filter[2] * arg.filter[3] * 2.0 /
  833. (1024 * 1024 * 1024) * 1e3;
  834. param::Convolution conv_param;
  835. conv_param.pad_h = arg.param.pad_h;
  836. conv_param.pad_w = arg.param.pad_w;
  837. conv_param.stride_h = arg.param.stride_h;
  838. conv_param.stride_w = arg.param.stride_w;
  839. auto used = benchmark.set_param(conv_param)
  840. .exec({arg.src, arg.filter, {}}) /
  841. RUN;
  842. benchmark_winograd.set_param(arg.param);
  843. auto used_winograd =
  844. algo_benchmark<ConvBias>(benchmark_winograd,
  845. {arg.src, arg.filter, {}, {}, {}},
  846. algo_name) /
  847. RUN;
  848. printf("%s %s: normal: %f ms %f Gflops winograd: %f ms %f GFlops "
  849. "speedup: "
  850. "%f\n",
  851. arg.src.to_string().c_str(), arg.filter.to_string().c_str(),
  852. used, computations / used, used_winograd,
  853. computations / used_winograd, used / used_winograd);
  854. }
  855. }
  856. #endif // MEGDNN_WITH_BENCHMARK
  857. std::vector<conv_bias::TestArg> get_conv_bias_args(
  858. std::vector<size_t> kernel, size_t stride, bool no_pad, bool no_bias,
  859. bool no_nonlinemode, bool quantized_nlmod, bool only_broadcast_bias) {
  860. using namespace conv_bias;
  861. using Param = param::ConvBias;
  862. using NLMode = param::ConvBias::NonlineMode;
  863. std::vector<TestArg> args;
  864. auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
  865. size_t kernel, size_t stride, NLMode nlmode) {
  866. Param param;
  867. param.stride_h = stride;
  868. param.stride_w = stride;
  869. if (!no_pad) {
  870. param.pad_h = kernel / 2;
  871. param.pad_w = kernel / 2;
  872. } else {
  873. param.pad_h = 0;
  874. param.pad_w = 0;
  875. }
  876. param.nonlineMode = nlmode;
  877. args.emplace_back(param, TensorShape{n, ic, h, w},
  878. TensorShape{oc, ic, kernel, kernel}, TensorShape{});
  879. if (!no_bias) {
  880. args.emplace_back(param, TensorShape{n, ic, h, w},
  881. TensorShape{oc, ic, kernel, kernel},
  882. TensorShape{1, oc, 1, 1});
  883. if (!only_broadcast_bias) {
  884. args.emplace_back(
  885. param, TensorShape{n, ic, h, w},
  886. TensorShape{oc, ic, kernel, kernel},
  887. TensorShape{
  888. n, oc,
  889. (h + 2 * param.pad_h - kernel) / stride + 1,
  890. (w + 2 * param.pad_h - kernel) / stride + 1});
  891. }
  892. }
  893. param.sparse = param::ConvBias::Sparse::GROUP;
  894. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  895. TensorShape{2, oc, ic, kernel, kernel},
  896. TensorShape{});
  897. if (!no_bias) {
  898. if (!only_broadcast_bias) {
  899. args.emplace_back(
  900. param, TensorShape{n, 2 * ic, h, w},
  901. TensorShape{2, oc, ic, kernel, kernel},
  902. TensorShape{
  903. n, 2 * oc,
  904. (h + param.pad_h * 2 - kernel) / stride + 1,
  905. (w + param.pad_w * 2 - kernel) / stride + 1});
  906. }
  907. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  908. TensorShape{2, oc, ic, kernel, kernel},
  909. TensorShape{1, 2 * oc, 1, 1});
  910. }
  911. };
  912. std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
  913. if (!no_nonlinemode) {
  914. nonlinemode.emplace_back(NLMode::RELU);
  915. nonlinemode.emplace_back(NLMode::H_SWISH);
  916. if (!quantized_nlmod) {
  917. nonlinemode.emplace_back(NLMode::SIGMOID);
  918. }
  919. }
  920. for (size_t n : {1, 2}) {
  921. for (auto nlmode : nonlinemode) {
  922. for (size_t ic : {1, 3, 7}) {
  923. for (size_t oc : {1, 3, 7}) {
  924. for (size_t size : {8, 16, 20}) {
  925. for (size_t kern : kernel) {
  926. pack(n, oc, ic, size, size, kern, stride, nlmode);
  927. }
  928. }
  929. }
  930. }
  931. }
  932. }
  933. return args;
  934. }
  935. std::vector<megdnn::test::conv_bias::TestArg> get_conv_bias_1x1_args(
  936. bool no_bias, bool no_nonlinemode, bool quantized_nlmod,
  937. bool only_broadcast_bias) {
  938. using namespace conv_bias;
  939. using Param = param::ConvBias;
  940. using NLMode = param::ConvBias::NonlineMode;
  941. using CONVMode = param::ConvBias::Mode;
  942. std::vector<TestArg> args;
  943. auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
  944. size_t stride, NLMode nlmode, CONVMode convmode) {
  945. Param param;
  946. param.stride_h = stride;
  947. param.stride_w = stride;
  948. param.pad_h = 0;
  949. param.pad_w = 0;
  950. param.mode = convmode;
  951. param.nonlineMode = nlmode;
  952. args.emplace_back(param, TensorShape{n, ic, h, w},
  953. TensorShape{oc, ic, 1, 1}, TensorShape{});
  954. if (!no_bias) {
  955. args.emplace_back(param, TensorShape{n, ic, h, w},
  956. TensorShape{oc, ic, 1, 1},
  957. TensorShape{1, oc, 1, 1});
  958. if (!only_broadcast_bias) {
  959. args.emplace_back(param, TensorShape{n, ic, h, w},
  960. TensorShape{oc, ic, 1, 1},
  961. TensorShape{n, oc, (h - 1) / stride + 1,
  962. (w - 1) / stride + 1});
  963. }
  964. }
  965. param.sparse = param::ConvBias::Sparse::GROUP;
  966. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  967. TensorShape{2, oc, ic, 1, 1}, TensorShape{});
  968. if (!no_bias) {
  969. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  970. TensorShape{2, oc, ic, 1, 1},
  971. TensorShape{1, 2 * oc, 1, 1});
  972. if (!only_broadcast_bias) {
  973. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  974. TensorShape{2, oc, ic, 1, 1},
  975. TensorShape{n, 2 * oc, (h - 1) / stride + 1,
  976. (w - 1) / stride + 1});
  977. }
  978. }
  979. };
  980. std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
  981. if (!no_nonlinemode) {
  982. nonlinemode.emplace_back(NLMode::RELU);
  983. nonlinemode.emplace_back(NLMode::H_SWISH);
  984. if (!quantized_nlmod) {
  985. nonlinemode.emplace_back(NLMode::SIGMOID);
  986. }
  987. }
  988. std::vector<CONVMode> convmodes{param::ConvBias::Mode::CONVOLUTION,
  989. param::ConvBias::Mode::CROSS_CORRELATION};
  990. for (size_t n : {1, 2})
  991. for (size_t oc : {1, 9, 33})
  992. for (size_t ic : {1, 16, 64})
  993. for (size_t size : {1, 7, 14, 28})
  994. for (auto nlmode : nonlinemode)
  995. for (auto convmode : convmodes) {
  996. pack(n, oc, ic, size, size, 1, nlmode, convmode);
  997. }
  998. return args;
  999. }
  1000. void check_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle,
  1001. const char* algo_name) {
  1002. using namespace conv_bias;
  1003. Checker<ConvBias> checker(handle);
  1004. checker.set_before_exec_callback(
  1005. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  1006. for (auto&& arg : args) {
  1007. checker.set_param(arg.param).execs(
  1008. {arg.src, arg.filter, arg.bias, {}, {}});
  1009. }
  1010. }
  1011. void checker_conv_bias_int8x8x16(std::vector<conv_bias::TestArg> args,
  1012. Handle* handle, const char* algo_name) {
  1013. using namespace conv_bias;
  1014. Checker<ConvBias> checker(handle);
  1015. checker.set_before_exec_callback(
  1016. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  1017. checker.set_dtype(0, dtype::Int8());
  1018. checker.set_dtype(1, dtype::Int8());
  1019. checker.set_dtype(2, dtype::Int16());
  1020. checker.set_dtype(4, dtype::Int16());
  1021. for (auto&& arg : args) {
  1022. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  1023. }
  1024. }
  1025. void check_conv_bias_preprocess(std::vector<conv_bias::TestArg> args,
  1026. Handle* handle, RNG* rng, float epsilon,
  1027. DType type0, DType type1, DType type2,
  1028. DType type3, const char* algo_name) {
  1029. using namespace conv_bias;
  1030. Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
  1031. handle);
  1032. checker.set_dtype(0, type0);
  1033. checker.set_dtype(1, type1);
  1034. checker.set_dtype(2, type2);
  1035. checker.set_dtype(4, type3);
  1036. checker.set_epsilon(epsilon);
  1037. if (NULL != rng) {
  1038. checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng).set_rng(3, rng);
  1039. }
  1040. checker.set_before_exec_callback(
  1041. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  1042. for (auto&& arg : args) {
  1043. checker.set_param(arg.param).execs(
  1044. {arg.src, arg.filter, arg.bias, {}, {}});
  1045. }
  1046. }
  1047. void checker_conv_bias_common(std::vector<conv_bias::TestArg> args, Handle* handle,
  1048. RNG* rng, float epsilon, DType type0, DType type1,
  1049. DType type2, DType type3, const char* algo_name) {
  1050. using namespace conv_bias;
  1051. Checker<ConvBias> checker(handle);
  1052. checker.set_before_exec_callback(
  1053. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  1054. checker.set_dtype(0, type0);
  1055. checker.set_dtype(1, type1);
  1056. checker.set_dtype(2, type2);
  1057. checker.set_dtype(4, type3);
  1058. checker.set_epsilon(epsilon);
  1059. if (NULL != rng) {
  1060. checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng).set_rng(3, rng);
  1061. }
  1062. for (auto&& arg : args) {
  1063. checker.set_param(arg.param).execs(
  1064. {arg.src, arg.filter, arg.bias, {}, {}});
  1065. }
  1066. }
  1067. void checker_conv_bias_mul_int8x8x32(std::vector<conv_bias::TestArg> args,
  1068. Handle* handle, const char* algo_name) {
  1069. using namespace conv_bias;
  1070. float epsilon = 0.001;
  1071. #if MEGDNN_ARMV7
  1072. epsilon = 1.0;
  1073. #endif
  1074. Checker<ConvBias> checker(handle);
  1075. checker.set_before_exec_callback(
  1076. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  1077. checker.set_dtype(0, dtype::Int8());
  1078. checker.set_dtype(1, dtype::Int8());
  1079. checker.set_dtype(2, dtype::Int32());
  1080. checker.set_dtype(4, dtype::Int32());
  1081. checker.set_epsilon(epsilon);
  1082. for (auto&& arg : args) {
  1083. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  1084. }
  1085. UniformIntRNG rng{-50, 50};
  1086. for (auto&& arg : args) {
  1087. checker.set_dtype(0, dtype::QuantizedS8(2.5f))
  1088. .set_dtype(1, dtype::QuantizedS8(2.5f))
  1089. .set_dtype(2, dtype::QuantizedS32(6.25f))
  1090. .set_dtype(4, dtype::QuantizedS32(6.25f))
  1091. .set_rng(0, &rng)
  1092. .set_rng(1, &rng)
  1093. .set_rng(2, &rng)
  1094. .set_param(arg.param)
  1095. .set_epsilon(epsilon)
  1096. .execs({arg.src, arg.filter, {}, {}, {}});
  1097. }
  1098. }
  1099. void checker_conv_bias_int8x8x32_preprocess(
  1100. std::vector<conv_bias::TestArg> args, Handle* handle,
  1101. const char* algo_name) {
  1102. using namespace conv_bias;
  1103. Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
  1104. handle);
  1105. checker.set_before_exec_callback(
  1106. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  1107. checker.set_dtype(0, dtype::Int8());
  1108. checker.set_dtype(1, dtype::Int8());
  1109. checker.set_dtype(2, dtype::Int32());
  1110. checker.set_dtype(4, dtype::Int32());
  1111. for (auto&& arg : args) {
  1112. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  1113. }
  1114. UniformIntRNG rng{-50, 50};
  1115. for (auto&& arg : args) {
  1116. checker.set_dtype(0, dtype::QuantizedS8(2.5f))
  1117. .set_dtype(1, dtype::QuantizedS8(2.5f))
  1118. .set_dtype(2, dtype::QuantizedS32(6.25f))
  1119. .set_dtype(4, dtype::QuantizedS32(6.25f))
  1120. .set_rng(0, &rng)
  1121. .set_rng(1, &rng)
  1122. .set_rng(2, &rng)
  1123. .set_param(arg.param)
  1124. .execs({arg.src, arg.filter, {}, {}, {}});
  1125. }
  1126. }
  1127. std::vector<conv_bias::TestArg> get_nchw44_conv_bias_args(
  1128. std::vector<size_t> kernel_vec,
  1129. std::vector<param::ConvBias::NonlineMode> nlmode_vec,
  1130. std::vector<megdnn::BiasMode> biasmode_vec, size_t stride, bool no_pad,
  1131. bool is_input_nchw, bool is_nchw44_dot) {
  1132. using namespace conv_bias;
  1133. using NLMode = param::ConvBias::NonlineMode;
  1134. std::vector<TestArg> args;
  1135. MEGDNN_MARK_USED_VAR(no_pad);
  1136. auto pack = [&](size_t n, size_t oc, size_t ic, size_t h, size_t w,
  1137. size_t kernel, size_t stride, size_t group, NLMode nlmode,
  1138. megdnn::BiasMode bias_mode, int any_pad = -1) {
  1139. constexpr int pack_c = 4;
  1140. const size_t pad = any_pad >= 0 ? any_pad : kernel / 2;
  1141. auto oc_per_group = oc / group;
  1142. auto ic_per_group = ic / group;
  1143. bool ok_group = (oc % group == 0 && ic % group == 0) &&
  1144. oc_per_group % pack_c == 0 && oc_per_group > 0 &&
  1145. ic_per_group > 0;
  1146. bool nchw_disable = group > 1 || ic_per_group >= 4;
  1147. bool nchw44_disable = ic_per_group % pack_c != 0;
  1148. bool invalid_pad = (w + 2 * pad < kernel) || (h + 2 * pad < kernel);
  1149. if (!(ok_group) || invalid_pad) {
  1150. return;
  1151. }
  1152. if ((is_input_nchw && nchw_disable) ||
  1153. (!is_input_nchw && nchw44_disable)) {
  1154. return;
  1155. }
  1156. size_t kernel_h = kernel;
  1157. size_t kernel_w = kernel;
  1158. param::ConvBias param;
  1159. if (!is_nchw44_dot) {
  1160. param.format = param::ConvBias::Format::NCHW44;
  1161. } else {
  1162. param.format = param::ConvBias::Format::NCHW44_DOT;
  1163. }
  1164. param.stride_h = stride;
  1165. param.stride_w = stride;
  1166. param.pad_h = pad;
  1167. param.pad_w = pad;
  1168. param.nonlineMode = nlmode;
  1169. auto src_tensor_shape = TensorShape{n, ic / pack_c, h, w, pack_c};
  1170. auto weight_tensor_shape = TensorShape{
  1171. oc / pack_c, ic / pack_c, kernel_h, kernel_w, pack_c, pack_c};
  1172. auto bias_tensor_shape = TensorShape{};
  1173. if (bias_mode == megdnn::BiasMode::BROADCAST_CHANNEL_BIAS) {
  1174. bias_tensor_shape = {1, oc / pack_c, 1, 1, pack_c};
  1175. } else if (bias_mode == megdnn::BiasMode::BIAS) {
  1176. bias_tensor_shape = {n, oc / pack_c,
  1177. (h + 2 * pad - kernel) / stride + 1,
  1178. (w + 2 * pad - kernel) / stride + 1, pack_c};
  1179. }
  1180. if (group == 1) {
  1181. param.sparse = param::ConvBias::Sparse::DENSE;
  1182. } else if (group > 1 && ic / group == 1 && oc / group == 1) {
  1183. megdnn_assert(0, "not support channel wise");
  1184. param.sparse = param::ConvBias::Sparse::GROUP;
  1185. weight_tensor_shape = TensorShape{group / pack_c, 1, 1,
  1186. kernel_h, kernel_w, pack_c};
  1187. } else if (group > 1 && oc_per_group % pack_c == 0 && oc / group > 0 &&
  1188. ic_per_group % pack_c == 0 && ic / group > 0) {
  1189. param.sparse = param::ConvBias::Sparse::GROUP;
  1190. weight_tensor_shape = TensorShape{group,
  1191. oc_per_group / pack_c,
  1192. ic_per_group / pack_c,
  1193. kernel_h,
  1194. kernel_w,
  1195. pack_c,
  1196. pack_c};
  1197. }
  1198. if (is_input_nchw) {
  1199. src_tensor_shape = TensorShape{n, ic, h, w};
  1200. weight_tensor_shape =
  1201. TensorShape{oc / pack_c, kernel_h, kernel_w, ic, pack_c};
  1202. }
  1203. args.emplace_back(param, src_tensor_shape, weight_tensor_shape,
  1204. bias_tensor_shape);
  1205. };
  1206. for (auto bias : biasmode_vec)
  1207. for (auto nlmode : nlmode_vec)
  1208. for (size_t n : {1, 2})
  1209. for (size_t kernel : kernel_vec)
  1210. for (size_t oc : {4, 12})
  1211. for (size_t ic : {1, 3, 4, 12})
  1212. for (size_t h : {1, 3, 12})
  1213. for (size_t w : {1, 16, 23}) {
  1214. for (size_t group = 1;
  1215. group <=
  1216. std::min(std::min(oc, ic), 4_z);
  1217. ++group) {
  1218. if (kernel != 1 && (h == 1 || w == 1)) {
  1219. continue;
  1220. }
  1221. pack(n, oc, ic, h, w, kernel, stride,
  1222. group, nlmode, bias);
  1223. }
  1224. }
  1225. return args;
  1226. }
  1227. } // namespace conv_bias
  1228. } // namespace test
  1229. } // namespace megdnn
  1230. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台