You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

conv_bias.cpp 45 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171
  1. /**
  2. * \file dnn/test/common/conv_bias.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/common/conv_bias.h"
  12. #include "megdnn/opr_param_defs.h"
  13. #include "src/common/utils.h"
  14. #include "test/common/benchmarker.h"
  15. namespace megdnn {
  16. namespace test {
  17. namespace conv_bias {
  18. namespace {
  19. void convert_arg_from_nchw4_to_chwn4(TestArg& arg) {
  20. arg.param.format = param::ConvBias::Format::CHWN4;
  21. arg.src = TensorShape{arg.src[1], arg.src[2], arg.src[3], arg.src[0], 4};
  22. arg.filter = TensorShape{arg.filter[1], arg.filter[2], arg.filter[3],
  23. arg.filter[0], 4};
  24. arg.bias =
  25. TensorShape{arg.bias[1], arg.bias[2], arg.bias[3], arg.bias[0], 4};
  26. }
  27. } // namespace
  28. std::vector<TestArg> get_args() {
  29. std::vector<TestArg> args;
  30. param::ConvBias cur_param;
  31. using NLMode = param::ConvBias::NonlineMode;
  32. // clang-format off
  33. for (auto nlmode :
  34. {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
  35. for (size_t i : {9, 63}) {
  36. cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
  37. cur_param.nonlineMode = nlmode;
  38. // fallback case
  39. args.emplace_back(cur_param, TensorShape{10, 1, i, i},
  40. TensorShape{1, 1, 8, 8}, TensorShape{1, 1, 1, 1});
  41. args.emplace_back(cur_param, TensorShape{10, 4, i, i},
  42. TensorShape{3, 4, 4, 4}, TensorShape{1, 3, 1, 1});
  43. cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
  44. args.emplace_back(cur_param, TensorShape{10, 4, i, i},
  45. TensorShape{1, 4, 3, 3}, TensorShape{1, 1, 1, 1});
  46. args.emplace_back(cur_param, TensorShape{1, 4, i, i},
  47. TensorShape{5, 4, 3, 3}, TensorShape{1, 5, 1, 1});
  48. } }
  49. // clang-format on
  50. return args;
  51. }
//! Group (channel-wise) convolution cases: stride 1/2, padding 0..3,
//! kernel 2/3/5/7 and 1 or 3 output channels per group, repeated for all
//! four nonlinearities.
std::vector<TestArg> get_chanwise_args() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
    cur_param.sparse = ConvBias::Param::Sparse::GROUP;
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
        cur_param.nonlineMode = nlmode;
        // simple case
        for (uint32_t s : {1, 2})
            for (uint32_t p : {0, 1, 2, 3})
                for (size_t f : {2, 3, 5, 7})
                    for (size_t ocpg : {1, 3}) {
                        cur_param.pad_h = cur_param.pad_w = p;
                        cur_param.stride_h = cur_param.stride_w = s;
                        args.emplace_back(cur_param, TensorShape{2, 3, 16, 16},
                                          TensorShape{3, ocpg, 1, f, f},
                                          TensorShape{1, 3 * ocpg, 1, 1});
                    }
        // pad/stride keep the last loop values here (p = 3, s = 2)
        args.emplace_back(cur_param, TensorShape{32, 12, 20, 10},
                          TensorShape{12, 2, 1, 4, 5},
                          TensorShape{1, 24, 1, 1});
        // padding larger than kern
        // NOTE(review): this arg is byte-identical to the previous one even
        // though the comment implies the padding should have been enlarged
        // first -- confirm whether a pad update is missing here.
        args.emplace_back(cur_param, TensorShape{32, 12, 20, 10},
                          TensorShape{12, 2, 1, 4, 5},
                          TensorShape{1, 24, 1, 1});
    }
    return args;
}
  82. std::vector<TestArg> get_args_1x1() {
  83. std::vector<TestArg> args;
  84. param::ConvBias cur_param;
  85. using NLMode = param::ConvBias::NonlineMode;
  86. for (auto nlmode :
  87. {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
  88. cur_param.nonlineMode = nlmode;
  89. for (size_t i : {16, 19}) {
  90. cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
  91. args.emplace_back(cur_param, TensorShape{2, 20, i, i + 1},
  92. TensorShape{30, 20, 1, 1},
  93. TensorShape{1, 30, 1, 1});
  94. cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
  95. args.emplace_back(cur_param, TensorShape{2, 20, i, i + 1},
  96. TensorShape{30, 20, 1, 1},
  97. TensorShape{1, 30, 1, 1});
  98. }
  99. }
  100. return args;
  101. }
//! Winograd test cases for a given square kernel size: dense and group
//! convolutions with no bias, full bias and per-channel bias, followed by
//! wide (128-OC) cases exercising multi-thread OC parallelism.
//! \param kernel_size spatial size of the (square) filter window
std::vector<TestArg> get_winograd_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t ic : {1, 3, 4, 7}) {
    for (size_t oc : {1, 3, 4, 7}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 0;
        //! no bias
        args.emplace_back(cur_param, TensorShape{1, ic, i, i},
                          TensorShape{oc, ic, kernel_size, kernel_size},
                          TensorShape{});
        //! bias (full output-shaped bias, computed from pad/kernel)
        args.emplace_back(
                cur_param, TensorShape{2, ic, i, i},
                TensorShape{oc, ic, kernel_size, kernel_size},
                TensorShape{2, oc, (i + cur_param.pad_h * 2 - kernel_size) + 1,
                            (i + cur_param.pad_w * 2 - kernel_size) + 1});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, kernel_size, kernel_size},
                          TensorShape{1, oc, 1, 1});
        // same shapes again as a 2-group convolution
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * ic, i, i},
                TensorShape{2, oc, ic, kernel_size, kernel_size},
                TensorShape{2, 2 * oc,
                            (i + cur_param.pad_h * 2 - kernel_size) + 1,
                            (i + cur_param.pad_w * 2 - kernel_size) + 1});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, kernel_size, kernel_size},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    // note: cur_param still carries the last nlmode from the loop above
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, 8, i, i},
                          TensorShape{128, 8, kernel_size, kernel_size},
                          TensorShape{1, 128, 1, 1});
        args.emplace_back(cur_param, TensorShape{2, 8, i, i},
                          TensorShape{128, 8, kernel_size, kernel_size},
                          TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * 8, i, i},
                          TensorShape{2, 128, 8, kernel_size, kernel_size},
                          TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
//! Winograd MK-packed (pack-size-aligned channel) 3x3 test cases: channel
//! counts are multiples of \p pack_size so the MK4/MK8 packed transforms
//! apply; dense and 2-group cases with no/full/per-channel bias, plus wide
//! (128-OC) cases for multi-thread OC parallelism.
//! \param pack_size channel packing unit (e.g. 4 for MK4, 8 for MK8)
std::vector<TestArg> get_winograd_mk_packed_args(size_t pack_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t ic : {pack_size, 2 * pack_size}) {
    for (size_t oc : {pack_size, 2 * pack_size}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        // minimal 3x3 input case (same spatial size as the filter)
        args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
                          TensorShape{pack_size, pack_size, 3, 3},
                          TensorShape{1, pack_size, 1, 1});
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias (pad=1 with 3x3 keeps the spatial size, hence {2, oc, i, i})
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, 8, i, i},
                          TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
        args.emplace_back(cur_param, TensorShape{2, 8, i, i},
                          TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * 8, i, i},
                          TensorShape{2, 128, 8, 3, 3},
                          TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
//! Quantized variant of the Winograd MK-packed 3x3 cases. Only IDENTITY and
//! RELU nonlinearities are generated (the others are not supported for the
//! quantized path exercised here).
//! \param pack_size channel packing unit
//! \param compute_float32 if true, force FLOAT32 compute mode for the
//!        quantized convolution
std::vector<TestArg> get_quantized_winograd_mk_packed_args(
        size_t pack_size, bool compute_float32) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (size_t ic : {pack_size, 2 * pack_size}) {
    for (size_t oc : {pack_size, 2 * pack_size}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        if(compute_float32){
            cur_param.compute_mode = param::ConvBias::ComputeMode::FLOAT32;
        }
        // minimal 3x3 input case
        args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
                          TensorShape{pack_size, pack_size, 3, 3},
                          TensorShape{1, pack_size, 1, 1});
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias (pad=1 with 3x3 keeps the spatial size)
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, 8, i, i},
                          TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
        args.emplace_back(cur_param, TensorShape{2, 8, i, i},
                          TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * 8, i, i},
                          TensorShape{2, 128, 8, 3, 3},
                          TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
//! Quantized conv-bias cases for one fixed nonlinearity: both convolution
//! modes, small channel counts 1..7, dense and 2-group sparsity, with
//! no/full/per-channel bias, plus an unpadded 1x1 case.
//! \param nlmode nonlinearity applied to every generated case
std::vector<TestArg> get_quantized_args_with_nlmode(
        param::ConvBias::NonlineMode nlmode) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    // clang-format off
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION,
                      param::ConvBias::Mode::CONVOLUTION}) {
    for (size_t ic : {1, 2, 3, 4, 5, 7}) {
    for (size_t oc : {1, 2, 3, 4, 5, 7}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias (pad=1 with 3x3 keeps the spatial size)
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
        // 1x1 / no padding / no bias dense case
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 0;
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 1, 1}, TensorShape{});
    } } } }
    // clang-format on
    return args;
}
  300. std::vector<TestArg> get_quantized_args() {
  301. using NLMode = param::ConvBias::NonlineMode;
  302. auto arg_p1 = get_quantized_args_with_nlmode(NLMode::IDENTITY),
  303. arg_p2 = get_quantized_args_with_nlmode(NLMode::RELU),
  304. arg_p3 = get_quantized_args_with_nlmode(NLMode::H_SWISH);
  305. std::vector<TestArg> args;
  306. args.insert(args.end(), arg_p1.begin(), arg_p1.end());
  307. args.insert(args.end(), arg_p2.begin(), arg_p2.end());
  308. args.insert(args.end(), arg_p3.begin(), arg_p3.end());
  309. return args;
  310. }
//! Dense int8 NCHW4 cases with per-channel bias, over batch/channel/spatial
//! combinations, padding 0 or kernel_size/2, stride 1 or 2.
//! \param kernel_size spatial size of the (square) filter window
std::vector<TestArg> get_int8_nchw4_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {2, 1}) {
        // cap the batch for 7x7 kernels; note this mutates the loop
        // variable, so b stays clamped for the remaining p/s iterations
        if (kernel_size == 7) {
            b = std::min(b, 32_z);
        }
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
  343. std::vector<TestArg> get_int8_nchw44_args(size_t kernel_size, size_t pack_size,
  344. bool compute_float32,
  345. bool group_mode) {
  346. std::vector<TestArg> args;
  347. param::ConvBias cur_param;
  348. megdnn_assert(pack_size > 0, "not support pack_size");
  349. megdnn_assert(kernel_size > 0, "not support kernel_size");
  350. using NLMode = param::ConvBias::NonlineMode;
  351. //// clang-format off
  352. for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
  353. for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
  354. for (size_t b : {1,2}) {
  355. for (size_t ic : {8,16}) {
  356. for (size_t oc : {8,16}) {
  357. for (size_t h : {9,23}) {
  358. for (size_t w : {9,23}) {
  359. for (int p : {0, static_cast<int>(kernel_size / 2)}) {
  360. for (size_t s : {1}) {
  361. if (kernel_size == 7) {
  362. b = std::min(b, 32_z);
  363. }
  364. size_t f = kernel_size;
  365. cur_param.mode = mode;
  366. cur_param.nonlineMode = nlmode;
  367. if (pack_size == 4){
  368. cur_param.format = param::ConvBias::Format::NCHW44;
  369. } else if(pack_size == 8){
  370. cur_param.format = param::ConvBias::Format::NCHW88;
  371. }
  372. if(compute_float32){
  373. cur_param.compute_mode =
  374. param::ConvBias::ComputeMode::FLOAT32;
  375. }
  376. cur_param.sparse = param::ConvBias::Sparse::DENSE;
  377. cur_param.pad_h = cur_param.pad_w = p;
  378. cur_param.stride_h = cur_param.stride_w = s;
  379. if (!group_mode) {
  380. //! no bias
  381. args.emplace_back(cur_param,
  382. TensorShape{b, ic / pack_size, h, w, pack_size},
  383. TensorShape{oc / pack_size, ic / pack_size, f, f,
  384. pack_size, pack_size},
  385. TensorShape{});
  386. //! bias channel
  387. args.emplace_back(cur_param,
  388. TensorShape{b, ic / pack_size, h, w, pack_size},
  389. TensorShape{oc / pack_size, ic / pack_size, f, f,
  390. pack_size, pack_size},
  391. TensorShape{1, oc / pack_size, 1, 1, pack_size});
  392. //! bias
  393. args.emplace_back(
  394. cur_param, TensorShape{b, ic / pack_size, h, w, pack_size},
  395. TensorShape{oc / pack_size, ic / pack_size, f, f, pack_size,
  396. pack_size},
  397. TensorShape{b, oc / pack_size, (h - f + 2 * p) / s + 1,
  398. (w - f + 2 * p) / s + 1, pack_size});
  399. } else {
  400. cur_param.sparse = param::ConvBias::Sparse::GROUP;
  401. args.emplace_back(
  402. cur_param,
  403. TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
  404. TensorShape{2, oc / pack_size, ic / pack_size, 3, 3,
  405. pack_size, pack_size},
  406. TensorShape{2, 2 * oc / pack_size, (h - f + 2 * p) / s + 1,
  407. (w - f + 2 * p) / s + 1, pack_size});
  408. args.emplace_back(
  409. cur_param,
  410. TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
  411. TensorShape{2, oc / pack_size, ic / pack_size, f, f,
  412. pack_size, pack_size},
  413. TensorShape{1, 2 * oc / pack_size, 1, 1, pack_size});
  414. args.emplace_back(
  415. cur_param,
  416. TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
  417. TensorShape{2, oc / pack_size, ic / pack_size, f, f,
  418. pack_size, pack_size},
  419. TensorShape{});
  420. }
  421. } } } } } } } } }
  422. // clang-format on
  423. return args;
  424. }
//! Dense int8 NCHW4 cases with irregular batch (incl. 7 and 1) and small OC
//! values, intended to exercise boundary handling in the kernels.
//! \param kernel_size spatial size of the (square) filter window
std::vector<TestArg> get_int8_nchw4_args_check_bounds(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {7, 8, 4, 1}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 8, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
//! Dense int8 NCHW4 cases with small batches (4..12) and a dense sweep of
//! widths 8..16, for small-batch kernel paths.
//! \param kernel_size spatial size of the (square) filter window
std::vector<TestArg> get_int8_nchw4_args_small_batch(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {12, 8, 4}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 8, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 9, 10, 11, 12, 13, 14, 15, 16}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
//! Dense int8 NCHW4 cases with small input channel counts (4 or 12) and
//! large OC, for small-IC kernel paths.
//! \param kernel_size spatial size of the (square) filter window
std::vector<TestArg> get_int8_nchw4_small_channel_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {4, 12}) {
    for (size_t oc : {128, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format =
                param::ConvBias::Format::NCHW4;
        cur_param.sparse =
                param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h =
                cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(
                cur_param,
                TensorShape{b, ic / 4, h, w, 4},
                TensorShape{oc, ic / 4, f, f,
                            4},
                TensorShape{1, oc / 4, 1, 1,
                            4});
    } } } } } } } } }
    // clang-format on
    return args;
}
//! Dense int8 NCHW4 cases combining small IC (4 or 12) with irregular
//! batch and OC values, to exercise boundary handling on small-IC paths.
//! \param kernel_size spatial size of the (square) filter window
std::vector<TestArg> get_int8_nchw4_small_channel_args_check_bounds(
        size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {8, 7, 4, 1}) {
    for (size_t ic : {4, 12}) {
    for (size_t oc : {16, 8, 12, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
  549. std::vector<TestArg> get_int8_chwn4_args(size_t kernel_size) {
  550. auto args = get_int8_nchw4_args(kernel_size);
  551. for (auto& arg : args) {
  552. convert_arg_from_nchw4_to_chwn4(arg);
  553. }
  554. return args;
  555. }
  556. std::vector<TestArg> get_int8_chwn4_args_check_bounds(size_t kernel_size) {
  557. auto args = get_int8_nchw4_args_check_bounds(kernel_size);
  558. for (auto& arg : args) {
  559. convert_arg_from_nchw4_to_chwn4(arg);
  560. }
  561. return args;
  562. }
  563. std::vector<TestArg> get_int8_chwn4_small_channel_args(size_t kernel_size) {
  564. auto args = get_int8_nchw4_small_channel_args(kernel_size);
  565. for (auto& arg : args) {
  566. convert_arg_from_nchw4_to_chwn4(arg);
  567. }
  568. return args;
  569. }
  570. std::vector<TestArg> get_int8_chwn4_small_channel_args_check_bounds(
  571. size_t kernel_size) {
  572. auto args = get_int8_nchw4_small_channel_args_check_bounds(kernel_size);
  573. for (auto& arg : args) {
  574. convert_arg_from_nchw4_to_chwn4(arg);
  575. }
  576. return args;
  577. }
  578. std::vector<TestArg> get_int8_chwn4_args_small_batch(size_t kernel_size) {
  579. auto args = get_int8_nchw4_args_small_batch(kernel_size);
  580. for (auto& arg : args) {
  581. convert_arg_from_nchw4_to_chwn4(arg);
  582. }
  583. return args;
  584. }
//! Dense int8 NCHW4 cases sized for tensor-core kernels: fixed batch 64 and
//! OC 128, with IC 32/64.
//! \param kernel_size spatial size of the (square) filter window
std::vector<TestArg> get_int8_nchw4_tensorcore_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
        size_t b = 64, oc = 128;
        for (size_t ic : {32, 64}) {
        for (size_t h : {8}) {
        for (size_t w : {11}) {
        for (int p : {static_cast<int>(kernel_size / 2), 0}) {
        for (size_t s : {1, 2}) {
            size_t f = kernel_size;
            cur_param.mode = mode;
            cur_param.nonlineMode = nlmode;
            cur_param.format = param::ConvBias::Format::NCHW4;
            cur_param.sparse = param::ConvBias::Sparse::DENSE;
            cur_param.pad_h = cur_param.pad_w = p;
            cur_param.stride_h = cur_param.stride_w = s;
            //! bias channel
            args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                              TensorShape{oc, ic / 4, f, f, 4},
                              TensorShape{1, oc / 4, 1, 1, 4});
        } } } } }
    } }
    // clang-format on
    return args;
}
  614. std::vector<TestArg> get_int8_chwn4_tensorcore_args(size_t kernel_size) {
  615. auto args = get_int8_nchw4_tensorcore_args(kernel_size);
  616. for (auto& arg : args) {
  617. convert_arg_from_nchw4_to_chwn4(arg);
  618. }
  619. return args;
  620. }
//! Run a Checker over conv-bias test args with the given dtypes.
//! \param src_dtype / filter_dtype must share the same dtype enum
//! \param bias_dtype must match the category implied by src_dtype
//!        (QuantizedS32 for QuantizedS8 src, else same float type)
//! \param handle device handle to test on
//! \param algo if non-null, restrict execution to this algorithm name
//! \param format NCHW4 or CHWN4; also selects default args when \p args
//!        is empty
//! \param args cases to run; empty means "use the default int8 3x3 cases"
//! \param fuse_z if true, also feed a z tensor shaped like the conv output
//!        so the z-fusion path is exercised
void check_conv_bias(DType src_dtype, DType filter_dtype, DType bias_dtype,
                     DType dst_dtype, Handle* handle, const char* algo,
                     param::ConvBias::Format format,
                     const std::vector<TestArg>& args, bool fuse_z) {
    megdnn_assert(src_dtype.enumv() == filter_dtype.enumv());
    Checker<ConvBiasForward> checker(handle);
    if (algo) {
        checker.set_before_exec_callback(
                ConvBiasAlgoChecker<ConvBiasForward>(algo));
    }
    std::unique_ptr<RNG> rng;
    std::unique_ptr<RNG> bias_rng;
    // NOTE(review): const_rng is assigned below but never used afterwards in
    // this function -- confirm whether it is dead or meant for a checker slot.
    std::unique_ptr<RNG> const_rng;
    // TODO: check range of rng
    if (src_dtype.enumv() == DTypeEnum::QuantizedS8) {
        rng = std::make_unique<UniformIntRNG>(-3, 3);
        const_rng = std::make_unique<UniformIntRNG>(1, 1);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
        bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
        // loose tolerances for the quantized path
        checker.set_epsilon(1 + 1e-3)
                .set_max_avg_error(1e-1)
                .set_max_avg_biased_error(1e-1);
    } else if (src_dtype.enumv() == DTypeEnum::Float16) {
        rng = std::make_unique<NormalRNG>(2.f);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float16);
        bias_rng = std::make_unique<NormalRNG>(2.f);
        checker.set_epsilon(1e-2);
    } else if (src_dtype.enumv() == DTypeEnum::Float32) {
        rng = std::make_unique<NormalRNG>(2.f);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float32);
        bias_rng = std::make_unique<NormalRNG>(2.f);
    }
    using Param = param::ConvBias;
    using Format = Param::Format;
    // Derive the z-tensor shape (the conv output shape) for an arg; returns
    // an empty shape when fuse_z is off.
    auto get_z_shape = [&fuse_z, &format](TestArg arg) -> TensorShape {
        TensorShape z{};
        if (fuse_z) {
            size_t hi, wi, sh, sw, ph, pw, fh, fw;
            z = arg.src;
            size_t spatial_idx = 2;
            if (format == Format::NCHW4) {
                hi = arg.src[2];
                wi = arg.src[3];
                fh = arg.filter[2];
                fw = arg.filter[3];
                // output channels (packed by 4) replace input channels
                z[1] = arg.filter[0] / 4;
            } else {
                megdnn_assert(format == Format::CHWN4);
                hi = arg.src[1];
                wi = arg.src[2];
                fh = arg.filter[1];
                fw = arg.filter[2];
                z[0] = arg.filter[3] / 4;
                spatial_idx = 1;
            }
            sh = arg.param.stride_h;
            sw = arg.param.stride_w;
            ph = arg.param.pad_h;
            pw = arg.param.pad_w;
            size_t ho = infer_conv_shape(hi, fh, sh, ph);
            size_t wo = infer_conv_shape(wi, fw, sw, pw);
            z[spatial_idx] = ho;
            z[spatial_idx + 1] = wo;
        }
        return z;
    };
    // all dtypes handled above set both rngs; anything else trips here
    megdnn_assert(rng != nullptr && bias_rng != nullptr);
    checker.set_rng(0, rng.get())
            .set_rng(1, rng.get())
            .set_rng(2, rng.get())
            .set_rng(3, rng.get());
    if (args.empty()) {
        std::vector<TestArg> default_args;
        if (format == Format::NCHW4) {
            default_args = get_int8_nchw4_args(3);
        } else if (format == Format::CHWN4) {
            default_args = get_int8_chwn4_args(3);
        }
        for (auto&& arg : default_args) {
            auto z = get_z_shape(arg);
            checker.set_dtype(0, src_dtype)
                    .set_dtype(1, filter_dtype)
                    .set_dtype(2, bias_dtype)
                    .set_dtype(3, dst_dtype)
                    .set_dtype(4, dst_dtype)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, arg.bias, z, {}});
        }
    } else {
        for (auto&& arg : args) {
            auto z = get_z_shape(arg);
            checker.set_dtype(0, src_dtype)
                    .set_dtype(1, filter_dtype)
                    .set_dtype(2, bias_dtype)
                    .set_dtype(3, dst_dtype)
                    .set_dtype(4, dst_dtype)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, arg.bias, z, {}});
        }
    }
}
  722. #if MEGDNN_WITH_BENCHMARK
  723. std::vector<conv_bias::TestArg> get_winograd_benchmark_args(size_t kernel,
  724. size_t pack_size) {
  725. std::vector<conv_bias::TestArg> args;
  726. auto pack = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
  727. size_t p) {
  728. if (ic % pack_size != 0 || oc % pack_size != 0)
  729. return;
  730. if (w + 2 * p < kernel || h + 2 * p < kernel)
  731. return;
  732. param::ConvBias param;
  733. param.stride_h = 1;
  734. param.stride_w = 1;
  735. param.pad_h = p;
  736. param.pad_w = p;
  737. args.push_back(conv_bias::TestArg{param,
  738. TensorShape{1, ic, h, w},
  739. TensorShape{oc, ic, kernel, kernel},
  740. {1, oc, 1, 1}});
  741. };
  742. for (size_t ic : {8, 16, 32, 64}) {
  743. for (size_t oc : {8, 16, 32, 64}) {
  744. pack(oc, ic, 56, 56, kernel, kernel / 2);
  745. pack(oc, ic, 128, 128, kernel, kernel / 2);
  746. pack(oc, ic, 256, 256, kernel, kernel / 2);
  747. }
  748. }
  749. //! conv in vgg16
  750. pack(512, 512, 15, 15, kernel, kernel / 2);
  751. pack(512, 256, 15, 15, kernel, kernel / 2);
  752. pack(256, 256, 29, 29, kernel, kernel / 2);
  753. pack(256, 128, 29, 29, kernel, kernel / 2);
  754. pack(128, 128, 57, 57, kernel, kernel / 2);
  755. pack(128, 64, 57, 57, kernel, kernel / 2);
  756. pack(64, 64, 123, 123, kernel, kernel / 2);
  757. pack(64, 24, 123, 123, kernel, kernel / 2);
  758. pack(24, 24, 224, 224, kernel, kernel / 2);
  759. //! conv in resnet18
  760. pack(64, 64, 56, 56, kernel, kernel / 2);
  761. pack(128, 128, 28, 28, kernel, kernel / 2);
  762. pack(256, 256, 14, 14, kernel, kernel / 2);
  763. pack(512, 512, 7, 7, kernel, kernel / 2);
  764. return args;
  765. }
  766. void benchmark_winograd(const char* algo_name, Handle* handle, size_t kernel,
  767. size_t pack_size) {
  768. auto&& args = get_winograd_benchmark_args(kernel, pack_size);
  769. using namespace conv_bias;
  770. constexpr size_t RUN = 10;
  771. Benchmarker<Convolution> benchmark(handle);
  772. benchmark.set_display(false);
  773. benchmark.set_times(RUN);
  774. Benchmarker<ConvBias> benchmark_winograd(handle);
  775. benchmark_winograd.set_display(false);
  776. benchmark_winograd.set_times(RUN);
  777. for (auto&& arg : args) {
  778. TensorLayout dst_layout;
  779. auto opr = handle->create_operator<ConvBias>();
  780. opr->param() = arg.param;
  781. opr->deduce_layout({arg.src, dtype::Float32()},
  782. {arg.filter, dtype::Float32()},
  783. {arg.bias, dtype::Float32()}, {}, dst_layout);
  784. //! dst.nr_elems * IC * FH * FW * 2
  785. float computations = dst_layout.total_nr_elems() * arg.filter[1] *
  786. arg.filter[2] * arg.filter[3] * 2.0 /
  787. (1024 * 1024 * 1024) * 1e3;
  788. param::Convolution conv_param;
  789. conv_param.pad_h = arg.param.pad_h;
  790. conv_param.pad_w = arg.param.pad_w;
  791. conv_param.stride_h = arg.param.stride_h;
  792. conv_param.stride_w = arg.param.stride_w;
  793. auto used = benchmark.set_param(conv_param)
  794. .exec({arg.src, arg.filter, {}}) /
  795. RUN;
  796. benchmark_winograd.set_param(arg.param);
  797. auto used_winograd =
  798. algo_benchmark<ConvBias>(benchmark_winograd,
  799. {arg.src, arg.filter, {}, {}, {}},
  800. algo_name) /
  801. RUN;
  802. printf("%s %s: normal: %f ms %f Gflops winograd: %f ms %f GFlops "
  803. "speedup: "
  804. "%f\n",
  805. arg.src.to_string().c_str(), arg.filter.to_string().c_str(),
  806. used, computations / used, used_winograd,
  807. computations / used_winograd, used / used_winograd);
  808. }
  809. }
  810. #endif // MEGDNN_WITH_BENCHMARK
  811. std::vector<conv_bias::TestArg> get_conv_bias_args(
  812. std::vector<size_t> kernel, size_t stride, bool no_pad, bool no_bias,
  813. bool no_nonlinemode, bool quantized_nlmod, bool only_broadcast_bias) {
  814. using namespace conv_bias;
  815. using Param = param::ConvBias;
  816. using NLMode = param::ConvBias::NonlineMode;
  817. std::vector<TestArg> args;
  818. auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
  819. size_t kernel, size_t stride, NLMode nlmode) {
  820. Param param;
  821. param.stride_h = stride;
  822. param.stride_w = stride;
  823. if (!no_pad) {
  824. param.pad_h = kernel / 2;
  825. param.pad_w = kernel / 2;
  826. } else {
  827. param.pad_h = 0;
  828. param.pad_w = 0;
  829. }
  830. param.nonlineMode = nlmode;
  831. args.emplace_back(param, TensorShape{n, ic, h, w},
  832. TensorShape{oc, ic, kernel, kernel}, TensorShape{});
  833. if (!no_bias) {
  834. args.emplace_back(param, TensorShape{n, ic, h, w},
  835. TensorShape{oc, ic, kernel, kernel},
  836. TensorShape{1, oc, 1, 1});
  837. if (!only_broadcast_bias) {
  838. args.emplace_back(
  839. param, TensorShape{n, ic, h, w},
  840. TensorShape{oc, ic, kernel, kernel},
  841. TensorShape{
  842. n, oc,
  843. (h + 2 * param.pad_h - kernel) / stride + 1,
  844. (w + 2 * param.pad_h - kernel) / stride + 1});
  845. }
  846. }
  847. param.sparse = param::ConvBias::Sparse::GROUP;
  848. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  849. TensorShape{2, oc, ic, kernel, kernel},
  850. TensorShape{});
  851. if (!no_bias) {
  852. if (!only_broadcast_bias) {
  853. args.emplace_back(
  854. param, TensorShape{n, 2 * ic, h, w},
  855. TensorShape{2, oc, ic, kernel, kernel},
  856. TensorShape{
  857. n, 2 * oc,
  858. (h + param.pad_h * 2 - kernel) / stride + 1,
  859. (w + param.pad_w * 2 - kernel) / stride + 1});
  860. }
  861. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  862. TensorShape{2, oc, ic, kernel, kernel},
  863. TensorShape{1, 2 * oc, 1, 1});
  864. }
  865. };
  866. std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
  867. if (!no_nonlinemode) {
  868. nonlinemode.emplace_back(NLMode::RELU);
  869. nonlinemode.emplace_back(NLMode::H_SWISH);
  870. if (!quantized_nlmod) {
  871. nonlinemode.emplace_back(NLMode::SIGMOID);
  872. }
  873. }
  874. for (size_t n : {1, 2}) {
  875. for (auto nlmode : nonlinemode) {
  876. for (size_t ic : {1, 3, 7}) {
  877. for (size_t oc : {1, 3, 7}) {
  878. for (size_t size : {8, 16, 20}) {
  879. for (size_t kern : kernel) {
  880. pack(n, oc, ic, size, size, kern, stride, nlmode);
  881. }
  882. }
  883. }
  884. }
  885. }
  886. }
  887. return args;
  888. }
  889. std::vector<megdnn::test::conv_bias::TestArg> get_conv_bias_1x1_args(
  890. bool no_bias, bool no_nonlinemode, bool quantized_nlmod,
  891. bool only_broadcast_bias) {
  892. using namespace conv_bias;
  893. using Param = param::ConvBias;
  894. using NLMode = param::ConvBias::NonlineMode;
  895. using CONVMode = param::ConvBias::Mode;
  896. std::vector<TestArg> args;
  897. auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
  898. size_t stride, NLMode nlmode, CONVMode convmode) {
  899. Param param;
  900. param.stride_h = stride;
  901. param.stride_w = stride;
  902. param.pad_h = 0;
  903. param.pad_w = 0;
  904. param.mode = convmode;
  905. param.nonlineMode = nlmode;
  906. args.emplace_back(param, TensorShape{n, ic, h, w},
  907. TensorShape{oc, ic, 1, 1}, TensorShape{});
  908. if (!no_bias) {
  909. args.emplace_back(param, TensorShape{n, ic, h, w},
  910. TensorShape{oc, ic, 1, 1},
  911. TensorShape{1, oc, 1, 1});
  912. if (!only_broadcast_bias) {
  913. args.emplace_back(param, TensorShape{n, ic, h, w},
  914. TensorShape{oc, ic, 1, 1},
  915. TensorShape{n, oc, (h - 1) / stride + 1,
  916. (w - 1) / stride + 1});
  917. }
  918. }
  919. param.sparse = param::ConvBias::Sparse::GROUP;
  920. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  921. TensorShape{2, oc, ic, 1, 1}, TensorShape{});
  922. if (!no_bias) {
  923. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  924. TensorShape{2, oc, ic, 1, 1},
  925. TensorShape{1, 2 * oc, 1, 1});
  926. if (!only_broadcast_bias) {
  927. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  928. TensorShape{2, oc, ic, 1, 1},
  929. TensorShape{n, 2 * oc, (h - 1) / stride + 1,
  930. (w - 1) / stride + 1});
  931. }
  932. }
  933. };
  934. std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
  935. if (!no_nonlinemode) {
  936. nonlinemode.emplace_back(NLMode::RELU);
  937. nonlinemode.emplace_back(NLMode::H_SWISH);
  938. if (!quantized_nlmod) {
  939. nonlinemode.emplace_back(NLMode::SIGMOID);
  940. }
  941. }
  942. std::vector<CONVMode> convmodes{param::ConvBias::Mode::CONVOLUTION,
  943. param::ConvBias::Mode::CROSS_CORRELATION};
  944. for (size_t n : {1, 2})
  945. for (size_t oc : {1, 9, 33})
  946. for (size_t ic : {1, 16, 64})
  947. for (size_t size : {7, 14, 28})
  948. for (auto nlmode : nonlinemode)
  949. for (auto convmode : convmodes) {
  950. pack(n, oc, ic, size, size, 1, nlmode, convmode);
  951. }
  952. return args;
  953. }
  954. void check_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle,
  955. const char* algo_name) {
  956. using namespace conv_bias;
  957. Checker<ConvBias> checker(handle);
  958. checker.set_before_exec_callback(
  959. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  960. for (auto&& arg : args) {
  961. checker.set_param(arg.param).execs(
  962. {arg.src, arg.filter, arg.bias, {}, {}});
  963. }
  964. }
  965. void checker_conv_bias_int8x8x16(std::vector<conv_bias::TestArg> args,
  966. Handle* handle, const char* algo_name) {
  967. using namespace conv_bias;
  968. Checker<ConvBias> checker(handle);
  969. checker.set_before_exec_callback(
  970. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  971. checker.set_dtype(0, dtype::Int8());
  972. checker.set_dtype(1, dtype::Int8());
  973. checker.set_dtype(2, dtype::Int16());
  974. checker.set_dtype(4, dtype::Int16());
  975. for (auto&& arg : args) {
  976. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  977. }
  978. }
  979. void winograd_algo_extra_impl(const TensorNDArray& tensors, uint32_t m,
  980. param::ConvBias param, Handle* handle,
  981. param::MatrixMul::Format format) {
  982. megdnn_assert(param.format == param::ConvBias::Format::NCHW ||
  983. param.format == param::ConvBias::Format::NCHW44);
  984. auto winograd_preprocess_opr =
  985. handle->create_operator<WinogradFilterPreprocess>();
  986. winograd_preprocess_opr->param().output_block_size = m;
  987. winograd_preprocess_opr->param().format = format;
  988. winograd_preprocess_opr->param().compute_mode =
  989. param.compute_mode;
  990. TensorLayout filter_transform_layout;
  991. winograd_preprocess_opr->deduce_layout(tensors[1].layout,
  992. filter_transform_layout);
  993. size_t winograd_preprocess_workspace_in_bytes =
  994. winograd_preprocess_opr->get_workspace_in_bytes(
  995. tensors[1].layout, filter_transform_layout);
  996. auto conv_bias_opr = handle->create_operator<ConvBias>();
  997. conv_bias_opr->param() = param;
  998. if (param.format == param::ConvBias::Format::NCHW) {
  999. conv_bias_opr->param().format = param::ConvBias::Format::NCHW_WINOGRAD;
  1000. } else {
  1001. conv_bias_opr->param().format =
  1002. param::ConvBias::Format::NCHW44_WINOGRAD;
  1003. }
  1004. conv_bias_opr->param().output_block_size = m;
  1005. size_t conv_bias_workspace_in_bytes = conv_bias_opr->get_workspace_in_bytes(
  1006. tensors[0].layout, filter_transform_layout, tensors[2].layout,
  1007. tensors[3].layout, tensors[4].layout, nullptr);
  1008. WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(),
  1009. conv_bias_workspace_in_bytes,
  1010. winograd_preprocess_workspace_in_bytes});
  1011. wb.set(malloc(wb.total_size_in_bytes()));
  1012. TensorND filter_transform_tensor(wb.get(0),
  1013. std::move(filter_transform_layout));
  1014. winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor,
  1015. wb.get_workspace(2));
  1016. conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2],
  1017. tensors[3], tensors[4], nullptr, wb.get_workspace(1));
  1018. free(wb.ptr());
  1019. };
  1020. } // namespace conv_bias
  1021. } // namespace test
  1022. } // namespace megdnn
  1023. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台