conv_bias.cpp
#include "test/common/conv_bias.h"
#include "megdnn/opr_param_defs.h"
#include "src/common/utils.h"
#include "test/common/benchmarker.h"

namespace megdnn {
namespace test {
namespace conv_bias {
namespace {
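//! Convert a test case generated for the NCHW4 layout into the equivalent
//! CHWN4 case: the leading four dimensions of src/filter/bias are permuted
//! from (N, C/4, H, W) to (C/4, H, W, N) while the trailing pack of 4 stays.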
void convert_arg_from_nchw4_to_chwn4(TestArg& arg) {
    arg.param.format = param::ConvBias::Format::CHWN4;
    arg.src = TensorShape{arg.src[1], arg.src[2], arg.src[3], arg.src[0], 4};
    arg.filter =
            TensorShape{arg.filter[1], arg.filter[2], arg.filter[3], arg.filter[0], 4};
    arg.bias = TensorShape{arg.bias[1], arg.bias[2], arg.bias[3], arg.bias[0], 4};
}
}  // namespace
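//! Generic NCHW float cases covering all nonlinearities, both convolution
//! modes and a few shapes that exercise the fallback implementation.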
std::vector<TestArg> get_args() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        // fallback case
        args.emplace_back(cur_param, TensorShape{10, 1, i, i},
                          TensorShape{1, 1, 8, 8}, TensorShape{1, 1, 1, 1});
        args.emplace_back(cur_param, TensorShape{10, 4, i, i},
                          TensorShape{3, 4, 4, 4}, TensorShape{1, 3, 1, 1});
        cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
        args.emplace_back(cur_param, TensorShape{10, 4, i, i},
                          TensorShape{1, 4, 3, 3}, TensorShape{1, 1, 1, 1});
        args.emplace_back(cur_param, TensorShape{1, 4, i, i},
                          TensorShape{5, 4, 3, 3}, TensorShape{1, 5, 1, 1});
    } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_chanwise_args() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
    cur_param.sparse = ConvBias::Param::Sparse::GROUP;
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
        cur_param.nonlineMode = nlmode;
        // simple case
        for (uint32_t s : {1, 2})
            for (uint32_t p : {0, 1, 2, 3})
                for (size_t f : {2, 3, 5, 7})
                    for (size_t ocpg : {1, 3}) {
                        cur_param.pad_h = cur_param.pad_w = p;
                        cur_param.stride_h = cur_param.stride_w = s;
                        args.emplace_back(
                                cur_param, TensorShape{2, 3, 16, 16},
                                TensorShape{3, ocpg, 1, f, f},
                                TensorShape{1, 3 * ocpg, 1, 1});
                    }
        args.emplace_back(
                cur_param, TensorShape{32, 12, 20, 10}, TensorShape{12, 2, 1, 4, 5},
                TensorShape{1, 24, 1, 1});
        // padding larger than kern
        args.emplace_back(
                cur_param, TensorShape{32, 12, 20, 10}, TensorShape{12, 2, 1, 4, 5},
                TensorShape{1, 24, 1, 1});
    }
    return args;
}

std::vector<TestArg> get_args_1x1() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
        cur_param.nonlineMode = nlmode;
        for (size_t i : {16, 19}) {
            cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
            args.emplace_back(
                    cur_param, TensorShape{2, 20, i, i + 1}, TensorShape{30, 20, 1, 1},
                    TensorShape{1, 30, 1, 1});
            cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
            args.emplace_back(
                    cur_param, TensorShape{2, 20, i, i + 1}, TensorShape{30, 20, 1, 1},
                    TensorShape{1, 30, 1, 1});
        }
    }
    return args;
}
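//! Cases for Winograd algorithms with a square kernel of `kernel_size`:
//! dense and group convolution, with no bias / full bias / per-channel bias,
//! plus large-OC shapes to exercise multi-thread OC parallelism.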
std::vector<TestArg> get_winograd_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t ic : {1, 3, 4, 7}) {
    for (size_t oc : {1, 3, 4, 7}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 0;
        //! no bias
        args.emplace_back(cur_param, TensorShape{1, ic, i, i},
                          TensorShape{oc, ic, kernel_size, kernel_size},
                          TensorShape{});
        //! bias
        args.emplace_back(
                cur_param, TensorShape{2, ic, i, i},
                TensorShape{oc, ic, kernel_size, kernel_size},
                TensorShape{2, oc, (i + cur_param.pad_h * 2 - kernel_size) + 1,
                            (i + cur_param.pad_w * 2 - kernel_size) + 1});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, kernel_size, kernel_size},
                          TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * ic, i, i},
                TensorShape{2, oc, ic, kernel_size, kernel_size},
                TensorShape{2, 2 * oc,
                            (i + cur_param.pad_h * 2 - kernel_size) + 1,
                            (i + cur_param.pad_w * 2 - kernel_size) + 1});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, kernel_size, kernel_size},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(
                cur_param, TensorShape{1, 8, i, i},
                TensorShape{128, 8, kernel_size, kernel_size},
                TensorShape{1, 128, 1, 1});
        args.emplace_back(
                cur_param, TensorShape{2, 8, i, i},
                TensorShape{128, 8, kernel_size, kernel_size},
                TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * 8, i, i},
                TensorShape{2, 128, 8, kernel_size, kernel_size},
                TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
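//! 3x3 Winograd cases for the channel-packed (MK) formats: IC and OC are
//! multiples of `pack_size`, with the same bias/sparse variants as above.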
std::vector<TestArg> get_winograd_mk_packed_args(size_t pack_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t ic : {pack_size, 2 * pack_size}) {
    for (size_t oc : {pack_size, 2 * pack_size}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
                          TensorShape{pack_size, pack_size, 3, 3},
                          TensorShape{1, pack_size, 1, 1});
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(
                cur_param, TensorShape{1, 8, i, i}, TensorShape{128, 8, 3, 3},
                TensorShape{1, 128, 1, 1});
        args.emplace_back(
                cur_param, TensorShape{2, 8, i, i}, TensorShape{128, 8, 3, 3},
                TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * 8, i, i}, TensorShape{2, 128, 8, 3, 3},
                TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
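//! Same packed 3x3 Winograd cases restricted to quantization-friendly
//! nonlinearities; `compute_float32` switches the compute mode to FLOAT32.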
std::vector<TestArg> get_quantized_winograd_mk_packed_args(
        size_t pack_size, bool compute_float32) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (size_t ic : {pack_size, 2 * pack_size}) {
    for (size_t oc : {pack_size, 2 * pack_size}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        if (compute_float32) {
            cur_param.compute_mode = param::ConvBias::ComputeMode::FLOAT32;
        }
        args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
                          TensorShape{pack_size, pack_size, 3, 3},
                          TensorShape{1, pack_size, 1, 1});
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(
                cur_param, TensorShape{1, 8, i, i}, TensorShape{128, 8, 3, 3},
                TensorShape{1, 128, 1, 1});
        args.emplace_back(
                cur_param, TensorShape{2, 8, i, i}, TensorShape{128, 8, 3, 3},
                TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * 8, i, i}, TensorShape{2, 128, 8, 3, 3},
                TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}

std::vector<TestArg> get_quantized_args_with_nlmode(
        param::ConvBias::NonlineMode nlmode) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    // clang-format off
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION,
                      param::ConvBias::Mode::CONVOLUTION}) {
    for (size_t ic : {1, 2, 3, 4, 5, 7}) {
    for (size_t oc : {1, 2, 3, 4, 5, 7}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 0;
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 1, 1}, TensorShape{});
    } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_quantized_args() {
    using NLMode = param::ConvBias::NonlineMode;
    auto arg_p1 = get_quantized_args_with_nlmode(NLMode::IDENTITY),
         arg_p2 = get_quantized_args_with_nlmode(NLMode::RELU),
         arg_p3 = get_quantized_args_with_nlmode(NLMode::H_SWISH);
    std::vector<TestArg> args;
    args.insert(args.end(), arg_p1.begin(), arg_p1.end());
    args.insert(args.end(), arg_p2.begin(), arg_p2.end());
    args.insert(args.end(), arg_p3.begin(), arg_p3.end());
    return args;
}
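//! int8 NCHW4 cases (tensors packed as {N, C/4, H, W, 4}) with per-channel
//! bias; the batch is capped at 32 when the 7x7 kernel is tested.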
std::vector<TestArg> get_int8_nchw4_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {2, 1}) {
        if (kernel_size == 7) {
            b = std::min(b, 32_z);
        }
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
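//! int8 cases for the CPU packed formats. `pack_size` selects the format
//! (4 -> NCHW44, 8 -> NCHW88), `compute_float32` switches the compute mode to
//! FLOAT32, and `group_mode` emits group-convolution shapes instead of the
//! dense no-bias / channel-bias / full-bias triples.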
std::vector<TestArg> get_int8_nchw44_args(
        size_t kernel_size, size_t pack_size, bool compute_float32, bool group_mode) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    megdnn_assert(pack_size > 0, "not support pack_size");
    megdnn_assert(kernel_size > 0, "not support kernel_size");
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU, NLMode::H_SWISH}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {1, 2}) {
    for (size_t ic : {8, 16}) {
    for (size_t oc : {8, 16}) {
    for (size_t h : {9, 23}) {
    for (size_t w : {9, 23}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {1}) {
        if (kernel_size == 7) {
            b = std::min(b, 32_z);
        }
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        if (pack_size == 4) {
            cur_param.format = param::ConvBias::Format::NCHW44;
        } else if (pack_size == 8) {
            cur_param.format = param::ConvBias::Format::NCHW88;
        }
        if (compute_float32) {
            cur_param.compute_mode = param::ConvBias::ComputeMode::FLOAT32;
        }
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        if (!group_mode) {
            //! no bias
            args.emplace_back(cur_param,
                              TensorShape{b, ic / pack_size, h, w, pack_size},
                              TensorShape{oc / pack_size, ic / pack_size, f, f,
                                          pack_size, pack_size},
                              TensorShape{});
            //! bias channel
            args.emplace_back(cur_param,
                              TensorShape{b, ic / pack_size, h, w, pack_size},
                              TensorShape{oc / pack_size, ic / pack_size, f, f,
                                          pack_size, pack_size},
                              TensorShape{1, oc / pack_size, 1, 1, pack_size});
            //! bias
            args.emplace_back(
                    cur_param, TensorShape{b, ic / pack_size, h, w, pack_size},
                    TensorShape{oc / pack_size, ic / pack_size, f, f, pack_size,
                                pack_size},
                    TensorShape{b, oc / pack_size, (h - f + 2 * p) / s + 1,
                                (w - f + 2 * p) / s + 1, pack_size});
        } else {
            cur_param.sparse = param::ConvBias::Sparse::GROUP;
            args.emplace_back(
                    cur_param,
                    TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
                    TensorShape{2, oc / pack_size, ic / pack_size, 3, 3,
                                pack_size, pack_size},
                    TensorShape{2, 2 * oc / pack_size, (h - f + 2 * p) / s + 1,
                                (w - f + 2 * p) / s + 1, pack_size});
            args.emplace_back(
                    cur_param,
                    TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
                    TensorShape{2, oc / pack_size, ic / pack_size, f, f,
                                pack_size, pack_size},
                    TensorShape{1, 2 * oc / pack_size, 1, 1, pack_size});
            args.emplace_back(
                    cur_param,
                    TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
                    TensorShape{2, oc / pack_size, ic / pack_size, f, f,
                                pack_size, pack_size},
                    TensorShape{});
        }
    } } } } } } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int8_nchw4_args_check_bounds(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {7, 8, 4, 1}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 8, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int8_nchw4_args_small_batch(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU, NLMode::H_SWISH}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {12, 8, 4}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 8, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 9, 10, 11, 12, 13, 14, 15, 16}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int8_nchw4_small_channel_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU, NLMode::H_SWISH}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {4, 12}) {
    for (size_t oc : {128, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int8_nchw4_small_channel_args_check_bounds(
        size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {8, 7, 4, 1}) {
    for (size_t ic : {4, 12}) {
    for (size_t oc : {16, 8, 12, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
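//! The CHWN4 case lists below are derived from the corresponding NCHW4 lists
//! by converting each argument with convert_arg_from_nchw4_to_chwn4().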
std::vector<TestArg> get_int8_chwn4_args(size_t kernel_size) {
    auto args = get_int8_nchw4_args(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}

std::vector<TestArg> get_int8_chwn4_args_check_bounds(size_t kernel_size) {
    auto args = get_int8_nchw4_args_check_bounds(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}

std::vector<TestArg> get_int8_chwn4_small_channel_args(size_t kernel_size) {
    auto args = get_int8_nchw4_small_channel_args(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}

std::vector<TestArg> get_int8_chwn4_small_channel_args_check_bounds(
        size_t kernel_size) {
    auto args = get_int8_nchw4_small_channel_args_check_bounds(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}

std::vector<TestArg> get_int8_chwn4_args_small_batch(size_t kernel_size) {
    auto args = get_int8_nchw4_args_small_batch(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}

std::vector<TestArg> get_int8_nchw4_tensorcore_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU, NLMode::H_SWISH}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
        size_t b = 64, oc = 128;
        for (size_t ic : {32, 64}) {
        for (size_t h : {8}) {
        for (size_t w : {11}) {
        for (int p : {static_cast<int>(kernel_size / 2), 0}) {
        for (size_t s : {1, 2}) {
            size_t f = kernel_size;
            cur_param.mode = mode;
            cur_param.nonlineMode = nlmode;
            cur_param.format = param::ConvBias::Format::NCHW4;
            cur_param.sparse = param::ConvBias::Sparse::DENSE;
            cur_param.pad_h = cur_param.pad_w = p;
            cur_param.stride_h = cur_param.stride_w = s;
            //! bias channel
            args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                              TensorShape{oc, ic / 4, f, f, 4},
                              TensorShape{1, oc / 4, 1, 1, 4});
        } } } } }
    } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int8_chwn4_tensorcore_args(size_t kernel_size) {
    auto args = get_int8_nchw4_tensorcore_args(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
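//! Run ConvBiasForward through the Checker for the given dtypes and format.
//! RNG ranges and error tolerances are chosen per source dtype; when `fuse_z`
//! is set, a z tensor with the inferred output shape is passed so the fused
//! elementwise-add path is exercised. `stable_test` enables the checker's
//! stable check and disables the naive reference check.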
void check_conv_bias(
        DType src_dtype, DType filter_dtype, DType bias_dtype, DType dst_dtype,
        Handle* handle, const char* algo, param::ConvBias::Format format,
        const std::vector<TestArg>& args, bool fuse_z, bool stable_test) {
    megdnn_assert(
            (src_dtype.enumv() == filter_dtype.enumv()) ||
            (src_dtype.enumv() == DTypeEnum::Quantized4Asymm &&
             filter_dtype.enumv() == DTypeEnum::QuantizedS4));
    Checker<ConvBiasForward> checker(handle, !stable_test);
    if (algo) {
        checker.set_before_exec_callback(ConvBiasAlgoChecker<ConvBiasForward>(algo));
    }
    std::unique_ptr<RNG> rng;
    std::unique_ptr<RNG> flt_rng;
    std::unique_ptr<RNG> bias_rng;
    std::unique_ptr<RNG> const_rng;
    std::unique_ptr<RNG> zero_rng;
    // TODO: check range of rng
    if (src_dtype.enumv() == DTypeEnum::QuantizedS8) {
        rng = std::make_unique<UniformIntRNG>(-3, 3);
        flt_rng = std::make_unique<UniformIntRNG>(-3, 3);
        const_rng = std::make_unique<UniformIntRNG>(1, 1);
        zero_rng = std::make_unique<UniformIntRNG>(0, 0);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
        bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
        checker.set_epsilon(1 + 1e-3).set_max_avg_error(1e-1).set_max_avg_biased_error(
                1e-3);
    } else if (src_dtype.enumv() == DTypeEnum::Quantized4Asymm) {
        rng = std::make_unique<UniformIntRNG>(0, 6);
        flt_rng = std::make_unique<UniformIntRNG>(-3, 3);
        const_rng = std::make_unique<UniformIntRNG>(1, 1);
        zero_rng = std::make_unique<UniformIntRNG>(0, 0);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
        bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
        checker.set_epsilon(1 + 1e-3).set_max_avg_error(1e-1).set_max_avg_biased_error(
                1e-3);
    } else if (src_dtype.enumv() == DTypeEnum::QuantizedS4) {
        rng = std::make_unique<UniformIntRNG>(-3, 3);
        flt_rng = std::make_unique<UniformIntRNG>(-3, 3);
        const_rng = std::make_unique<UniformIntRNG>(1, 1);
        zero_rng = std::make_unique<UniformIntRNG>(0, 0);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
        bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
        checker.set_epsilon(1 + 1e-3).set_max_avg_error(1e-1).set_max_avg_biased_error(
                1e-3);
    } else if (src_dtype.enumv() == DTypeEnum::Float16) {
        rng = std::make_unique<NormalRNG>(2.f);
        flt_rng = std::make_unique<NormalRNG>(2.f);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float16);
        bias_rng = std::make_unique<NormalRNG>(2.f);
        checker.set_epsilon(1e-2);
    } else if (src_dtype.enumv() == DTypeEnum::Float32) {
        rng = std::make_unique<NormalRNG>(2.f);
        flt_rng = std::make_unique<NormalRNG>(2.f);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float32);
        bias_rng = std::make_unique<NormalRNG>(2.f);
    }
    using Param = param::ConvBias;
    using Format = Param::Format;
    auto get_z_shape = [&fuse_z, &format](TestArg arg) -> TensorShape {
        TensorShape z{};
        if (fuse_z) {
            size_t hi, wi, sh, sw, ph, pw, fh, fw;
            z = arg.src;
            size_t spatial_idx = 2;
            if (format == Format::NCHW4) {
                hi = arg.src[2];
                wi = arg.src[3];
                fh = arg.filter[2];
                fw = arg.filter[3];
                z[1] = arg.filter[0] / 4;
            } else if (format == Format::NCHW32) {
                hi = arg.src[2];
                wi = arg.src[3];
                fh = arg.filter[2];
                fw = arg.filter[3];
                z[1] = arg.filter[0] / 32;
            } else if (format == Format::NCHW64) {
                hi = arg.src[2];
                wi = arg.src[3];
                fh = arg.filter[2];
                fw = arg.filter[3];
                z[1] = arg.filter[0] / 64;
            } else {
                megdnn_assert(format == Format::CHWN4);
                hi = arg.src[1];
                wi = arg.src[2];
                fh = arg.filter[1];
                fw = arg.filter[2];
                z[0] = arg.filter[3] / 4;
                spatial_idx = 1;
            }
            sh = arg.param.stride_h;
            sw = arg.param.stride_w;
            ph = arg.param.pad_h;
            pw = arg.param.pad_w;
            size_t ho = infer_conv_shape(hi, fh, sh, ph);
            size_t wo = infer_conv_shape(wi, fw, sw, pw);
            z[spatial_idx] = ho;
            z[spatial_idx + 1] = wo;
        }
        return z;
    };
    megdnn_assert(rng != nullptr && flt_rng != nullptr && bias_rng != nullptr);
    checker.set_rng(0, rng.get())
            .set_rng(1, flt_rng.get())
            .set_rng(2, bias_rng.get())
            .set_rng(3, rng.get());
    if (stable_test) {
        checker.set_stable_check(true);
        checker.set_no_naive_check(true);
    }
    if (args.empty()) {
        std::vector<TestArg> default_args;
        if (format == Format::NCHW4) {
            default_args = get_int8_nchw4_args(3);
        } else if (format == Format::CHWN4) {
            default_args = get_int8_chwn4_args(3);
        }
        for (auto&& arg : default_args) {
            auto z = get_z_shape(arg);
            checker.set_dtype(0, src_dtype)
                    .set_dtype(1, filter_dtype)
                    .set_dtype(2, bias_dtype)
                    .set_dtype(3, dst_dtype)
                    .set_dtype(4, dst_dtype)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, arg.bias, z, {}});
        }
    } else {
        for (auto&& arg : args) {
            auto z = get_z_shape(arg);
            checker.set_dtype(0, src_dtype)
                    .set_dtype(1, filter_dtype)
                    .set_dtype(2, bias_dtype)
                    .set_dtype(3, dst_dtype)
                    .set_dtype(4, dst_dtype)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, arg.bias, z, {}});
        }
    }
}

#if MEGDNN_WITH_BENCHMARK
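//! Benchmark shapes for Winograd: a sweep over square feature maps plus the
//! convolution layers of VGG16 and ResNet-18. benchmark_winograd() then times
//! a plain Convolution against the named ConvBias Winograd algorithm and
//! prints per-shape GFlops and the speedup.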
std::vector<conv_bias::TestArg> get_winograd_benchmark_args(
        size_t kernel, size_t pack_size) {
    std::vector<conv_bias::TestArg> args;
    auto pack = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel, size_t p) {
        if (ic % pack_size != 0 || oc % pack_size != 0)
            return;
        if (w + 2 * p < kernel || h + 2 * p < kernel)
            return;
        param::ConvBias param;
        param.stride_h = 1;
        param.stride_w = 1;
        param.pad_h = p;
        param.pad_w = p;
        args.push_back(conv_bias::TestArg{
                param,
                TensorShape{1, ic, h, w},
                TensorShape{oc, ic, kernel, kernel},
                {1, oc, 1, 1}});
    };
    for (size_t ic : {8, 16, 32, 64}) {
        for (size_t oc : {8, 16, 32, 64}) {
            pack(oc, ic, 56, 56, kernel, kernel / 2);
            pack(oc, ic, 128, 128, kernel, kernel / 2);
            pack(oc, ic, 256, 256, kernel, kernel / 2);
        }
    }
    //! conv in vgg16
    pack(512, 512, 15, 15, kernel, kernel / 2);
    pack(512, 256, 15, 15, kernel, kernel / 2);
    pack(256, 256, 29, 29, kernel, kernel / 2);
    pack(256, 128, 29, 29, kernel, kernel / 2);
    pack(128, 128, 57, 57, kernel, kernel / 2);
    pack(128, 64, 57, 57, kernel, kernel / 2);
    pack(64, 64, 123, 123, kernel, kernel / 2);
    pack(64, 24, 123, 123, kernel, kernel / 2);
    pack(24, 24, 224, 224, kernel, kernel / 2);
    //! conv in resnet18
    pack(64, 64, 56, 56, kernel, kernel / 2);
    pack(128, 128, 28, 28, kernel, kernel / 2);
    pack(256, 256, 14, 14, kernel, kernel / 2);
    pack(512, 512, 7, 7, kernel, kernel / 2);
    return args;
}

void benchmark_winograd(
        const char* algo_name, Handle* handle, size_t kernel, size_t pack_size) {
    auto&& args = get_winograd_benchmark_args(kernel, pack_size);
    using namespace conv_bias;
    constexpr size_t RUN = 10;
    Benchmarker<Convolution> benchmark(handle);
    benchmark.set_display(false);
    benchmark.set_times(RUN);
    Benchmarker<ConvBias> benchmark_winograd(handle);
    benchmark_winograd.set_display(false);
    benchmark_winograd.set_times(RUN);
    for (auto&& arg : args) {
        TensorLayout dst_layout;
        auto opr = handle->create_operator<ConvBias>();
        opr->param() = arg.param;
        opr->deduce_layout(
                {arg.src, dtype::Float32()}, {arg.filter, dtype::Float32()},
                {arg.bias, dtype::Float32()}, {}, dst_layout);
        //! dst.nr_elems * IC * FH * FW * 2
        float computations = dst_layout.total_nr_elems() * arg.filter[1] *
                             arg.filter[2] * arg.filter[3] * 2.0 /
                             (1024 * 1024 * 1024) * 1e3;
        param::Convolution conv_param;
        conv_param.pad_h = arg.param.pad_h;
        conv_param.pad_w = arg.param.pad_w;
        conv_param.stride_h = arg.param.stride_h;
        conv_param.stride_w = arg.param.stride_w;
        auto used =
                benchmark.set_param(conv_param).exec({arg.src, arg.filter, {}}) / RUN;
        benchmark_winograd.set_param(arg.param);
        auto used_winograd = algo_benchmark<ConvBias>(
                                     benchmark_winograd,
                                     {arg.src, arg.filter, {}, {}, {}}, algo_name) /
                             RUN;
        printf("%s %s: normal: %f ms %f Gflops winograd: %f ms %f GFlops "
               "speedup: "
               "%f\n",
               arg.src.to_string().c_str(), arg.filter.to_string().c_str(), used,
               computations / used, used_winograd, computations / used_winograd,
               used / used_winograd);
    }
}
#endif  // MEGDNN_WITH_BENCHMARK
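//! NCHW cases parameterised by kernel sizes and stride. The flags trim the
//! set: `no_pad` zeroes the padding, `no_bias` drops the biased variants,
//! `no_nonlinemode` keeps only IDENTITY, `quantized_nlmod` omits SIGMOID, and
//! `only_broadcast_bias` skips full-shaped (non-broadcast) bias tensors.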
std::vector<conv_bias::TestArg> get_conv_bias_args(
        std::vector<size_t> kernel, size_t stride, bool no_pad, bool no_bias,
        bool no_nonlinemode, bool quantized_nlmod, bool only_broadcast_bias) {
    using namespace conv_bias;
    using Param = param::ConvBias;
    using NLMode = param::ConvBias::NonlineMode;
    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                    size_t stride, NLMode nlmode) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        if (!no_pad) {
            param.pad_h = kernel / 2;
            param.pad_w = kernel / 2;
        } else {
            param.pad_h = 0;
            param.pad_w = 0;
        }
        param.nonlineMode = nlmode;
        args.emplace_back(
                param, TensorShape{n, ic, h, w}, TensorShape{oc, ic, kernel, kernel},
                TensorShape{});
        if (!no_bias) {
            args.emplace_back(
                    param, TensorShape{n, ic, h, w},
                    TensorShape{oc, ic, kernel, kernel}, TensorShape{1, oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, ic, h, w},
                        TensorShape{oc, ic, kernel, kernel},
                        TensorShape{
                                n, oc, (h + 2 * param.pad_h - kernel) / stride + 1,
                                (w + 2 * param.pad_w - kernel) / stride + 1});
            }
        }
        param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                param, TensorShape{n, 2 * ic, h, w},
                TensorShape{2, oc, ic, kernel, kernel}, TensorShape{});
        if (!no_bias) {
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, 2 * ic, h, w},
                        TensorShape{2, oc, ic, kernel, kernel},
                        TensorShape{
                                n, 2 * oc, (h + param.pad_h * 2 - kernel) / stride + 1,
                                (w + param.pad_w * 2 - kernel) / stride + 1});
            }
            args.emplace_back(
                    param, TensorShape{n, 2 * ic, h, w},
                    TensorShape{2, oc, ic, kernel, kernel},
                    TensorShape{1, 2 * oc, 1, 1});
        }
    };
    std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
    if (!no_nonlinemode) {
        nonlinemode.emplace_back(NLMode::RELU);
        nonlinemode.emplace_back(NLMode::H_SWISH);
        if (!quantized_nlmod) {
            nonlinemode.emplace_back(NLMode::SIGMOID);
        }
    }
    for (size_t n : {1, 2}) {
        for (auto nlmode : nonlinemode) {
            for (size_t ic : {1, 3, 7}) {
                for (size_t oc : {1, 3, 7}) {
                    for (size_t size : {8, 16, 20}) {
                        for (size_t kern : kernel) {
                            pack(n, oc, ic, size, size, kern, stride, nlmode);
                        }
                    }
                }
            }
        }
    }
    return args;
}
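//! 1x1, stride-1, zero-padding cases in both convolution modes; the bias and
//! nonlinearity flags have the same meaning as in get_conv_bias_args().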
std::vector<megdnn::test::conv_bias::TestArg> get_conv_bias_1x1_args(
        bool no_bias, bool no_nonlinemode, bool quantized_nlmod,
        bool only_broadcast_bias) {
    using namespace conv_bias;
    using Param = param::ConvBias;
    using NLMode = param::ConvBias::NonlineMode;
    using CONVMode = param::ConvBias::Mode;
    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h, size_t stride,
                    NLMode nlmode, CONVMode convmode) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = 0;
        param.pad_w = 0;
        param.mode = convmode;
        param.nonlineMode = nlmode;
        args.emplace_back(
                param, TensorShape{n, ic, h, w}, TensorShape{oc, ic, 1, 1},
                TensorShape{});
        if (!no_bias) {
            args.emplace_back(
                    param, TensorShape{n, ic, h, w}, TensorShape{oc, ic, 1, 1},
                    TensorShape{1, oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, ic, h, w}, TensorShape{oc, ic, 1, 1},
                        TensorShape{n, oc, (h - 1) / stride + 1, (w - 1) / stride + 1});
            }
        }
        param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                param, TensorShape{n, 2 * ic, h, w}, TensorShape{2, oc, ic, 1, 1},
                TensorShape{});
        if (!no_bias) {
            args.emplace_back(
                    param, TensorShape{n, 2 * ic, h, w}, TensorShape{2, oc, ic, 1, 1},
                    TensorShape{1, 2 * oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, 2 * ic, h, w},
                        TensorShape{2, oc, ic, 1, 1},
                        TensorShape{
                                n, 2 * oc, (h - 1) / stride + 1, (w - 1) / stride + 1});
            }
        }
    };
    std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
    if (!no_nonlinemode) {
        nonlinemode.emplace_back(NLMode::RELU);
        nonlinemode.emplace_back(NLMode::H_SWISH);
        if (!quantized_nlmod) {
            nonlinemode.emplace_back(NLMode::SIGMOID);
        }
    }
    std::vector<CONVMode> convmodes{
            param::ConvBias::Mode::CONVOLUTION,
            param::ConvBias::Mode::CROSS_CORRELATION};
    for (size_t n : {1, 2})
        for (size_t oc : {1, 9, 33})
            for (size_t ic : {1, 16, 64})
                for (size_t size : {1, 7, 14, 28})
                    for (auto nlmode : nonlinemode)
                        for (auto convmode : convmodes) {
                            pack(n, oc, ic, size, size, 1, nlmode, convmode);
                        }
    return args;
}

void check_conv_bias(
        std::vector<conv_bias::TestArg> args, Handle* handle, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, arg.bias, {}, {}});
    }
}
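//! The checker_* helpers below fix a dtype combination (int8x8x16, int8x8x32,
//! quantized int8, ...), optionally use the weight-preprocess proxy, and run
//! every TestArg against the named algorithm.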
void checker_conv_bias_int8x8x16(
        std::vector<conv_bias::TestArg> args, Handle* handle, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, dtype::Int8());
    checker.set_dtype(1, dtype::Int8());
    checker.set_dtype(2, dtype::Int16());
    checker.set_dtype(4, dtype::Int16());
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
    }
}

void check_conv_bias_preprocess(
        std::vector<conv_bias::TestArg> args, Handle* handle, RNG* rng, float epsilon,
        DType type0, DType type1, DType type2, DType type3, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(handle);
    checker.set_dtype(0, type0);
    checker.set_dtype(1, type1);
    checker.set_dtype(2, type2);
    checker.set_dtype(4, type3);
    checker.set_epsilon(epsilon);
    if (NULL != rng) {
        checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng).set_rng(3, rng);
    }
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, arg.bias, {}, {}});
    }
}

void checker_conv_bias_common(
        std::vector<conv_bias::TestArg> args, Handle* handle, RNG* rng, float epsilon,
        DType type0, DType type1, DType type2, DType type3, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, type0);
    checker.set_dtype(1, type1);
    checker.set_dtype(2, type2);
    checker.set_dtype(4, type3);
    checker.set_epsilon(epsilon);
    if (NULL != rng) {
        checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng).set_rng(3, rng);
    }
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, arg.bias, {}, {}});
    }
}

void checker_conv_bias_mul_int8x8x32(
        std::vector<conv_bias::TestArg> args, Handle* handle, const char* algo_name) {
    using namespace conv_bias;
    float epsilon = 0.001;
#if MEGDNN_ARMV7
    epsilon = 1.0;
#endif
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, dtype::Int8());
    checker.set_dtype(1, dtype::Int8());
    checker.set_dtype(2, dtype::Int32());
    checker.set_dtype(4, dtype::Int32());
    checker.set_epsilon(epsilon);
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
    }
    UniformIntRNG rng{-50, 50};
    for (auto&& arg : args) {
        checker.set_dtype(0, dtype::QuantizedS8(2.5f))
                .set_dtype(1, dtype::QuantizedS8(2.5f))
                .set_dtype(2, dtype::QuantizedS32(6.25f))
                .set_dtype(4, dtype::QuantizedS32(6.25f))
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_rng(2, &rng)
                .set_param(arg.param)
                .set_epsilon(epsilon)
                .execs({arg.src, arg.filter, {}, {}, {}});
    }
}

void checker_conv_bias_int8x8x32_preprocess(
        std::vector<conv_bias::TestArg> args, Handle* handle, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, dtype::Int8());
    checker.set_dtype(1, dtype::Int8());
    checker.set_dtype(2, dtype::Int32());
    checker.set_dtype(4, dtype::Int32());
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
    }
    UniformIntRNG rng{-50, 50};
    for (auto&& arg : args) {
        checker.set_dtype(0, dtype::QuantizedS8(2.5f))
                .set_dtype(1, dtype::QuantizedS8(2.5f))
                .set_dtype(2, dtype::QuantizedS32(6.25f))
                .set_dtype(4, dtype::QuantizedS32(6.25f))
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_rng(2, &rng)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}, {}, {}});
    }
}
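//! NCHW44 / NCHW44_DOT cases. `is_input_nchw` generates hybrid cases whose
//! source stays in NCHW (weights in {OC/4, FH, FW, IC, 4}); combinations whose
//! channels or padding cannot be expressed in the requested layout are skipped.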
std::vector<conv_bias::TestArg> get_nchw44_conv_bias_args(
        std::vector<size_t> kernel_vec,
        std::vector<param::ConvBias::NonlineMode> nlmode_vec,
        std::vector<megdnn::BiasMode> biasmode_vec, size_t stride, bool no_pad,
        bool is_input_nchw, bool is_nchw44_dot) {
    using namespace conv_bias;
    using NLMode = param::ConvBias::NonlineMode;
    std::vector<TestArg> args;
    MEGDNN_MARK_USED_VAR(no_pad);
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t h, size_t w, size_t kernel,
                    size_t stride, size_t group, NLMode nlmode,
                    megdnn::BiasMode bias_mode, int any_pad = -1) {
        constexpr int pack_c = 4;
        const size_t pad = any_pad >= 0 ? any_pad : kernel / 2;
        auto oc_per_group = oc / group;
        auto ic_per_group = ic / group;
        bool ok_group = (oc % group == 0 && ic % group == 0) &&
                        oc_per_group % pack_c == 0 && oc_per_group > 0 &&
                        ic_per_group > 0;
        bool nchw_disable = group > 1 || ic_per_group >= 4;
        bool nchw44_disable = ic_per_group % pack_c != 0;
        bool invalid_pad = (w + 2 * pad < kernel) || (h + 2 * pad < kernel);
        if (!(ok_group) || invalid_pad) {
            return;
        }
        if ((is_input_nchw && nchw_disable) || (!is_input_nchw && nchw44_disable)) {
            return;
        }
        size_t kernel_h = kernel;
        size_t kernel_w = kernel;
        param::ConvBias param;
        if (!is_nchw44_dot) {
            param.format = param::ConvBias::Format::NCHW44;
        } else {
            param.format = param::ConvBias::Format::NCHW44_DOT;
        }
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = pad;
        param.pad_w = pad;
        param.nonlineMode = nlmode;
        auto src_tensor_shape = TensorShape{n, ic / pack_c, h, w, pack_c};
        auto weight_tensor_shape = TensorShape{oc / pack_c, ic / pack_c, kernel_h,
                                               kernel_w, pack_c, pack_c};
        auto bias_tensor_shape = TensorShape{};
        if (bias_mode == megdnn::BiasMode::BROADCAST_CHANNEL_BIAS) {
            bias_tensor_shape = {1, oc / pack_c, 1, 1, pack_c};
        } else if (bias_mode == megdnn::BiasMode::BIAS) {
            bias_tensor_shape = {
                    n, oc / pack_c, (h + 2 * pad - kernel) / stride + 1,
                    (w + 2 * pad - kernel) / stride + 1, pack_c};
        }
        if (group == 1) {
            param.sparse = param::ConvBias::Sparse::DENSE;
        } else if (group > 1 && ic / group == 1 && oc / group == 1) {
            megdnn_assert(0, "not support channel wise");
            param.sparse = param::ConvBias::Sparse::GROUP;
            weight_tensor_shape =
                    TensorShape{group / pack_c, 1, 1, kernel_h, kernel_w, pack_c};
        } else if (
                group > 1 && oc_per_group % pack_c == 0 && oc / group > 0 &&
                ic_per_group % pack_c == 0 && ic / group > 0) {
            param.sparse = param::ConvBias::Sparse::GROUP;
            weight_tensor_shape = TensorShape{group,
                                              oc_per_group / pack_c,
                                              ic_per_group / pack_c,
                                              kernel_h,
                                              kernel_w,
                                              pack_c,
                                              pack_c};
        }
        if (is_input_nchw) {
            src_tensor_shape = TensorShape{n, ic, h, w};
            weight_tensor_shape =
                    TensorShape{oc / pack_c, kernel_h, kernel_w, ic, pack_c};
        }
        args.emplace_back(
                param, src_tensor_shape, weight_tensor_shape, bias_tensor_shape);
    };
    for (auto bias : biasmode_vec)
        for (auto nlmode : nlmode_vec)
            for (size_t n : {1, 2})
                for (size_t kernel : kernel_vec)
                    for (size_t oc : {4, 12})
                        for (size_t ic : {1, 3, 4, 12})
                            for (size_t h : {1, 3, 12})
                                for (size_t w : {1, 16, 23}) {
                                    for (size_t group = 1;
                                         group <= std::min(std::min(oc, ic), 4_z);
                                         ++group) {
                                        if (kernel != 1 && (h == 1 || w == 1)) {
                                            continue;
                                        }
                                        pack(n, oc, ic, h, w, kernel, stride, group,
                                             nlmode, bias);
                                    }
                                }
    return args;
}
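//! NCHW88 cases (channel pack of 8) for dense and group convolution; group
//! configurations whose per-group channels are not multiples of 8 are skipped.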
std::vector<conv_bias::TestArg> get_nchw88_conv_bias_args(
        std::vector<size_t> kernel_vec,
        std::vector<param::ConvBias::NonlineMode> nlmode_vec,
        std::vector<megdnn::BiasMode> biasmode_vec, size_t stride) {
    using namespace conv_bias;
    using NLMode = param::ConvBias::NonlineMode;
    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t h, size_t w, size_t kernel,
                    size_t stride, size_t group, NLMode nlmode,
                    megdnn::BiasMode bias_mode) {
        constexpr int pack_c = 8;
        const size_t pad = kernel / 2;
        auto oc_per_group = oc / group;
        auto ic_per_group = ic / group;
        megdnn_assert(
                oc_per_group % pack_c == 0 && ic_per_group % pack_c == 0,
                "ocpg/icpg not divided by 8");
        size_t kernel_h = kernel;
        size_t kernel_w = kernel;
        param::ConvBias param;
        param.format = param::ConvBias::Format::NCHW88;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = pad;
        param.pad_w = pad;
        param.nonlineMode = nlmode;
        auto src_tensor_shape = TensorShape{n, ic / pack_c, h, w, pack_c};
        auto weight_tensor_shape = TensorShape{oc / pack_c, ic / pack_c, kernel_h,
                                               kernel_w, pack_c, pack_c};
        auto bias_tensor_shape = TensorShape{};
        if (bias_mode == megdnn::BiasMode::BROADCAST_CHANNEL_BIAS) {
            bias_tensor_shape = {1, oc / pack_c, 1, 1, pack_c};
        } else if (bias_mode == megdnn::BiasMode::BIAS) {
            bias_tensor_shape = {
                    n, oc / pack_c, (h + 2 * pad - kernel) / stride + 1,
                    (w + 2 * pad - kernel) / stride + 1, pack_c};
        }
        if (group == 1) {
            param.sparse = param::ConvBias::Sparse::DENSE;
        } else {
            param.sparse = param::ConvBias::Sparse::GROUP;
            weight_tensor_shape = TensorShape{group,
                                              oc_per_group / pack_c,
                                              ic_per_group / pack_c,
                                              kernel_h,
                                              kernel_w,
                                              pack_c,
                                              pack_c};
        }
        args.emplace_back(
                param, src_tensor_shape, weight_tensor_shape, bias_tensor_shape);
    };
    for (auto bias : biasmode_vec)
        for (auto nlmode : nlmode_vec)
            for (size_t n : {1, 2})
                for (size_t kernel : kernel_vec)
                    for (size_t oc : {8, 16})
                        for (size_t ic : {8, 16, 24})
                            for (size_t h : {1, 3, 12})
                                for (size_t w : {1, 8, 13}) {
                                    for (size_t group = 1; group < oc / 8; ++group) {
                                        if (ic % (group * 8) || oc % (group * 8)) {
                                            continue;
                                        }
                                        if (kernel < h || kernel < w) {
                                            continue;
                                        }
                                        pack(n, oc, ic, h, w, kernel, stride, group,
                                             nlmode, bias);
                                    }
                                }
    return args;
}

}  // namespace conv_bias
}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen