/**
 * \file dnn/test/common/conv_bias.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "test/common/conv_bias.h"
#include "megdnn/opr_param_defs.h"
#include "src/common/utils.h"
#include "test/common/benchmarker.h"

namespace megdnn {
namespace test {
namespace conv_bias {
namespace {
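//! Rewrite an NCHW4 test case in place as CHWN4: rotate the leading dimension
//! of src/filter/bias to sit just before the trailing pack-of-4 dimension and
//! switch the param format accordingly.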
void convert_arg_from_nchw4_to_chwn4(TestArg& arg) {
    arg.param.format = param::ConvBias::Format::CHWN4;
    arg.src = TensorShape{arg.src[1], arg.src[2], arg.src[3], arg.src[0], 4};
    arg.filter =
            TensorShape{arg.filter[1], arg.filter[2], arg.filter[3], arg.filter[0], 4};
    arg.bias = TensorShape{arg.bias[1], arg.bias[2], arg.bias[3], arg.bias[0], 4};
}
}  // namespace
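//! Basic dense NCHW cases covering both CROSS_CORRELATION and CONVOLUTION
//! modes for every supported nonlinearity.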
std::vector<TestArg> get_args() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        // fallback case
        args.emplace_back(cur_param, TensorShape{10, 1, i, i},
                          TensorShape{1, 1, 8, 8}, TensorShape{1, 1, 1, 1});
        args.emplace_back(cur_param, TensorShape{10, 4, i, i},
                          TensorShape{3, 4, 4, 4}, TensorShape{1, 3, 1, 1});
        cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
        args.emplace_back(cur_param, TensorShape{10, 4, i, i},
                          TensorShape{1, 4, 3, 3}, TensorShape{1, 1, 1, 1});
        args.emplace_back(cur_param, TensorShape{1, 4, i, i},
                          TensorShape{5, 4, 3, 3}, TensorShape{1, 5, 1, 1});
    } }
    // clang-format on
    return args;
}
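//! Channel-wise (group) convolution cases: one input channel per group,
//! sweeping stride, padding, filter size and output channels per group.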
std::vector<TestArg> get_chanwise_args() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
    cur_param.sparse = ConvBias::Param::Sparse::GROUP;
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
        cur_param.nonlineMode = nlmode;
        // simple case
        for (uint32_t s : {1, 2})
            for (uint32_t p : {0, 1, 2, 3})
                for (size_t f : {2, 3, 5, 7})
                    for (size_t ocpg : {1, 3}) {
                        cur_param.pad_h = cur_param.pad_w = p;
                        cur_param.stride_h = cur_param.stride_w = s;
                        args.emplace_back(
                                cur_param, TensorShape{2, 3, 16, 16},
                                TensorShape{3, ocpg, 1, f, f},
                                TensorShape{1, 3 * ocpg, 1, 1});
                    }
        args.emplace_back(
                cur_param, TensorShape{32, 12, 20, 10}, TensorShape{12, 2, 1, 4, 5},
                TensorShape{1, 24, 1, 1});
        // padding larger than kern
        args.emplace_back(
                cur_param, TensorShape{32, 12, 20, 10}, TensorShape{12, 2, 1, 4, 5},
                TensorShape{1, 24, 1, 1});
    }
    return args;
}
std::vector<TestArg> get_args_1x1() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
        cur_param.nonlineMode = nlmode;
        for (size_t i : {16, 19}) {
            cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
            args.emplace_back(
                    cur_param, TensorShape{2, 20, i, i + 1}, TensorShape{30, 20, 1, 1},
                    TensorShape{1, 30, 1, 1});
            cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
            args.emplace_back(
                    cur_param, TensorShape{2, 20, i, i + 1}, TensorShape{30, 20, 1, 1},
                    TensorShape{1, 30, 1, 1});
        }
    }
    return args;
}
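//! Winograd cases for a given square kernel size: dense and group
//! convolutions with no bias, full bias and per-channel bias, plus larger
//! shapes for multi-thread OC-parallel testing.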
std::vector<TestArg> get_winograd_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t ic : {1, 3, 4, 7}) {
    for (size_t oc : {1, 3, 4, 7}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 0;
        //! no bias
        args.emplace_back(cur_param, TensorShape{1, ic, i, i},
                          TensorShape{oc, ic, kernel_size, kernel_size},
                          TensorShape{});
        //! bias
        args.emplace_back(
                cur_param, TensorShape{2, ic, i, i},
                TensorShape{oc, ic, kernel_size, kernel_size},
                TensorShape{2, oc, (i + cur_param.pad_h * 2 - kernel_size) + 1,
                            (i + cur_param.pad_w * 2 - kernel_size) + 1});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, kernel_size, kernel_size},
                          TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * ic, i, i},
                TensorShape{2, oc, ic, kernel_size, kernel_size},
                TensorShape{2, 2 * oc,
                            (i + cur_param.pad_h * 2 - kernel_size) + 1,
                            (i + cur_param.pad_w * 2 - kernel_size) + 1});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, kernel_size, kernel_size},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(
                cur_param, TensorShape{1, 8, i, i},
                TensorShape{128, 8, kernel_size, kernel_size},
                TensorShape{1, 128, 1, 1});
        args.emplace_back(
                cur_param, TensorShape{2, 8, i, i},
                TensorShape{128, 8, kernel_size, kernel_size},
                TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * 8, i, i},
                TensorShape{2, 128, 8, kernel_size, kernel_size},
                TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
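//! Winograd cases for MK-packed matmul back-ends; channel counts are kept as
//! multiples of pack_size so the packed layouts stay valid.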
std::vector<TestArg> get_winograd_mk_packed_args(size_t pack_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t ic : {pack_size, 2 * pack_size}) {
    for (size_t oc : {pack_size, 2 * pack_size}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
                          TensorShape{pack_size, pack_size, 3, 3},
                          TensorShape{1, pack_size, 1, 1});
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(
                cur_param, TensorShape{1, 8, i, i}, TensorShape{128, 8, 3, 3},
                TensorShape{1, 128, 1, 1});
        args.emplace_back(
                cur_param, TensorShape{2, 8, i, i}, TensorShape{128, 8, 3, 3},
                TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * 8, i, i}, TensorShape{2, 128, 8, 3, 3},
                TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
std::vector<TestArg> get_quantized_winograd_mk_packed_args(
        size_t pack_size, bool compute_float32) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (size_t ic : {pack_size, 2 * pack_size}) {
    for (size_t oc : {pack_size, 2 * pack_size}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        if (compute_float32) {
            cur_param.compute_mode = param::ConvBias::ComputeMode::FLOAT32;
        }
        args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
                          TensorShape{pack_size, pack_size, 3, 3},
                          TensorShape{1, pack_size, 1, 1});
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(
                cur_param, TensorShape{1, 8, i, i}, TensorShape{128, 8, 3, 3},
                TensorShape{1, 128, 1, 1});
        args.emplace_back(
                cur_param, TensorShape{2, 8, i, i}, TensorShape{128, 8, 3, 3},
                TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * 8, i, i}, TensorShape{2, 128, 8, 3, 3},
                TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
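//! Quantized conv-bias cases for a single nonlinearity; get_quantized_args()
//! below concatenates the IDENTITY, RELU and H_SWISH variants.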
std::vector<TestArg> get_quantized_args_with_nlmode(
        param::ConvBias::NonlineMode nlmode) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    // clang-format off
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION,
                      param::ConvBias::Mode::CONVOLUTION}) {
    for (size_t ic : {1, 2, 3, 4, 5, 7}) {
    for (size_t oc : {1, 2, 3, 4, 5, 7}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 0;
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 1, 1}, TensorShape{});
    } } } }
    // clang-format on
    return args;
}
std::vector<TestArg> get_quantized_args() {
    using NLMode = param::ConvBias::NonlineMode;
    auto arg_p1 = get_quantized_args_with_nlmode(NLMode::IDENTITY),
         arg_p2 = get_quantized_args_with_nlmode(NLMode::RELU),
         arg_p3 = get_quantized_args_with_nlmode(NLMode::H_SWISH);
    std::vector<TestArg> args;
    args.insert(args.end(), arg_p1.begin(), arg_p1.end());
    args.insert(args.end(), arg_p2.begin(), arg_p2.end());
    args.insert(args.end(), arg_p3.begin(), arg_p3.end());
    return args;
}
std::vector<TestArg> get_int8_nchw4_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {2, 1}) {
        if (kernel_size == 7) {
            b = std::min(b, 32_z);
        }
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
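//! int8 NCHW44/NCHW88 cases (pack_size 4 or 8), optionally computed in
//! float32 and optionally in group mode; bias is covered as none,
//! per-channel and full.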
std::vector<TestArg> get_int8_nchw44_args(
        size_t kernel_size, size_t pack_size, bool compute_float32, bool group_mode) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    megdnn_assert(pack_size > 0, "not support pack_size");
    megdnn_assert(kernel_size > 0, "not support kernel_size");
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU, NLMode::H_SWISH}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {1, 2}) {
    for (size_t ic : {8, 16}) {
    for (size_t oc : {8, 16}) {
    for (size_t h : {9, 23}) {
    for (size_t w : {9, 23}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {1}) {
        if (kernel_size == 7) {
            b = std::min(b, 32_z);
        }
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        if (pack_size == 4) {
            cur_param.format = param::ConvBias::Format::NCHW44;
        } else if (pack_size == 8) {
            cur_param.format = param::ConvBias::Format::NCHW88;
        }
        if (compute_float32) {
            cur_param.compute_mode =
                    param::ConvBias::ComputeMode::FLOAT32;
        }
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        if (!group_mode) {
            //! no bias
            args.emplace_back(cur_param,
                              TensorShape{b, ic / pack_size, h, w, pack_size},
                              TensorShape{oc / pack_size, ic / pack_size, f, f,
                                          pack_size, pack_size},
                              TensorShape{});
            //! bias channel
            args.emplace_back(cur_param,
                              TensorShape{b, ic / pack_size, h, w, pack_size},
                              TensorShape{oc / pack_size, ic / pack_size, f, f,
                                          pack_size, pack_size},
                              TensorShape{1, oc / pack_size, 1, 1, pack_size});
            //! bias
            args.emplace_back(
                    cur_param, TensorShape{b, ic / pack_size, h, w, pack_size},
                    TensorShape{oc / pack_size, ic / pack_size, f, f, pack_size,
                                pack_size},
                    TensorShape{b, oc / pack_size, (h - f + 2 * p) / s + 1,
                                (w - f + 2 * p) / s + 1, pack_size});
        } else {
            cur_param.sparse = param::ConvBias::Sparse::GROUP;
            args.emplace_back(
                    cur_param,
                    TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
                    TensorShape{2, oc / pack_size, ic / pack_size, 3, 3,
                                pack_size, pack_size},
                    TensorShape{2, 2 * oc / pack_size, (h - f + 2 * p) / s + 1,
                                (w - f + 2 * p) / s + 1, pack_size});
            args.emplace_back(
                    cur_param,
                    TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
                    TensorShape{2, oc / pack_size, ic / pack_size, f, f,
                                pack_size, pack_size},
                    TensorShape{1, 2 * oc / pack_size, 1, 1, pack_size});
            args.emplace_back(
                    cur_param,
                    TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
                    TensorShape{2, oc / pack_size, ic / pack_size, f, f,
                                pack_size, pack_size},
                    TensorShape{});
        }
    } } } } } } } } }
    // clang-format on
    return args;
}
std::vector<TestArg> get_int8_nchw4_args_check_bounds(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {7, 8, 4, 1}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 8, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
std::vector<TestArg> get_int8_nchw4_args_small_batch(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU, NLMode::H_SWISH}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {12, 8, 4}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 8, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 9, 10, 11, 12, 13, 14, 15, 16}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
std::vector<TestArg> get_int8_nchw4_small_channel_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU, NLMode::H_SWISH}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {4, 12}) {
    for (size_t oc : {128, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
std::vector<TestArg> get_int8_nchw4_small_channel_args_check_bounds(
        size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {8, 7, 4, 1}) {
    for (size_t ic : {4, 12}) {
    for (size_t oc : {16, 8, 12, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
std::vector<TestArg> get_int8_chwn4_args(size_t kernel_size) {
    auto args = get_int8_nchw4_args(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
std::vector<TestArg> get_int8_chwn4_args_check_bounds(size_t kernel_size) {
    auto args = get_int8_nchw4_args_check_bounds(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
std::vector<TestArg> get_int8_chwn4_small_channel_args(size_t kernel_size) {
    auto args = get_int8_nchw4_small_channel_args(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
std::vector<TestArg> get_int8_chwn4_small_channel_args_check_bounds(
        size_t kernel_size) {
    auto args = get_int8_nchw4_small_channel_args_check_bounds(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
std::vector<TestArg> get_int8_chwn4_args_small_batch(size_t kernel_size) {
    auto args = get_int8_nchw4_args_small_batch(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
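//! NCHW4 int8 cases intended for TensorCore algorithms: batch fixed at 64 and
//! OC at 128, with a CHWN4 counterpart produced by layout conversion.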
std::vector<TestArg> get_int8_nchw4_tensorcore_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU, NLMode::H_SWISH}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
        size_t b = 64, oc = 128;
        for (size_t ic : {32, 64}) {
        for (size_t h : {8}) {
        for (size_t w : {11}) {
        for (int p : {static_cast<int>(kernel_size / 2), 0}) {
        for (size_t s : {1, 2}) {
            size_t f = kernel_size;
            cur_param.mode = mode;
            cur_param.nonlineMode = nlmode;
            cur_param.format = param::ConvBias::Format::NCHW4;
            cur_param.sparse = param::ConvBias::Sparse::DENSE;
            cur_param.pad_h = cur_param.pad_w = p;
            cur_param.stride_h = cur_param.stride_w = s;
            //! bias channel
            args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                              TensorShape{oc, ic / 4, f, f, 4},
                              TensorShape{1, oc / 4, 1, 1, 4});
        } } } } }
    } }
    // clang-format on
    return args;
}
std::vector<TestArg> get_int8_chwn4_tensorcore_args(size_t kernel_size) {
    auto args = get_int8_nchw4_tensorcore_args(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
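//! Run ConvBiasForward through the checker for the given dtypes/format/algo.
//! RNGs and error tolerances are chosen per source dtype; when fuse_z is set,
//! a z tensor with the inferred output shape is passed as the fourth input.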
void check_conv_bias(
        DType src_dtype, DType filter_dtype, DType bias_dtype, DType dst_dtype,
        Handle* handle, const char* algo, param::ConvBias::Format format,
        const std::vector<TestArg>& args, bool fuse_z, bool stable_test) {
    megdnn_assert(
            (src_dtype.enumv() == filter_dtype.enumv()) ||
            (src_dtype.enumv() == DTypeEnum::Quantized4Asymm &&
             filter_dtype.enumv() == DTypeEnum::QuantizedS4));
    Checker<ConvBiasForward> checker(handle, !stable_test);
    if (algo) {
        checker.set_before_exec_callback(ConvBiasAlgoChecker<ConvBiasForward>(algo));
    }
    std::unique_ptr<RNG> rng;
    std::unique_ptr<RNG> flt_rng;
    std::unique_ptr<RNG> bias_rng;
    std::unique_ptr<RNG> const_rng;
    std::unique_ptr<RNG> zero_rng;
    // TODO: check range of rng
    if (src_dtype.enumv() == DTypeEnum::QuantizedS8) {
        rng = std::make_unique<UniformIntRNG>(-3, 3);
        flt_rng = std::make_unique<UniformIntRNG>(-3, 3);
        const_rng = std::make_unique<UniformIntRNG>(1, 1);
        zero_rng = std::make_unique<UniformIntRNG>(0, 0);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
        bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
        checker.set_epsilon(1 + 1e-3).set_max_avg_error(1e-1).set_max_avg_biased_error(
                1e-3);
    } else if (src_dtype.enumv() == DTypeEnum::Quantized4Asymm) {
        rng = std::make_unique<UniformIntRNG>(0, 6);
        flt_rng = std::make_unique<UniformIntRNG>(-3, 3);
        const_rng = std::make_unique<UniformIntRNG>(1, 1);
        zero_rng = std::make_unique<UniformIntRNG>(0, 0);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
        bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
        checker.set_epsilon(1 + 1e-3).set_max_avg_error(1e-1).set_max_avg_biased_error(
                1e-3);
    } else if (src_dtype.enumv() == DTypeEnum::QuantizedS4) {
        rng = std::make_unique<UniformIntRNG>(-3, 3);
        flt_rng = std::make_unique<UniformIntRNG>(-3, 3);
        const_rng = std::make_unique<UniformIntRNG>(1, 1);
        zero_rng = std::make_unique<UniformIntRNG>(0, 0);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
        bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
        checker.set_epsilon(1 + 1e-3).set_max_avg_error(1e-1).set_max_avg_biased_error(
                1e-3);
    } else if (src_dtype.enumv() == DTypeEnum::Float16) {
        rng = std::make_unique<NormalRNG>(2.f);
        flt_rng = std::make_unique<NormalRNG>(2.f);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float16);
        bias_rng = std::make_unique<NormalRNG>(2.f);
        checker.set_epsilon(1e-2);
    } else if (src_dtype.enumv() == DTypeEnum::Float32) {
        rng = std::make_unique<NormalRNG>(2.f);
        flt_rng = std::make_unique<NormalRNG>(2.f);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float32);
        bias_rng = std::make_unique<NormalRNG>(2.f);
    }
    using Param = param::ConvBias;
    using Format = Param::Format;
    auto get_z_shape = [&fuse_z, &format](TestArg arg) -> TensorShape {
        TensorShape z{};
        if (fuse_z) {
            size_t hi, wi, sh, sw, ph, pw, fh, fw;
            z = arg.src;
            size_t spatial_idx = 2;
            if (format == Format::NCHW4) {
                hi = arg.src[2];
                wi = arg.src[3];
                fh = arg.filter[2];
                fw = arg.filter[3];
                z[1] = arg.filter[0] / 4;
            } else if (format == Format::NCHW32) {
                hi = arg.src[2];
                wi = arg.src[3];
                fh = arg.filter[2];
                fw = arg.filter[3];
                z[1] = arg.filter[0] / 32;
            } else if (format == Format::NCHW64) {
                hi = arg.src[2];
                wi = arg.src[3];
                fh = arg.filter[2];
                fw = arg.filter[3];
                z[1] = arg.filter[0] / 64;
            } else {
                megdnn_assert(format == Format::CHWN4);
                hi = arg.src[1];
                wi = arg.src[2];
                fh = arg.filter[1];
                fw = arg.filter[2];
                z[0] = arg.filter[3] / 4;
                spatial_idx = 1;
            }
            sh = arg.param.stride_h;
            sw = arg.param.stride_w;
            ph = arg.param.pad_h;
            pw = arg.param.pad_w;
            size_t ho = infer_conv_shape(hi, fh, sh, ph);
            size_t wo = infer_conv_shape(wi, fw, sw, pw);
            z[spatial_idx] = ho;
            z[spatial_idx + 1] = wo;
        }
        return z;
    };
    megdnn_assert(rng != nullptr && flt_rng != nullptr && bias_rng != nullptr);
    checker.set_rng(0, rng.get())
            .set_rng(1, flt_rng.get())
            .set_rng(2, bias_rng.get())
            .set_rng(3, rng.get());
    if (stable_test) {
        checker.set_stable_check(true);
        checker.set_no_naive_check(true);
    }
    if (args.empty()) {
        std::vector<TestArg> default_args;
        if (format == Format::NCHW4) {
            default_args = get_int8_nchw4_args(3);
        } else if (format == Format::CHWN4) {
            default_args = get_int8_chwn4_args(3);
        }
        for (auto&& arg : default_args) {
            auto z = get_z_shape(arg);
            checker.set_dtype(0, src_dtype)
                    .set_dtype(1, filter_dtype)
                    .set_dtype(2, bias_dtype)
                    .set_dtype(3, dst_dtype)
                    .set_dtype(4, dst_dtype)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, arg.bias, z, {}});
        }
    } else {
        for (auto&& arg : args) {
            auto z = get_z_shape(arg);
            checker.set_dtype(0, src_dtype)
                    .set_dtype(1, filter_dtype)
                    .set_dtype(2, bias_dtype)
                    .set_dtype(3, dst_dtype)
                    .set_dtype(4, dst_dtype)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, arg.bias, z, {}});
        }
    }
}
#if MEGDNN_WITH_BENCHMARK
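//! Benchmark helpers: compare a plain Convolution run against the named
//! winograd ConvBias algorithm on a sweep of layer shapes (including VGG16-
//! and ResNet18-like convolutions) and report time, GFlops and speedup.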
std::vector<conv_bias::TestArg> get_winograd_benchmark_args(
        size_t kernel, size_t pack_size) {
    std::vector<conv_bias::TestArg> args;
    auto pack = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel, size_t p) {
        if (ic % pack_size != 0 || oc % pack_size != 0)
            return;
        if (w + 2 * p < kernel || h + 2 * p < kernel)
            return;
        param::ConvBias param;
        param.stride_h = 1;
        param.stride_w = 1;
        param.pad_h = p;
        param.pad_w = p;
        args.push_back(conv_bias::TestArg{
                param,
                TensorShape{1, ic, h, w},
                TensorShape{oc, ic, kernel, kernel},
                {1, oc, 1, 1}});
    };
    for (size_t ic : {8, 16, 32, 64}) {
        for (size_t oc : {8, 16, 32, 64}) {
            pack(oc, ic, 56, 56, kernel, kernel / 2);
            pack(oc, ic, 128, 128, kernel, kernel / 2);
            pack(oc, ic, 256, 256, kernel, kernel / 2);
        }
    }
    //! conv in vgg16
    pack(512, 512, 15, 15, kernel, kernel / 2);
    pack(512, 256, 15, 15, kernel, kernel / 2);
    pack(256, 256, 29, 29, kernel, kernel / 2);
    pack(256, 128, 29, 29, kernel, kernel / 2);
    pack(128, 128, 57, 57, kernel, kernel / 2);
    pack(128, 64, 57, 57, kernel, kernel / 2);
    pack(64, 64, 123, 123, kernel, kernel / 2);
    pack(64, 24, 123, 123, kernel, kernel / 2);
    pack(24, 24, 224, 224, kernel, kernel / 2);
    //! conv in resnet18
    pack(64, 64, 56, 56, kernel, kernel / 2);
    pack(128, 128, 28, 28, kernel, kernel / 2);
    pack(256, 256, 14, 14, kernel, kernel / 2);
    pack(512, 512, 7, 7, kernel, kernel / 2);
    return args;
}
void benchmark_winograd(
        const char* algo_name, Handle* handle, size_t kernel, size_t pack_size) {
    auto&& args = get_winograd_benchmark_args(kernel, pack_size);
    using namespace conv_bias;
    constexpr size_t RUN = 10;
    Benchmarker<Convolution> benchmark(handle);
    benchmark.set_display(false);
    benchmark.set_times(RUN);
    Benchmarker<ConvBias> benchmark_winograd(handle);
    benchmark_winograd.set_display(false);
    benchmark_winograd.set_times(RUN);
    for (auto&& arg : args) {
        TensorLayout dst_layout;
        auto opr = handle->create_operator<ConvBias>();
        opr->param() = arg.param;
        opr->deduce_layout(
                {arg.src, dtype::Float32()}, {arg.filter, dtype::Float32()},
                {arg.bias, dtype::Float32()}, {}, dst_layout);
        //! dst.nr_elems * IC * FH * FW * 2
        float computations = dst_layout.total_nr_elems() * arg.filter[1] *
                             arg.filter[2] * arg.filter[3] * 2.0 /
                             (1024 * 1024 * 1024) * 1e3;
        param::Convolution conv_param;
        conv_param.pad_h = arg.param.pad_h;
        conv_param.pad_w = arg.param.pad_w;
        conv_param.stride_h = arg.param.stride_h;
        conv_param.stride_w = arg.param.stride_w;
        auto used =
                benchmark.set_param(conv_param).exec({arg.src, arg.filter, {}}) / RUN;
        benchmark_winograd.set_param(arg.param);
        auto used_winograd = algo_benchmark<ConvBias>(
                                     benchmark_winograd,
                                     {arg.src, arg.filter, {}, {}, {}}, algo_name) /
                             RUN;
        printf("%s %s: normal: %f ms %f Gflops winograd: %f ms %f GFlops "
               "speedup: "
               "%f\n",
               arg.src.to_string().c_str(), arg.filter.to_string().c_str(), used,
               computations / used, used_winograd, computations / used_winograd,
               used / used_winograd);
    }
}
#endif  // MEGDNN_WITH_BENCHMARK
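//! General NCHW conv-bias cases: dense and group convolutions over the given
//! kernel sizes and stride, with optional padding, bias and nonlinearities
//! (SIGMOID is skipped when the args are intended for quantized kernels).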
std::vector<conv_bias::TestArg> get_conv_bias_args(
        std::vector<size_t> kernel, size_t stride, bool no_pad, bool no_bias,
        bool no_nonlinemode, bool quantized_nlmod, bool only_broadcast_bias) {
    using namespace conv_bias;
    using Param = param::ConvBias;
    using NLMode = param::ConvBias::NonlineMode;
    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                    size_t stride, NLMode nlmode) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        if (!no_pad) {
            param.pad_h = kernel / 2;
            param.pad_w = kernel / 2;
        } else {
            param.pad_h = 0;
            param.pad_w = 0;
        }
        param.nonlineMode = nlmode;
        args.emplace_back(
                param, TensorShape{n, ic, h, w}, TensorShape{oc, ic, kernel, kernel},
                TensorShape{});
        if (!no_bias) {
            args.emplace_back(
                    param, TensorShape{n, ic, h, w},
                    TensorShape{oc, ic, kernel, kernel}, TensorShape{1, oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, ic, h, w},
                        TensorShape{oc, ic, kernel, kernel},
                        TensorShape{
                                n, oc, (h + 2 * param.pad_h - kernel) / stride + 1,
                                (w + 2 * param.pad_h - kernel) / stride + 1});
            }
        }
        param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                param, TensorShape{n, 2 * ic, h, w},
                TensorShape{2, oc, ic, kernel, kernel}, TensorShape{});
        if (!no_bias) {
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, 2 * ic, h, w},
                        TensorShape{2, oc, ic, kernel, kernel},
                        TensorShape{
                                n, 2 * oc, (h + param.pad_h * 2 - kernel) / stride + 1,
                                (w + param.pad_w * 2 - kernel) / stride + 1});
            }
            args.emplace_back(
                    param, TensorShape{n, 2 * ic, h, w},
                    TensorShape{2, oc, ic, kernel, kernel},
                    TensorShape{1, 2 * oc, 1, 1});
        }
    };
    std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
    if (!no_nonlinemode) {
        nonlinemode.emplace_back(NLMode::RELU);
        nonlinemode.emplace_back(NLMode::H_SWISH);
        if (!quantized_nlmod) {
            nonlinemode.emplace_back(NLMode::SIGMOID);
        }
    }
    for (size_t n : {1, 2}) {
        for (auto nlmode : nonlinemode) {
            for (size_t ic : {1, 3, 7}) {
                for (size_t oc : {1, 3, 7}) {
                    for (size_t size : {8, 16, 20}) {
                        for (size_t kern : kernel) {
                            pack(n, oc, ic, size, size, kern, stride, nlmode);
                        }
                    }
                }
            }
        }
    }
    return args;
}
std::vector<megdnn::test::conv_bias::TestArg> get_conv_bias_1x1_args(
        bool no_bias, bool no_nonlinemode, bool quantized_nlmod,
        bool only_broadcast_bias) {
    using namespace conv_bias;
    using Param = param::ConvBias;
    using NLMode = param::ConvBias::NonlineMode;
    using CONVMode = param::ConvBias::Mode;
    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h, size_t stride,
                    NLMode nlmode, CONVMode convmode) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = 0;
        param.pad_w = 0;
        param.mode = convmode;
        param.nonlineMode = nlmode;
        args.emplace_back(
                param, TensorShape{n, ic, h, w}, TensorShape{oc, ic, 1, 1},
                TensorShape{});
        if (!no_bias) {
            args.emplace_back(
                    param, TensorShape{n, ic, h, w}, TensorShape{oc, ic, 1, 1},
                    TensorShape{1, oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, ic, h, w}, TensorShape{oc, ic, 1, 1},
                        TensorShape{n, oc, (h - 1) / stride + 1, (w - 1) / stride + 1});
            }
        }
        param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                param, TensorShape{n, 2 * ic, h, w}, TensorShape{2, oc, ic, 1, 1},
                TensorShape{});
        if (!no_bias) {
            args.emplace_back(
                    param, TensorShape{n, 2 * ic, h, w}, TensorShape{2, oc, ic, 1, 1},
                    TensorShape{1, 2 * oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, 2 * ic, h, w},
                        TensorShape{2, oc, ic, 1, 1},
                        TensorShape{
                                n, 2 * oc, (h - 1) / stride + 1, (w - 1) / stride + 1});
            }
        }
    };
    std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
    if (!no_nonlinemode) {
        nonlinemode.emplace_back(NLMode::RELU);
        nonlinemode.emplace_back(NLMode::H_SWISH);
        if (!quantized_nlmod) {
            nonlinemode.emplace_back(NLMode::SIGMOID);
        }
    }
    std::vector<CONVMode> convmodes{
            param::ConvBias::Mode::CONVOLUTION,
            param::ConvBias::Mode::CROSS_CORRELATION};
    for (size_t n : {1, 2})
        for (size_t oc : {1, 9, 33})
            for (size_t ic : {1, 16, 64})
                for (size_t size : {1, 7, 14, 28})
                    for (auto nlmode : nonlinemode)
                        for (auto convmode : convmodes) {
                            pack(n, oc, ic, size, size, 1, nlmode, convmode);
                        }
    return args;
}
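//! Checker front-ends: run the given args against a named algorithm with the
//! dtype combinations each helper covers (float, int8x8x16, int8x8x32,
//! quantized, with or without weight preprocessing).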
void check_conv_bias(
        std::vector<conv_bias::TestArg> args, Handle* handle, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, arg.bias, {}, {}});
    }
}
void checker_conv_bias_int8x8x16(
        std::vector<conv_bias::TestArg> args, Handle* handle, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, dtype::Int8());
    checker.set_dtype(1, dtype::Int8());
    checker.set_dtype(2, dtype::Int16());
    checker.set_dtype(4, dtype::Int16());
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
    }
}
void check_conv_bias_preprocess(
        std::vector<conv_bias::TestArg> args, Handle* handle, RNG* rng, float epsilon,
        DType type0, DType type1, DType type2, DType type3, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(handle);
    checker.set_dtype(0, type0);
    checker.set_dtype(1, type1);
    checker.set_dtype(2, type2);
    checker.set_dtype(4, type3);
    checker.set_epsilon(epsilon);
    if (NULL != rng) {
        checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng).set_rng(3, rng);
    }
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, arg.bias, {}, {}});
    }
}
void checker_conv_bias_common(
        std::vector<conv_bias::TestArg> args, Handle* handle, RNG* rng, float epsilon,
        DType type0, DType type1, DType type2, DType type3, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, type0);
    checker.set_dtype(1, type1);
    checker.set_dtype(2, type2);
    checker.set_dtype(4, type3);
    checker.set_epsilon(epsilon);
    if (NULL != rng) {
        checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng).set_rng(3, rng);
    }
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, arg.bias, {}, {}});
    }
}
void checker_conv_bias_mul_int8x8x32(
        std::vector<conv_bias::TestArg> args, Handle* handle, const char* algo_name) {
    using namespace conv_bias;
    float epsilon = 0.001;
#if MEGDNN_ARMV7
    epsilon = 1.0;
#endif
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, dtype::Int8());
    checker.set_dtype(1, dtype::Int8());
    checker.set_dtype(2, dtype::Int32());
    checker.set_dtype(4, dtype::Int32());
    checker.set_epsilon(epsilon);
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
    }
    UniformIntRNG rng{-50, 50};
    for (auto&& arg : args) {
        checker.set_dtype(0, dtype::QuantizedS8(2.5f))
                .set_dtype(1, dtype::QuantizedS8(2.5f))
                .set_dtype(2, dtype::QuantizedS32(6.25f))
                .set_dtype(4, dtype::QuantizedS32(6.25f))
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_rng(2, &rng)
                .set_param(arg.param)
                .set_epsilon(epsilon)
                .execs({arg.src, arg.filter, {}, {}, {}});
    }
}
void checker_conv_bias_int8x8x32_preprocess(
        std::vector<conv_bias::TestArg> args, Handle* handle, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, dtype::Int8());
    checker.set_dtype(1, dtype::Int8());
    checker.set_dtype(2, dtype::Int32());
    checker.set_dtype(4, dtype::Int32());
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
    }
    UniformIntRNG rng{-50, 50};
    for (auto&& arg : args) {
        checker.set_dtype(0, dtype::QuantizedS8(2.5f))
                .set_dtype(1, dtype::QuantizedS8(2.5f))
                .set_dtype(2, dtype::QuantizedS32(6.25f))
                .set_dtype(4, dtype::QuantizedS32(6.25f))
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_rng(2, &rng)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}, {}, {}});
    }
}
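//! NCHW44 / NCHW44_DOT cases: the pack lambda skips channel/group/padding
//! combinations the packed layouts cannot express, and can also emit the
//! hybrid variant whose input stays in plain NCHW.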
std::vector<conv_bias::TestArg> get_nchw44_conv_bias_args(
        std::vector<size_t> kernel_vec,
        std::vector<param::ConvBias::NonlineMode> nlmode_vec,
        std::vector<megdnn::BiasMode> biasmode_vec, size_t stride, bool no_pad,
        bool is_input_nchw, bool is_nchw44_dot) {
    using namespace conv_bias;
    using NLMode = param::ConvBias::NonlineMode;
    std::vector<TestArg> args;
    MEGDNN_MARK_USED_VAR(no_pad);
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t h, size_t w, size_t kernel,
                    size_t stride, size_t group, NLMode nlmode,
                    megdnn::BiasMode bias_mode, int any_pad = -1) {
        constexpr int pack_c = 4;
        const size_t pad = any_pad >= 0 ? any_pad : kernel / 2;
        auto oc_per_group = oc / group;
        auto ic_per_group = ic / group;
        bool ok_group = (oc % group == 0 && ic % group == 0) &&
                        oc_per_group % pack_c == 0 && oc_per_group > 0 &&
                        ic_per_group > 0;
        bool nchw_disable = group > 1 || ic_per_group >= 4;
        bool nchw44_disable = ic_per_group % pack_c != 0;
        bool invalid_pad = (w + 2 * pad < kernel) || (h + 2 * pad < kernel);
        if (!(ok_group) || invalid_pad) {
            return;
        }
        if ((is_input_nchw && nchw_disable) || (!is_input_nchw && nchw44_disable)) {
            return;
        }
        size_t kernel_h = kernel;
        size_t kernel_w = kernel;
        param::ConvBias param;
        if (!is_nchw44_dot) {
            param.format = param::ConvBias::Format::NCHW44;
        } else {
            param.format = param::ConvBias::Format::NCHW44_DOT;
        }
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = pad;
        param.pad_w = pad;
        param.nonlineMode = nlmode;
        auto src_tensor_shape = TensorShape{n, ic / pack_c, h, w, pack_c};
        auto weight_tensor_shape = TensorShape{oc / pack_c, ic / pack_c, kernel_h,
                                               kernel_w, pack_c, pack_c};
        auto bias_tensor_shape = TensorShape{};
        if (bias_mode == megdnn::BiasMode::BROADCAST_CHANNEL_BIAS) {
            bias_tensor_shape = {1, oc / pack_c, 1, 1, pack_c};
        } else if (bias_mode == megdnn::BiasMode::BIAS) {
            bias_tensor_shape = {
                    n, oc / pack_c, (h + 2 * pad - kernel) / stride + 1,
                    (w + 2 * pad - kernel) / stride + 1, pack_c};
        }
        if (group == 1) {
            param.sparse = param::ConvBias::Sparse::DENSE;
        } else if (group > 1 && ic / group == 1 && oc / group == 1) {
            megdnn_assert(0, "not support channel wise");
            param.sparse = param::ConvBias::Sparse::GROUP;
            weight_tensor_shape =
                    TensorShape{group / pack_c, 1, 1, kernel_h, kernel_w, pack_c};
        } else if (
                group > 1 && oc_per_group % pack_c == 0 && oc / group > 0 &&
                ic_per_group % pack_c == 0 && ic / group > 0) {
            param.sparse = param::ConvBias::Sparse::GROUP;
            weight_tensor_shape = TensorShape{group,
                                              oc_per_group / pack_c,
                                              ic_per_group / pack_c,
                                              kernel_h,
                                              kernel_w,
                                              pack_c,
                                              pack_c};
        }
        if (is_input_nchw) {
            src_tensor_shape = TensorShape{n, ic, h, w};
            weight_tensor_shape =
                    TensorShape{oc / pack_c, kernel_h, kernel_w, ic, pack_c};
        }
        args.emplace_back(
                param, src_tensor_shape, weight_tensor_shape, bias_tensor_shape);
    };
    for (auto bias : biasmode_vec)
        for (auto nlmode : nlmode_vec)
            for (size_t n : {1, 2})
                for (size_t kernel : kernel_vec)
                    for (size_t oc : {4, 12})
                        for (size_t ic : {1, 3, 4, 12})
                            for (size_t h : {1, 3, 12})
                                for (size_t w : {1, 16, 23}) {
                                    for (size_t group = 1;
                                         group <= std::min(std::min(oc, ic), 4_z);
                                         ++group) {
                                        if (kernel != 1 && (h == 1 || w == 1)) {
                                            continue;
                                        }
                                        pack(n, oc, ic, h, w, kernel, stride, group,
                                             nlmode, bias);
                                    }
                                }
    return args;
}
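//! NCHW88 cases (8-channel packed layout); channels per group must be
//! multiples of 8, covering both dense and group convolutions.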
std::vector<conv_bias::TestArg> get_nchw88_conv_bias_args(
        std::vector<size_t> kernel_vec,
        std::vector<param::ConvBias::NonlineMode> nlmode_vec,
        std::vector<megdnn::BiasMode> biasmode_vec, size_t stride) {
    using namespace conv_bias;
    using NLMode = param::ConvBias::NonlineMode;
    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t h, size_t w, size_t kernel,
                    size_t stride, size_t group, NLMode nlmode,
                    megdnn::BiasMode bias_mode) {
        constexpr int pack_c = 8;
        const size_t pad = kernel / 2;
        auto oc_per_group = oc / group;
        auto ic_per_group = ic / group;
        megdnn_assert(
                oc_per_group % pack_c == 0 && ic_per_group % pack_c == 0,
                "ocpg/icpg not divided by 8");
        size_t kernel_h = kernel;
        size_t kernel_w = kernel;
        param::ConvBias param;
        param.format = param::ConvBias::Format::NCHW88;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = pad;
        param.pad_w = pad;
        param.nonlineMode = nlmode;
        auto src_tensor_shape = TensorShape{n, ic / pack_c, h, w, pack_c};
        auto weight_tensor_shape = TensorShape{oc / pack_c, ic / pack_c, kernel_h,
                                               kernel_w, pack_c, pack_c};
        auto bias_tensor_shape = TensorShape{};
        if (bias_mode == megdnn::BiasMode::BROADCAST_CHANNEL_BIAS) {
            bias_tensor_shape = {1, oc / pack_c, 1, 1, pack_c};
        } else if (bias_mode == megdnn::BiasMode::BIAS) {
            bias_tensor_shape = {
                    n, oc / pack_c, (h + 2 * pad - kernel) / stride + 1,
                    (w + 2 * pad - kernel) / stride + 1, pack_c};
        }
        if (group == 1) {
            param.sparse = param::ConvBias::Sparse::DENSE;
        } else {
            param.sparse = param::ConvBias::Sparse::GROUP;
            weight_tensor_shape = TensorShape{group,
                                              oc_per_group / pack_c,
                                              ic_per_group / pack_c,
                                              kernel_h,
                                              kernel_w,
                                              pack_c,
                                              pack_c};
        }
        args.emplace_back(
                param, src_tensor_shape, weight_tensor_shape, bias_tensor_shape);
    };
    for (auto bias : biasmode_vec)
        for (auto nlmode : nlmode_vec)
            for (size_t n : {1, 2})
                for (size_t kernel : kernel_vec)
                    for (size_t oc : {8, 16})
                        for (size_t ic : {8, 16, 24})
                            for (size_t h : {1, 3, 12})
                                for (size_t w : {1, 8, 13}) {
                                    for (size_t group = 1; group < oc / 8; ++group) {
                                        if (ic % (group * 8) || oc % (group * 8)) {
                                            continue;
                                        }
                                        if (kernel < h || kernel < w) {
                                            continue;
                                        }
                                        pack(n, oc, ic, h, w, kernel, stride, group,
                                             nlmode, bias);
                                    }
                                }
    return args;
}
}  // namespace conv_bias
}  // namespace test
}  // namespace megdnn
// vim: syntax=cpp.doxygen