// conv_bias.cpp

#include "test/common/conv_bias.h"
#include "megdnn/opr_param_defs.h"
#include "src/common/utils.h"
#include "test/common/benchmarker.h"

namespace megdnn {
namespace test {
namespace conv_bias {
namespace {
void convert_arg_from_nchw4_to_chwn4(TestArg& arg) {
    arg.param.format = param::ConvBias::Format::CHWN4;
    arg.src = TensorShape{arg.src[1], arg.src[2], arg.src[3], arg.src[0], 4};
    arg.filter =
            TensorShape{arg.filter[1], arg.filter[2], arg.filter[3], arg.filter[0], 4};
    arg.bias = TensorShape{arg.bias[1], arg.bias[2], arg.bias[3], arg.bias[0], 4};
}
}  // namespace
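
//! Basic float ConvBias cases: dense convolution in both CROSS_CORRELATION and
//! CONVOLUTION modes with IDENTITY/RELU/SIGMOID/H_SWISH nonlinearities.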
std::vector<TestArg> get_args() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        // fallback case
        args.emplace_back(cur_param, TensorShape{10, 1, i, i},
                          TensorShape{1, 1, 8, 8}, TensorShape{1, 1, 1, 1});
        args.emplace_back(cur_param, TensorShape{10, 4, i, i},
                          TensorShape{3, 4, 4, 4}, TensorShape{1, 3, 1, 1});
        cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
        args.emplace_back(cur_param, TensorShape{10, 4, i, i},
                          TensorShape{1, 4, 3, 3}, TensorShape{1, 1, 1, 1});
        args.emplace_back(cur_param, TensorShape{1, 4, i, i},
                          TensorShape{5, 4, 3, 3}, TensorShape{1, 5, 1, 1});
    } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_chanwise_args() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
    cur_param.sparse = ConvBias::Param::Sparse::GROUP;
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
        cur_param.nonlineMode = nlmode;
        // simple case
        for (uint32_t s : {1, 2})
            for (uint32_t p : {0, 1, 2, 3})
                for (size_t f : {2, 3, 5, 7})
                    for (size_t ocpg : {1, 3}) {
                        cur_param.pad_h = cur_param.pad_w = p;
                        cur_param.stride_h = cur_param.stride_w = s;
                        args.emplace_back(
                                cur_param, TensorShape{2, 3, 16, 16},
                                TensorShape{3, ocpg, 1, f, f},
                                TensorShape{1, 3 * ocpg, 1, 1});
                    }
        args.emplace_back(
                cur_param, TensorShape{32, 12, 20, 10}, TensorShape{12, 2, 1, 4, 5},
                TensorShape{1, 24, 1, 1});
        // padding larger than kern
        args.emplace_back(
                cur_param, TensorShape{32, 12, 20, 10}, TensorShape{12, 2, 1, 4, 5},
                TensorShape{1, 24, 1, 1});
    }
    return args;
}

std::vector<TestArg> get_args_1x1() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
        cur_param.nonlineMode = nlmode;
        for (size_t i : {16, 19}) {
            cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
            args.emplace_back(
                    cur_param, TensorShape{2, 20, i, i + 1}, TensorShape{30, 20, 1, 1},
                    TensorShape{1, 30, 1, 1});
            cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
            args.emplace_back(
                    cur_param, TensorShape{2, 20, i, i + 1}, TensorShape{30, 20, 1, 1},
                    TensorShape{1, 30, 1, 1});
        }
    }
    return args;
}
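
//! Winograd cases for a square kernel of size `kernel_size`: dense and group
//! convolution with no-bias, full-bias and per-channel-bias variants, plus
//! large-OC shapes exercising multi-thread OC-parallel execution.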
std::vector<TestArg> get_winograd_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t ic : {1, 3, 4, 7}) {
    for (size_t oc : {1, 3, 4, 7}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 0;
        //! no bias
        args.emplace_back(cur_param, TensorShape{1, ic, i, i},
                          TensorShape{oc, ic, kernel_size, kernel_size},
                          TensorShape{});
        //! bias
        args.emplace_back(
                cur_param, TensorShape{2, ic, i, i},
                TensorShape{oc, ic, kernel_size, kernel_size},
                TensorShape{2, oc, (i + cur_param.pad_h * 2 - kernel_size) + 1,
                            (i + cur_param.pad_w * 2 - kernel_size) + 1});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, kernel_size, kernel_size},
                          TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * ic, i, i},
                TensorShape{2, oc, ic, kernel_size, kernel_size},
                TensorShape{2, 2 * oc,
                            (i + cur_param.pad_h * 2 - kernel_size) + 1,
                            (i + cur_param.pad_w * 2 - kernel_size) + 1});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, kernel_size, kernel_size},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(
                cur_param, TensorShape{1, 8, i, i},
                TensorShape{128, 8, kernel_size, kernel_size},
                TensorShape{1, 128, 1, 1});
        args.emplace_back(
                cur_param, TensorShape{2, 8, i, i},
                TensorShape{128, 8, kernel_size, kernel_size},
                TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * 8, i, i},
                TensorShape{2, 128, 8, kernel_size, kernel_size},
                TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
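
//! 3x3 Winograd cases with ic/oc chosen as multiples of `pack_size`, so the
//! MK-packed matmul formats are exercised directly.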
std::vector<TestArg> get_winograd_mk_packed_args(size_t pack_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t ic : {pack_size, 2 * pack_size}) {
    for (size_t oc : {pack_size, 2 * pack_size}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
                          TensorShape{pack_size, pack_size, 3, 3},
                          TensorShape{1, pack_size, 1, 1});
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(
                cur_param, TensorShape{1, 8, i, i}, TensorShape{128, 8, 3, 3},
                TensorShape{1, 128, 1, 1});
        args.emplace_back(
                cur_param, TensorShape{2, 8, i, i}, TensorShape{128, 8, 3, 3},
                TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * 8, i, i}, TensorShape{2, 128, 8, 3, 3},
                TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
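
//! Quantized counterpart of get_winograd_mk_packed_args: same shapes, but only
//! IDENTITY/RELU nonlinearities; compute_float32 switches compute_mode to
//! FLOAT32.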
std::vector<TestArg> get_quantized_winograd_mk_packed_args(
        size_t pack_size, bool compute_float32) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (size_t ic : {pack_size, 2 * pack_size}) {
    for (size_t oc : {pack_size, 2 * pack_size}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        if (compute_float32) {
            cur_param.compute_mode = param::ConvBias::ComputeMode::FLOAT32;
        }
        args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
                          TensorShape{pack_size, pack_size, 3, 3},
                          TensorShape{1, pack_size, 1, 1});
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(
                cur_param, TensorShape{1, 8, i, i}, TensorShape{128, 8, 3, 3},
                TensorShape{1, 128, 1, 1});
        args.emplace_back(
                cur_param, TensorShape{2, 8, i, i}, TensorShape{128, 8, 3, 3},
                TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * 8, i, i}, TensorShape{2, 128, 8, 3, 3},
                TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}

std::vector<TestArg> get_quantized_args_with_nlmode(
        param::ConvBias::NonlineMode nlmode) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    // clang-format off
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION,
                      param::ConvBias::Mode::CONVOLUTION}) {
    for (size_t ic : {1, 2, 3, 4, 5, 7}) {
    for (size_t oc : {1, 2, 3, 4, 5, 7}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 0;
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 1, 1}, TensorShape{});
    } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_quantized_args() {
    using NLMode = param::ConvBias::NonlineMode;
    auto arg_p1 = get_quantized_args_with_nlmode(NLMode::IDENTITY),
         arg_p2 = get_quantized_args_with_nlmode(NLMode::RELU),
         arg_p3 = get_quantized_args_with_nlmode(NLMode::H_SWISH);
    std::vector<TestArg> args;
    args.insert(args.end(), arg_p1.begin(), arg_p1.end());
    args.insert(args.end(), arg_p2.begin(), arg_p2.end());
    args.insert(args.end(), arg_p3.begin(), arg_p3.end());
    return args;
}
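
//! int8 NCHW4 cases with per-channel bias over batch/channel/spatial/padding/
//! stride combinations; batch is capped at 32 when kernel_size == 7.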
std::vector<TestArg> get_int8_nchw4_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {2, 1}) {
        if (kernel_size == 7) {
            b = std::min(b, 32_z);
        }
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
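
//! int8 NCHW44 (pack_size == 4) or NCHW88 (pack_size == 8) cases; group_mode
//! switches to grouped convolution and compute_float32 enables FLOAT32
//! compute_mode.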
std::vector<TestArg> get_int8_nchw44_args(
        size_t kernel_size, size_t pack_size, bool compute_float32, bool group_mode) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    megdnn_assert(pack_size > 0, "pack_size must be positive");
    megdnn_assert(kernel_size > 0, "kernel_size must be positive");
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU, NLMode::H_SWISH}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {1, 2}) {
    for (size_t ic : {8, 16}) {
    for (size_t oc : {8, 16}) {
    for (size_t h : {9, 23}) {
    for (size_t w : {9, 23}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {1}) {
        if (kernel_size == 7) {
            b = std::min(b, 32_z);
        }
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        if (pack_size == 4) {
            cur_param.format = param::ConvBias::Format::NCHW44;
        } else if (pack_size == 8) {
            cur_param.format = param::ConvBias::Format::NCHW88;
        }
        if (compute_float32) {
            cur_param.compute_mode = param::ConvBias::ComputeMode::FLOAT32;
        }
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        if (!group_mode) {
            //! no bias
            args.emplace_back(cur_param,
                              TensorShape{b, ic / pack_size, h, w, pack_size},
                              TensorShape{oc / pack_size, ic / pack_size, f, f,
                                          pack_size, pack_size},
                              TensorShape{});
            //! bias channel
            args.emplace_back(cur_param,
                              TensorShape{b, ic / pack_size, h, w, pack_size},
                              TensorShape{oc / pack_size, ic / pack_size, f, f,
                                          pack_size, pack_size},
                              TensorShape{1, oc / pack_size, 1, 1, pack_size});
            //! bias
            args.emplace_back(
                    cur_param, TensorShape{b, ic / pack_size, h, w, pack_size},
                    TensorShape{oc / pack_size, ic / pack_size, f, f, pack_size,
                                pack_size},
                    TensorShape{b, oc / pack_size, (h - f + 2 * p) / s + 1,
                                (w - f + 2 * p) / s + 1, pack_size});
        } else {
            cur_param.sparse = param::ConvBias::Sparse::GROUP;
            args.emplace_back(
                    cur_param,
                    TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
                    TensorShape{2, oc / pack_size, ic / pack_size, 3, 3,
                                pack_size, pack_size},
                    TensorShape{2, 2 * oc / pack_size, (h - f + 2 * p) / s + 1,
                                (w - f + 2 * p) / s + 1, pack_size});
            args.emplace_back(
                    cur_param,
                    TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
                    TensorShape{2, oc / pack_size, ic / pack_size, f, f,
                                pack_size, pack_size},
                    TensorShape{1, 2 * oc / pack_size, 1, 1, pack_size});
            args.emplace_back(
                    cur_param,
                    TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
                    TensorShape{2, oc / pack_size, ic / pack_size, f, f,
                                pack_size, pack_size},
                    TensorShape{});
        }
    } } } } } } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int8_nchw4_args_check_bounds(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {7, 8, 4, 1}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 8, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int4_nchw64_args_ptx(size_t kernel_size, bool is_uint4) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    //! NLMode::H_SWISH is not currently supported here
    for (auto nlmode : {NLMode::RELU, NLMode::IDENTITY}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {3, 7}) {
    for (size_t ic : {64, 128}) {
    for (size_t oc : {64, 320}) {
    for (size_t h : {13}) {
    for (size_t w : {28}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {1, 2}) {
        if (is_uint4 && nlmode == NLMode::H_SWISH) continue;
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW64;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 64, h, w, 64},
                          TensorShape{oc, ic / 64, f, f, 64},
                          TensorShape{1, oc / 64, 1, 1, 64});
    } } } } } } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int8_nchw4_args_small_batch(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU, NLMode::H_SWISH}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {12, 8, 4}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 8, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 9, 10, 11, 12, 13, 14, 15, 16}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int8_nchw4_small_channel_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU, NLMode::H_SWISH}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {4, 12}) {
    for (size_t oc : {128, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int8_nchw4_small_channel_args_check_bounds(
        size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {8, 7, 4, 1}) {
    for (size_t ic : {4, 12}) {
    for (size_t oc : {16, 8, 12, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
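
//! The CHWN4 generators below reuse the NCHW4 generators and convert each arg
//! via convert_arg_from_nchw4_to_chwn4.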
std::vector<TestArg> get_int8_chwn4_args(size_t kernel_size) {
    auto args = get_int8_nchw4_args(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}

std::vector<TestArg> get_int8_chwn4_args_check_bounds(size_t kernel_size) {
    auto args = get_int8_nchw4_args_check_bounds(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}

std::vector<TestArg> get_int8_chwn4_small_channel_args(size_t kernel_size) {
    auto args = get_int8_nchw4_small_channel_args(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}

std::vector<TestArg> get_int8_chwn4_small_channel_args_check_bounds(
        size_t kernel_size) {
    auto args = get_int8_nchw4_small_channel_args_check_bounds(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}

std::vector<TestArg> get_int8_chwn4_args_small_batch(size_t kernel_size) {
    auto args = get_int8_nchw4_args_small_batch(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}

std::vector<TestArg> get_int8_nchw4_tensorcore_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU, NLMode::H_SWISH}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
        size_t b = 64, oc = 128;
    for (size_t ic : {32, 64}) {
    for (size_t h : {8}) {
    for (size_t w : {11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } }
    } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int8_chwn4_tensorcore_args(size_t kernel_size) {
    auto args = get_int8_nchw4_tensorcore_args(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
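
//! Run the checker over `args` (or a default NCHW4/CHWN4 arg set when `args`
//! is empty). RNGs and error tolerances are chosen from src_dtype, and
//! `fuse_z` adds a z-term tensor whose shape is inferred from the conv
//! parameters.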
void check_conv_bias(
        DType src_dtype, DType filter_dtype, DType bias_dtype, DType dst_dtype,
        Handle* handle, const char* algo, param::ConvBias::Format format,
        const std::vector<TestArg>& args, bool fuse_z, bool stable_test) {
    megdnn_assert(
            (src_dtype.enumv() == filter_dtype.enumv()) ||
            (src_dtype.enumv() == DTypeEnum::Quantized4Asymm &&
             filter_dtype.enumv() == DTypeEnum::QuantizedS4));
    Checker<ConvBiasForward> checker(handle, !stable_test);
    if (algo) {
        checker.set_before_exec_callback(ConvBiasAlgoChecker<ConvBiasForward>(algo));
    }
    std::unique_ptr<RNG> rng;
    std::unique_ptr<RNG> flt_rng;
    std::unique_ptr<RNG> bias_rng;
    std::unique_ptr<RNG> const_rng;
    std::unique_ptr<RNG> zero_rng;
    // TODO: check range of rng
    if (src_dtype.enumv() == DTypeEnum::QuantizedS8) {
        rng = std::make_unique<UniformIntRNG>(-3, 3);
        flt_rng = std::make_unique<UniformIntRNG>(-3, 3);
        const_rng = std::make_unique<UniformIntRNG>(1, 1);
        zero_rng = std::make_unique<UniformIntRNG>(0, 0);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
        bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
        checker.set_epsilon(1 + 1e-3).set_max_avg_error(1e-1).set_max_avg_biased_error(
                1e-3);
    } else if (src_dtype.enumv() == DTypeEnum::Quantized4Asymm) {
        rng = std::make_unique<UniformIntRNG>(0, 6);
        flt_rng = std::make_unique<UniformIntRNG>(-3, 3);
        const_rng = std::make_unique<UniformIntRNG>(1, 1);
        zero_rng = std::make_unique<UniformIntRNG>(0, 0);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
        bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
        checker.set_epsilon(1 + 1e-3).set_max_avg_error(1e-1).set_max_avg_biased_error(
                1e-3);
    } else if (src_dtype.enumv() == DTypeEnum::QuantizedS4) {
        rng = std::make_unique<UniformIntRNG>(-3, 3);
        flt_rng = std::make_unique<UniformIntRNG>(-3, 3);
        const_rng = std::make_unique<UniformIntRNG>(1, 1);
        zero_rng = std::make_unique<UniformIntRNG>(0, 0);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
        bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
        checker.set_epsilon(1 + 1e-3).set_max_avg_error(1e-1).set_max_avg_biased_error(
                1e-3);
    } else if (src_dtype.enumv() == DTypeEnum::Float16) {
        rng = std::make_unique<NormalRNG>(2.f);
        flt_rng = std::make_unique<NormalRNG>(2.f);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float16);
        bias_rng = std::make_unique<NormalRNG>(2.f);
        checker.set_epsilon(1e-2);
    } else if (src_dtype.enumv() == DTypeEnum::Float32) {
        rng = std::make_unique<NormalRNG>(2.f);
        flt_rng = std::make_unique<NormalRNG>(2.f);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float32);
        bias_rng = std::make_unique<NormalRNG>(2.f);
    }
    using Param = param::ConvBias;
    using Format = Param::Format;
    auto get_z_shape = [&fuse_z, &format](TestArg arg) -> TensorShape {
        TensorShape z{};
        if (fuse_z) {
            size_t hi, wi, sh, sw, ph, pw, fh, fw;
            z = arg.src;
            size_t spatial_idx = 2;
            if (format == Format::NCHW4) {
                hi = arg.src[2];
                wi = arg.src[3];
                fh = arg.filter[2];
                fw = arg.filter[3];
                z[1] = arg.filter[0] / 4;
            } else if (format == Format::NCHW32) {
                hi = arg.src[2];
                wi = arg.src[3];
                fh = arg.filter[2];
                fw = arg.filter[3];
                z[1] = arg.filter[0] / 32;
            } else if (format == Format::NCHW64) {
                hi = arg.src[2];
                wi = arg.src[3];
                fh = arg.filter[2];
                fw = arg.filter[3];
                z[1] = arg.filter[0] / 64;
            } else {
                megdnn_assert(format == Format::CHWN4);
                hi = arg.src[1];
                wi = arg.src[2];
                fh = arg.filter[1];
                fw = arg.filter[2];
                z[0] = arg.filter[3] / 4;
                spatial_idx = 1;
            }
            sh = arg.param.stride_h;
            sw = arg.param.stride_w;
            ph = arg.param.pad_h;
            pw = arg.param.pad_w;
            size_t ho = infer_conv_shape(hi, fh, sh, ph);
            size_t wo = infer_conv_shape(wi, fw, sw, pw);
            z[spatial_idx] = ho;
            z[spatial_idx + 1] = wo;
        }
        return z;
    };
    megdnn_assert(rng != nullptr && flt_rng != nullptr && bias_rng != nullptr);
    checker.set_rng(0, rng.get())
            .set_rng(1, flt_rng.get())
            .set_rng(2, bias_rng.get())
            .set_rng(3, rng.get());
    if (stable_test) {
        checker.set_stable_check(true);
        checker.set_no_naive_check(true);
    }
    if (args.empty()) {
        std::vector<TestArg> default_args;
        if (format == Format::NCHW4) {
            default_args = get_int8_nchw4_args(3);
        } else if (format == Format::CHWN4) {
            default_args = get_int8_chwn4_args(3);
        }
        for (auto&& arg : default_args) {
            auto z = get_z_shape(arg);
            checker.set_dtype(0, src_dtype)
                    .set_dtype(1, filter_dtype)
                    .set_dtype(2, bias_dtype)
                    .set_dtype(3, dst_dtype)
                    .set_dtype(4, dst_dtype)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, arg.bias, z, {}});
        }
    } else {
        for (auto&& arg : args) {
            auto z = get_z_shape(arg);
            checker.set_dtype(0, src_dtype)
                    .set_dtype(1, filter_dtype)
                    .set_dtype(2, bias_dtype)
                    .set_dtype(3, dst_dtype)
                    .set_dtype(4, dst_dtype)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, arg.bias, z, {}});
        }
    }
}

#if MEGDNN_WITH_BENCHMARK
std::vector<conv_bias::TestArg> get_winograd_benchmark_args(
        size_t kernel, size_t pack_size) {
    std::vector<conv_bias::TestArg> args;
    auto pack = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel, size_t p) {
        if (ic % pack_size != 0 || oc % pack_size != 0)
            return;
        if (w + 2 * p < kernel || h + 2 * p < kernel)
            return;
        param::ConvBias param;
        param.stride_h = 1;
        param.stride_w = 1;
        param.pad_h = p;
        param.pad_w = p;
        args.push_back(conv_bias::TestArg{
                param,
                TensorShape{1, ic, h, w},
                TensorShape{oc, ic, kernel, kernel},
                {1, oc, 1, 1}});
    };
    for (size_t ic : {8, 16, 32, 64}) {
        for (size_t oc : {8, 16, 32, 64}) {
            pack(oc, ic, 56, 56, kernel, kernel / 2);
            pack(oc, ic, 128, 128, kernel, kernel / 2);
            pack(oc, ic, 256, 256, kernel, kernel / 2);
        }
    }
    //! conv in vgg16
    pack(512, 512, 15, 15, kernel, kernel / 2);
    pack(512, 256, 15, 15, kernel, kernel / 2);
    pack(256, 256, 29, 29, kernel, kernel / 2);
    pack(256, 128, 29, 29, kernel, kernel / 2);
    pack(128, 128, 57, 57, kernel, kernel / 2);
    pack(128, 64, 57, 57, kernel, kernel / 2);
    pack(64, 64, 123, 123, kernel, kernel / 2);
    pack(64, 24, 123, 123, kernel, kernel / 2);
    pack(24, 24, 224, 224, kernel, kernel / 2);
    //! conv in resnet18
    pack(64, 64, 56, 56, kernel, kernel / 2);
    pack(128, 128, 28, 28, kernel, kernel / 2);
    pack(256, 256, 14, 14, kernel, kernel / 2);
    pack(512, 512, 7, 7, kernel, kernel / 2);
    return args;
}

void benchmark_winograd(
        const char* algo_name, Handle* handle, size_t kernel, size_t pack_size) {
    auto&& args = get_winograd_benchmark_args(kernel, pack_size);
    using namespace conv_bias;
    constexpr size_t RUN = 10;
    Benchmarker<Convolution> benchmark(handle);
    benchmark.set_display(false);
    benchmark.set_times(RUN);
    Benchmarker<ConvBias> benchmark_winograd(handle);
    benchmark_winograd.set_display(false);
    benchmark_winograd.set_times(RUN);
    for (auto&& arg : args) {
        TensorLayout dst_layout;
        auto opr = handle->create_operator<ConvBias>();
        opr->param() = arg.param;
        opr->deduce_layout(
                {arg.src, dtype::Float32()}, {arg.filter, dtype::Float32()},
                {arg.bias, dtype::Float32()}, {}, dst_layout);
        //! dst.nr_elems * IC * FH * FW * 2
        float computations = dst_layout.total_nr_elems() * arg.filter[1] *
                             arg.filter[2] * arg.filter[3] * 2.0 /
                             (1024 * 1024 * 1024) * 1e3;
        param::Convolution conv_param;
        conv_param.pad_h = arg.param.pad_h;
        conv_param.pad_w = arg.param.pad_w;
        conv_param.stride_h = arg.param.stride_h;
        conv_param.stride_w = arg.param.stride_w;
        auto used =
                benchmark.set_param(conv_param).exec({arg.src, arg.filter, {}}) / RUN;
        benchmark_winograd.set_param(arg.param);
        auto used_winograd = algo_benchmark<ConvBias>(
                                     benchmark_winograd,
                                     {arg.src, arg.filter, {}, {}, {}}, algo_name) /
                             RUN;
        printf("%s %s: normal: %f ms %f Gflops winograd: %f ms %f GFlops "
               "speedup: "
               "%f\n",
               arg.src.to_string().c_str(), arg.filter.to_string().c_str(), used,
               computations / used, used_winograd, computations / used_winograd,
               used / used_winograd);
    }
}

//! winograd benchmark with weight pre-processing
void benchmark_winograd_weight_preprocess(
        const char* algo_name, megdnn::Handle* handle, size_t kernel,
        size_t pack_size) {
    auto&& args = get_winograd_benchmark_args(kernel, pack_size);
    using namespace conv_bias;
    constexpr size_t RUN = 10;
    //! benchmark through the weight-preprocess proxy
    Benchmarker<ConvBias, Timer, OprWeightPreprocessBenchmarkProxy<ConvBias>>
            benchmark_winograd(handle);
    benchmark_winograd.set_display(false);
    benchmark_winograd.set_times(RUN);
    for (auto&& arg : args) {
        TensorLayout dst_layout;
        auto opr = handle->create_operator<ConvBias>();
        opr->param() = arg.param;
        opr->deduce_layout(
                {arg.src, dtype::Float32()}, {arg.filter, dtype::Float32()},
                {arg.bias, dtype::Float32()}, {}, dst_layout);
        //! dst.nr_elems * IC * FH * FW * 2
        float computations = dst_layout.total_nr_elems() * arg.filter[1] *
                             arg.filter[2] * arg.filter[3] * 2.0 /
                             (1024 * 1024 * 1024) * 1e3;
        param::Convolution conv_param;
        conv_param.pad_h = arg.param.pad_h;
        conv_param.pad_w = arg.param.pad_w;
        conv_param.stride_h = arg.param.stride_h;
        conv_param.stride_w = arg.param.stride_w;
        benchmark_winograd.set_param(arg.param);
        auto used_winograd =
                algo_benchmark<
                        ConvBias, OprWeightPreprocessBenchmarkProxy<ConvBias>, Timer>(
                        benchmark_winograd, {arg.src, arg.filter, {}, {}, {}},
                        algo_name) /
                RUN;
        printf("%s %s: %s: %f ms %f Gflops\n", arg.src.to_string().c_str(),
               arg.filter.to_string().c_str(), algo_name, used_winograd,
               computations / used_winograd);
    }
}
#endif  // MEGDNN_WITH_BENCHMARK
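
//! Check `args` against a Winograd ConvBias algorithm: the inner matmul
//! algorithm name is chosen per target architecture and combined with
//! `algo_name` into the full WINOGRAD / WINOGRAD_NCHW44 algorithm string.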
template <class Checker>
void check_winograd(
        const char* algo_name, Checker& checker,
        const std::vector<conv_bias::TestArg>& args, param::MatrixMul::Format format,
        param::ConvBias::Format layout) {
    const char* matmul_name;
#if MEGDNN_AARCH64
    if (format == param::MatrixMul::Format::MK4) {
        matmul_name = "AARCH64_F32_MK4_4x16";
    } else if (format == param::MatrixMul::Format::MK8) {
        matmul_name = "AARCH64_INT16X16X32_MK8_8X8";
    } else {
        matmul_name = "AARCH64_F32K8X12X1";
    }
#elif MEGDNN_ARMV7
    if (format == param::MatrixMul::Format::MK4) {
        matmul_name = "ARMV7_F32_MK4_4x8";
    } else if (format == param::MatrixMul::Format::MK8) {
        matmul_name = "ARMV7_INT16X16X32_MK8_4X8";
    } else {
        matmul_name = "ARMV7_F32";
    }
#else
    if (format == param::MatrixMul::Format::MK4) {
        matmul_name = "FB_GI_F32_MK4_4x8";
    } else {
        matmul_name = "FB_GI_F32_4x12";
    }
#endif
    std::string winograd_algo_name;
    if (layout == megdnn::param::ConvBias::Format::NCHW) {
        winograd_algo_name = ssprintf("WINOGRAD:%s:%s", matmul_name, algo_name);
    } else if (layout == megdnn::param::ConvBias::Format::NCHW44) {
        winograd_algo_name = ssprintf("WINOGRAD_NCHW44:%s:%s", matmul_name, algo_name);
    } else {
        megdnn_throw("Invalid layout");
    }
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(winograd_algo_name.c_str()));
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, arg.bias, {}, {}});
    }
}

template void check_winograd<megdnn::test::Checker<megdnn::ConvBias>>(
        const char* algo_name, megdnn::test::Checker<megdnn::ConvBias>& checker,
        const std::vector<conv_bias::TestArg>& args, param::MatrixMul::Format format,
        param::ConvBias::Format layout);

using WeightPreprocessChecker = megdnn::test::Checker<
        megdnn::ConvBias, megdnn::test::OprWeightPreprocessProxy<megdnn::ConvBias>>;
template void check_winograd<WeightPreprocessChecker>(
        const char* algo_name, WeightPreprocessChecker& checker,
        const std::vector<conv_bias::TestArg>& args, param::MatrixMul::Format format,
        param::ConvBias::Format layout);
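
//! General NCHW ConvBias cases for the given kernel sizes and stride, covering
//! dense and group convolution; the flags control padding, bias variants and
//! which nonlinearities are included (SIGMOID is skipped when quantized_nlmod
//! is set).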
std::vector<conv_bias::TestArg> get_conv_bias_args(
        std::vector<size_t> kernel, size_t stride, bool no_pad, bool no_bias,
        bool no_nonlinemode, bool quantized_nlmod, bool only_broadcast_bias) {
    using namespace conv_bias;
    using Param = param::ConvBias;
    using NLMode = param::ConvBias::NonlineMode;
    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                    size_t stride, NLMode nlmode) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        if (!no_pad) {
            param.pad_h = kernel / 2;
            param.pad_w = kernel / 2;
        } else {
            param.pad_h = 0;
            param.pad_w = 0;
        }
        param.nonlineMode = nlmode;
        args.emplace_back(
                param, TensorShape{n, ic, h, w}, TensorShape{oc, ic, kernel, kernel},
                TensorShape{});
        if (!no_bias) {
            args.emplace_back(
                    param, TensorShape{n, ic, h, w},
                    TensorShape{oc, ic, kernel, kernel}, TensorShape{1, oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, ic, h, w},
                        TensorShape{oc, ic, kernel, kernel},
                        TensorShape{
                                n, oc, (h + 2 * param.pad_h - kernel) / stride + 1,
                                (w + 2 * param.pad_w - kernel) / stride + 1});
            }
        }
        param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                param, TensorShape{n, 2 * ic, h, w},
                TensorShape{2, oc, ic, kernel, kernel}, TensorShape{});
        if (!no_bias) {
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, 2 * ic, h, w},
                        TensorShape{2, oc, ic, kernel, kernel},
                        TensorShape{
                                n, 2 * oc, (h + param.pad_h * 2 - kernel) / stride + 1,
                                (w + param.pad_w * 2 - kernel) / stride + 1});
            }
            args.emplace_back(
                    param, TensorShape{n, 2 * ic, h, w},
                    TensorShape{2, oc, ic, kernel, kernel},
                    TensorShape{1, 2 * oc, 1, 1});
        }
    };
    std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
    if (!no_nonlinemode) {
        nonlinemode.emplace_back(NLMode::RELU);
        nonlinemode.emplace_back(NLMode::H_SWISH);
        if (!quantized_nlmod) {
            nonlinemode.emplace_back(NLMode::SIGMOID);
        }
    }
    for (size_t n : {1, 2}) {
        for (auto nlmode : nonlinemode) {
            for (size_t ic : {1, 3, 7}) {
                for (size_t oc : {1, 3, 7}) {
                    for (size_t size : {8, 16, 20}) {
                        for (size_t kern : kernel) {
                            pack(n, oc, ic, size, size, kern, stride, nlmode);
                        }
                    }
                }
            }
        }
    }
    return args;
}

std::vector<megdnn::test::conv_bias::TestArg> get_conv_bias_1x1_args(
        bool no_bias, bool no_nonlinemode, bool quantized_nlmod,
        bool only_broadcast_bias) {
    using namespace conv_bias;
    using Param = param::ConvBias;
    using NLMode = param::ConvBias::NonlineMode;
    using CONVMode = param::ConvBias::Mode;
    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h, size_t stride,
                    NLMode nlmode, CONVMode convmode) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = 0;
        param.pad_w = 0;
        param.mode = convmode;
        param.nonlineMode = nlmode;
        args.emplace_back(
                param, TensorShape{n, ic, h, w}, TensorShape{oc, ic, 1, 1},
                TensorShape{});
        if (!no_bias) {
            args.emplace_back(
                    param, TensorShape{n, ic, h, w}, TensorShape{oc, ic, 1, 1},
                    TensorShape{1, oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, ic, h, w}, TensorShape{oc, ic, 1, 1},
                        TensorShape{n, oc, (h - 1) / stride + 1, (w - 1) / stride + 1});
            }
        }
        param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                param, TensorShape{n, 2 * ic, h, w}, TensorShape{2, oc, ic, 1, 1},
                TensorShape{});
        if (!no_bias) {
            args.emplace_back(
                    param, TensorShape{n, 2 * ic, h, w}, TensorShape{2, oc, ic, 1, 1},
                    TensorShape{1, 2 * oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, 2 * ic, h, w},
                        TensorShape{2, oc, ic, 1, 1},
                        TensorShape{
                                n, 2 * oc, (h - 1) / stride + 1, (w - 1) / stride + 1});
            }
        }
    };
    std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
    if (!no_nonlinemode) {
        nonlinemode.emplace_back(NLMode::RELU);
        nonlinemode.emplace_back(NLMode::H_SWISH);
        if (!quantized_nlmod) {
            nonlinemode.emplace_back(NLMode::SIGMOID);
        }
    }
    std::vector<CONVMode> convmodes{
            param::ConvBias::Mode::CONVOLUTION,
            param::ConvBias::Mode::CROSS_CORRELATION};
    for (size_t n : {1, 2})
        for (size_t oc : {1, 9, 33})
            for (size_t ic : {1, 16, 64})
                for (size_t size : {1, 7, 14, 28})
                    for (auto nlmode : nonlinemode)
                        for (auto convmode : convmodes) {
                            pack(n, oc, ic, size, size, 1, nlmode, convmode);
                        }
    return args;
}

void check_conv_bias(
        std::vector<conv_bias::TestArg> args, Handle* handle, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, arg.bias, {}, {}});
    }
}

void checker_conv_bias_int8x8x16(
        std::vector<conv_bias::TestArg> args, Handle* handle, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, dtype::Int8());
    checker.set_dtype(1, dtype::Int8());
    checker.set_dtype(2, dtype::Int16());
    checker.set_dtype(4, dtype::Int16());
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
    }
}

void check_conv_bias_preprocess(
        std::vector<conv_bias::TestArg> args, Handle* handle, RNG* rng, float epsilon,
        DType type0, DType type1, DType type2, DType type3, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(handle);
    checker.set_dtype(0, type0);
    checker.set_dtype(1, type1);
    checker.set_dtype(2, type2);
    checker.set_dtype(4, type3);
    checker.set_epsilon(epsilon);
    if (NULL != rng) {
        checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng).set_rng(3, rng);
    }
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, arg.bias, {}, {}});
    }
}

void checker_conv_bias_common(
        std::vector<conv_bias::TestArg> args, Handle* handle, RNG* rng, float epsilon,
        DType type0, DType type1, DType type2, DType type3, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, type0);
    checker.set_dtype(1, type1);
    checker.set_dtype(2, type2);
    checker.set_dtype(4, type3);
    checker.set_epsilon(epsilon);
    if (NULL != rng) {
        checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng).set_rng(3, rng);
    }
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, arg.bias, {}, {}});
    }
}

void checker_conv_bias_mul_int8x8x32(
        std::vector<conv_bias::TestArg> args, Handle* handle, const char* algo_name) {
    using namespace conv_bias;
    float epsilon = 0.001;
#if MEGDNN_ARMV7
    epsilon = 1.0;
#endif
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, dtype::Int8());
    checker.set_dtype(1, dtype::Int8());
    checker.set_dtype(2, dtype::Int32());
    checker.set_dtype(4, dtype::Int32());
    checker.set_epsilon(epsilon);
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
    }
    UniformIntRNG rng{-50, 50};
    for (auto&& arg : args) {
        checker.set_dtype(0, dtype::QuantizedS8(2.5f))
                .set_dtype(1, dtype::QuantizedS8(2.5f))
                .set_dtype(2, dtype::QuantizedS32(6.25f))
                .set_dtype(4, dtype::QuantizedS32(6.25f))
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_rng(2, &rng)
                .set_param(arg.param)
                .set_epsilon(epsilon)
                .execs({arg.src, arg.filter, {}, {}, {}});
    }
}

void checker_conv_bias_int8x8x32_preprocess(
        std::vector<conv_bias::TestArg> args, Handle* handle, const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, dtype::Int8());
    checker.set_dtype(1, dtype::Int8());
    checker.set_dtype(2, dtype::Int32());
    checker.set_dtype(4, dtype::Int32());
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
    }
    UniformIntRNG rng{-50, 50};
    for (auto&& arg : args) {
        checker.set_dtype(0, dtype::QuantizedS8(2.5f))
                .set_dtype(1, dtype::QuantizedS8(2.5f))
                .set_dtype(2, dtype::QuantizedS32(6.25f))
                .set_dtype(4, dtype::QuantizedS32(6.25f))
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_rng(2, &rng)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}, {}, {}});
    }
}
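
//! NCHW44 / NCHW44_DOT cases: the pack lambda skips invalid group/channel/
//! padding combinations and picks dense or group weight layouts;
//! is_input_nchw emits the hybrid NCHW-input variant.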
std::vector<conv_bias::TestArg> get_nchw44_conv_bias_args(
        std::vector<size_t> kernel_vec,
        std::vector<param::ConvBias::NonlineMode> nlmode_vec,
        std::vector<megdnn::BiasMode> biasmode_vec, size_t stride, bool no_pad,
        bool is_input_nchw, bool is_nchw44_dot) {
    using namespace conv_bias;
    using NLMode = param::ConvBias::NonlineMode;
    std::vector<TestArg> args;
    MEGDNN_MARK_USED_VAR(no_pad);
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t h, size_t w, size_t kernel,
                    size_t stride, size_t group, NLMode nlmode,
                    megdnn::BiasMode bias_mode, int any_pad = -1) {
        constexpr int pack_c = 4;
        const size_t pad = any_pad >= 0 ? any_pad : kernel / 2;
        auto oc_per_group = oc / group;
        auto ic_per_group = ic / group;
        bool ok_group = (oc % group == 0 && ic % group == 0) &&
                        oc_per_group % pack_c == 0 && oc_per_group > 0 &&
                        ic_per_group > 0;
        bool nchw_disable = group > 1 || ic_per_group >= 4;
        bool nchw44_disable = ic_per_group % pack_c != 0;
        bool invalid_pad = (w + 2 * pad < kernel) || (h + 2 * pad < kernel);
        if (!(ok_group) || invalid_pad) {
            return;
        }
        if ((is_input_nchw && nchw_disable) || (!is_input_nchw && nchw44_disable)) {
            return;
        }
        size_t kernel_h = kernel;
        size_t kernel_w = kernel;
        param::ConvBias param;
        if (!is_nchw44_dot) {
            param.format = param::ConvBias::Format::NCHW44;
        } else {
            param.format = param::ConvBias::Format::NCHW44_DOT;
        }
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = pad;
        param.pad_w = pad;
        param.nonlineMode = nlmode;
        auto src_tensor_shape = TensorShape{n, ic / pack_c, h, w, pack_c};
        auto weight_tensor_shape = TensorShape{oc / pack_c, ic / pack_c, kernel_h,
                                               kernel_w, pack_c, pack_c};
        auto bias_tensor_shape = TensorShape{};
        if (bias_mode == megdnn::BiasMode::BROADCAST_CHANNEL_BIAS) {
            bias_tensor_shape = {1, oc / pack_c, 1, 1, pack_c};
        } else if (bias_mode == megdnn::BiasMode::BIAS) {
            bias_tensor_shape = {
                    n, oc / pack_c, (h + 2 * pad - kernel) / stride + 1,
                    (w + 2 * pad - kernel) / stride + 1, pack_c};
        }
        if (group == 1) {
            param.sparse = param::ConvBias::Sparse::DENSE;
        } else if (group > 1 && ic / group == 1 && oc / group == 1) {
            megdnn_assert(0, "channel wise convolution is not supported here");
            param.sparse = param::ConvBias::Sparse::GROUP;
            weight_tensor_shape =
                    TensorShape{group / pack_c, 1, 1, kernel_h, kernel_w, pack_c};
        } else if (
                group > 1 && oc_per_group % pack_c == 0 && oc / group > 0 &&
                ic_per_group % pack_c == 0 && ic / group > 0) {
            param.sparse = param::ConvBias::Sparse::GROUP;
            weight_tensor_shape = TensorShape{group,
                                              oc_per_group / pack_c,
                                              ic_per_group / pack_c,
                                              kernel_h,
                                              kernel_w,
                                              pack_c,
                                              pack_c};
        }
        if (is_input_nchw) {
            src_tensor_shape = TensorShape{n, ic, h, w};
            weight_tensor_shape =
                    TensorShape{oc / pack_c, kernel_h, kernel_w, ic, pack_c};
        }
        args.emplace_back(
                param, src_tensor_shape, weight_tensor_shape, bias_tensor_shape);
    };
    for (auto bias : biasmode_vec)
        for (auto nlmode : nlmode_vec)
            for (size_t n : {1, 2})
                for (size_t kernel : kernel_vec)
                    for (size_t oc : {4, 12})
                        for (size_t ic : {1, 3, 4, 12})
                            for (size_t h : {1, 3, 12})
                                for (size_t w : {1, 16, 23}) {
                                    for (size_t group = 1;
                                         group <= std::min(std::min(oc, ic), 4_z);
                                         ++group) {
                                        if (kernel != 1 && (h == 1 || w == 1)) {
                                            continue;
                                        }
                                        pack(n, oc, ic, h, w, kernel, stride, group,
                                             nlmode, bias);
                                    }
                                }
    return args;
}
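
//! NCHW88 cases: dense and group convolution with per-group channel counts
//! that are multiples of 8, with broadcast-channel-bias and full-bias
//! variants.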
std::vector<conv_bias::TestArg> get_nchw88_conv_bias_args(
        std::vector<size_t> kernel_vec,
        std::vector<param::ConvBias::NonlineMode> nlmode_vec,
        std::vector<megdnn::BiasMode> biasmode_vec, size_t stride) {
    using namespace conv_bias;
    using NLMode = param::ConvBias::NonlineMode;
    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t h, size_t w, size_t kernel,
                    size_t stride, size_t group, NLMode nlmode,
                    megdnn::BiasMode bias_mode) {
        constexpr int pack_c = 8;
        const size_t pad = kernel / 2;
        auto oc_per_group = oc / group;
        auto ic_per_group = ic / group;
        megdnn_assert(
                oc_per_group % pack_c == 0 && ic_per_group % pack_c == 0,
                "ocpg/icpg not divisible by 8");
        size_t kernel_h = kernel;
        size_t kernel_w = kernel;
        param::ConvBias param;
        param.format = param::ConvBias::Format::NCHW88;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = pad;
        param.pad_w = pad;
        param.nonlineMode = nlmode;
        auto src_tensor_shape = TensorShape{n, ic / pack_c, h, w, pack_c};
        auto weight_tensor_shape = TensorShape{oc / pack_c, ic / pack_c, kernel_h,
                                               kernel_w, pack_c, pack_c};
        auto bias_tensor_shape = TensorShape{};
        if (bias_mode == megdnn::BiasMode::BROADCAST_CHANNEL_BIAS) {
            bias_tensor_shape = {1, oc / pack_c, 1, 1, pack_c};
        } else if (bias_mode == megdnn::BiasMode::BIAS) {
            bias_tensor_shape = {
                    n, oc / pack_c, (h + 2 * pad - kernel) / stride + 1,
                    (w + 2 * pad - kernel) / stride + 1, pack_c};
        }
        if (group == 1) {
            param.sparse = param::ConvBias::Sparse::DENSE;
        } else {
            param.sparse = param::ConvBias::Sparse::GROUP;
            weight_tensor_shape = TensorShape{group,
                                              oc_per_group / pack_c,
                                              ic_per_group / pack_c,
                                              kernel_h,
                                              kernel_w,
                                              pack_c,
                                              pack_c};
        }
        args.emplace_back(
                param, src_tensor_shape, weight_tensor_shape, bias_tensor_shape);
    };
    for (auto bias : biasmode_vec)
        for (auto nlmode : nlmode_vec)
            for (size_t n : {1, 2})
                for (size_t kernel : kernel_vec)
                    for (size_t oc : {8, 16})
                        for (size_t ic : {8, 16, 24})
                            for (size_t h : {1, 3, 12})
                                for (size_t w : {1, 8, 13}) {
                                    for (size_t group = 1; group < oc / 8; ++group) {
                                        if (ic % (group * 8) || oc % (group * 8)) {
                                            continue;
                                        }
                                        if (kernel < h || kernel < w) {
                                            continue;
                                        }
                                        pack(n, oc, ic, h, w, kernel, stride, group,
                                             nlmode, bias);
                                    }
                                }
    return args;
}

}  // namespace conv_bias
}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen