You cannot select more than 25 topics. A topic must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

conv_bias.cpp 44 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133
  1. /**
  2. * \file dnn/test/common/conv_bias.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/common/conv_bias.h"
  12. #include "megdnn/opr_param_defs.h"
  13. #include "src/common/utils.h"
  14. #include "test/common/benchmarker.h"
  15. namespace megdnn {
  16. namespace test {
  17. namespace conv_bias {
  18. namespace {
  19. void convert_arg_from_nchw4_to_chwn4(TestArg& arg) {
  20. arg.param.format = param::ConvBias::Format::CHWN4;
  21. arg.src = TensorShape{arg.src[1], arg.src[2], arg.src[3], arg.src[0], 4};
  22. arg.filter = TensorShape{arg.filter[1], arg.filter[2], arg.filter[3],
  23. arg.filter[0], 4};
  24. arg.bias =
  25. TensorShape{arg.bias[1], arg.bias[2], arg.bias[3], arg.bias[0], 4};
  26. }
  27. } // namespace
  28. std::vector<TestArg> get_args() {
  29. std::vector<TestArg> args;
  30. param::ConvBias cur_param;
  31. using NLMode = param::ConvBias::NonlineMode;
  32. // clang-format off
  33. for (auto nlmode :
  34. {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
  35. for (size_t i : {9, 63}) {
  36. cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
  37. cur_param.nonlineMode = nlmode;
  38. // fallback case
  39. args.emplace_back(cur_param, TensorShape{10, 1, i, i},
  40. TensorShape{1, 1, 8, 8}, TensorShape{1, 1, 1, 1});
  41. args.emplace_back(cur_param, TensorShape{10, 4, i, i},
  42. TensorShape{3, 4, 4, 4}, TensorShape{1, 3, 1, 1});
  43. cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
  44. args.emplace_back(cur_param, TensorShape{10, 4, i, i},
  45. TensorShape{1, 4, 3, 3}, TensorShape{1, 1, 1, 1});
  46. args.emplace_back(cur_param, TensorShape{1, 4, i, i},
  47. TensorShape{5, 4, 3, 3}, TensorShape{1, 5, 1, 1});
  48. } }
  49. // clang-format on
  50. return args;
  51. }
//! Group (channel-wise) convolution cases: every nonlinearity, strides
//! {1,2}, paddings {0..3}, kernels {2,3,5,7} with 1 or 3 output channels
//! per group, plus two larger fixed-shape cases per nonlinearity.
std::vector<TestArg> get_chanwise_args() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
    cur_param.sparse = ConvBias::Param::Sparse::GROUP;
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
        cur_param.nonlineMode = nlmode;
        // simple case
        for (uint32_t s : {1, 2})
            for (uint32_t p : {0, 1, 2, 3})
                for (size_t f : {2, 3, 5, 7})
                    for (size_t ocpg : {1, 3}) {
                        cur_param.pad_h = cur_param.pad_w = p;
                        cur_param.stride_h = cur_param.stride_w = s;
                        args.emplace_back(cur_param, TensorShape{2, 3, 16, 16},
                                          TensorShape{3, ocpg, 1, f, f},
                                          TensorShape{1, 3 * ocpg, 1, 1});
                    }
        // NOTE: pad/stride here keep whatever the loops above last set
        // (p == 3, s == 2).
        args.emplace_back(cur_param, TensorShape{32, 12, 20, 10},
                          TensorShape{12, 2, 1, 4, 5},
                          TensorShape{1, 24, 1, 1});
        // padding larger than kern
        // NOTE(review): this case is byte-identical to the one above and the
        // padding is never actually raised beyond the 4x5 kernel — the pad
        // update the comment implies looks missing; confirm intended shapes.
        args.emplace_back(cur_param, TensorShape{32, 12, 20, 10},
                          TensorShape{12, 2, 1, 4, 5},
                          TensorShape{1, 24, 1, 1});
    }
    return args;
}
  82. std::vector<TestArg> get_args_1x1() {
  83. std::vector<TestArg> args;
  84. param::ConvBias cur_param;
  85. using NLMode = param::ConvBias::NonlineMode;
  86. for (auto nlmode :
  87. {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
  88. cur_param.nonlineMode = nlmode;
  89. for (size_t i : {16, 19}) {
  90. cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
  91. args.emplace_back(cur_param, TensorShape{2, 20, i, i + 1},
  92. TensorShape{30, 20, 1, 1},
  93. TensorShape{1, 30, 1, 1});
  94. cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
  95. args.emplace_back(cur_param, TensorShape{2, 20, i, i + 1},
  96. TensorShape{30, 20, 1, 1},
  97. TensorShape{1, 30, 1, 1});
  98. }
  99. }
  100. return args;
  101. }
//! Winograd-eligible conv-bias cases for a square kernel of
//! \p kernel_size: dense and 2-group sparsity, each with no-bias,
//! full-tensor-bias and per-channel-bias variants, plus large-OC shapes
//! to exercise multi-thread OC parallelism.
std::vector<TestArg> get_winograd_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t ic : {1, 3, 4, 7}) {
    for (size_t oc : {1, 3, 4, 7}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 0;
        //! no bias
        args.emplace_back(cur_param, TensorShape{1, ic, i, i},
                          TensorShape{oc, ic, kernel_size, kernel_size},
                          TensorShape{});
        //! bias
        // full-tensor bias: spatial dims equal the conv output size
        args.emplace_back(
                cur_param, TensorShape{2, ic, i, i},
                TensorShape{oc, ic, kernel_size, kernel_size},
                TensorShape{2, oc, (i + cur_param.pad_h * 2 - kernel_size) + 1,
                            (i + cur_param.pad_w * 2 - kernel_size) + 1});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, kernel_size, kernel_size},
                          TensorShape{1, oc, 1, 1});
        // same bias variants again with 2 groups
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * ic, i, i},
                TensorShape{2, oc, ic, kernel_size, kernel_size},
                TensorShape{2, 2 * oc,
                            (i + cur_param.pad_h * 2 - kernel_size) + 1,
                            (i + cur_param.pad_w * 2 - kernel_size) + 1});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, kernel_size, kernel_size},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    // NOTE: pad becomes 1 here (it was 0 in the loops above)
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, 8, i, i},
                          TensorShape{128, 8, kernel_size, kernel_size},
                          TensorShape{1, 128, 1, 1});
        args.emplace_back(cur_param, TensorShape{2, 8, i, i},
                          TensorShape{128, 8, kernel_size, kernel_size},
                          TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * 8, i, i},
                          TensorShape{2, 128, 8, kernel_size, kernel_size},
                          TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
//! Winograd 3x3 cases with channel counts that are multiples of
//! \p pack_size (for mk-packed matmul backends): dense and 2-group
//! sparsity with no-bias, full-tensor-bias and per-channel-bias
//! variants, plus large-OC shapes for multi-thread OC parallelism.
std::vector<TestArg> get_winograd_mk_packed_args(size_t pack_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t ic : {pack_size, 2 * pack_size}) {
    for (size_t oc : {pack_size, 2 * pack_size}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        // minimal smoke-test shape: 3x3 input, pack_size channels
        args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
                          TensorShape{pack_size, pack_size, 3, 3},
                          TensorShape{1, pack_size, 1, 1});
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        // pad 1 + 3x3 kernel keeps spatial size, so full bias is {2,oc,i,i}
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        // same bias variants again with 2 groups
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, 8, i, i},
                          TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
        args.emplace_back(cur_param, TensorShape{2, 8, i, i},
                          TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * 8, i, i},
                          TensorShape{2, 128, 8, 3, 3},
                          TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
//! Quantized variant of get_winograd_mk_packed_args: only IDENTITY/RELU
//! nonlinearities; when \p compute_float32 is set, the winograd
//! accumulation is forced to FLOAT32 compute mode.
std::vector<TestArg> get_quantized_winograd_mk_packed_args(
        size_t pack_size, bool compute_float32) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (size_t ic : {pack_size, 2 * pack_size}) {
    for (size_t oc : {pack_size, 2 * pack_size}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        if(compute_float32){
            cur_param.compute_mode = param::ConvBias::ComputeMode::FLOAT32;
        }
        // minimal smoke-test shape: 3x3 input, pack_size channels
        args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
                          TensorShape{pack_size, pack_size, 3, 3},
                          TensorShape{1, pack_size, 1, 1});
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        // same bias variants again with 2 groups
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, 8, i, i},
                          TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
        args.emplace_back(cur_param, TensorShape{2, 8, i, i},
                          TensorShape{128, 8, 3, 3}, TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * 8, i, i},
                          TensorShape{2, 128, 8, 3, 3},
                          TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
//! Quantized conv-bias cases for a single nonlinearity \p nlmode: both
//! convolution modes, 3x3 dense/group variants (no-bias, full-tensor
//! bias, per-channel bias) and a pad-0 1x1 dense case.
std::vector<TestArg> get_quantized_args_with_nlmode(
        param::ConvBias::NonlineMode nlmode) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    // clang-format off
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION,
                      param::ConvBias::Mode::CONVOLUTION}) {
    for (size_t ic : {1, 2, 3, 4, 5, 7}) {
    for (size_t oc : {1, 2, 3, 4, 5, 7}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});
        // same shapes with 2 groups
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
        // 1x1 kernel, no padding
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 0;
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 1, 1}, TensorShape{});
    } } } }
    // clang-format on
    return args;
}
  300. std::vector<TestArg> get_quantized_args() {
  301. using NLMode = param::ConvBias::NonlineMode;
  302. auto arg_p1 = get_quantized_args_with_nlmode(NLMode::IDENTITY),
  303. arg_p2 = get_quantized_args_with_nlmode(NLMode::RELU),
  304. arg_p3 = get_quantized_args_with_nlmode(NLMode::H_SWISH);
  305. std::vector<TestArg> args;
  306. args.insert(args.end(), arg_p1.begin(), arg_p1.end());
  307. args.insert(args.end(), arg_p2.begin(), arg_p2.end());
  308. args.insert(args.end(), arg_p3.begin(), arg_p3.end());
  309. return args;
  310. }
//! Int8 NCHW4 dense cases for a square kernel of \p kernel_size, with
//! per-channel bias only.
std::vector<TestArg> get_int8_nchw4_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {2, 1}) {
        // clamp batch for 7x7 kernels to bound the workload; note this
        // assigns the loop variable, so the clamp persists for the rest of
        // the inner iterations of the current b value
        if (kernel_size == 7) {
            b = std::min(b, 32_z);
        }
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
//! Int8 cases for packed layouts: NCHW44 when \p pack_size == 4, NCHW88
//! when \p pack_size == 8. \p compute_float32 forces FLOAT32 compute
//! mode; \p group_mode switches between dense and 2-group cases.
std::vector<TestArg> get_int8_nchw44_args(size_t kernel_size, size_t pack_size,
                                          bool compute_float32,
                                          bool group_mode) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    megdnn_assert(pack_size > 0, "not support pack_size");
    megdnn_assert(kernel_size > 0, "not support kernel_size");
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {1,2}) {
    for (size_t ic : {8,16}) {
    for (size_t oc : {8,16}) {
    for (size_t h : {9,23}) {
    for (size_t w : {9,23}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {1}) {
        // clamp batch for 7x7 kernels (mutates the loop variable)
        if (kernel_size == 7) {
            b = std::min(b, 32_z);
        }
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        if (pack_size == 4){
            cur_param.format = param::ConvBias::Format::NCHW44;
        } else if(pack_size == 8){
            cur_param.format = param::ConvBias::Format::NCHW88;
        }
        if(compute_float32){
            cur_param.compute_mode =
                    param::ConvBias::ComputeMode::FLOAT32;
        }
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        if (!group_mode) {
            //! no bias
            args.emplace_back(cur_param,
                              TensorShape{b, ic / pack_size, h, w, pack_size},
                              TensorShape{oc / pack_size, ic / pack_size, f, f,
                                          pack_size, pack_size},
                              TensorShape{});
            //! bias channel
            args.emplace_back(cur_param,
                              TensorShape{b, ic / pack_size, h, w, pack_size},
                              TensorShape{oc / pack_size, ic / pack_size, f, f,
                                          pack_size, pack_size},
                              TensorShape{1, oc / pack_size, 1, 1, pack_size});
            //! bias
            // full-tensor bias: spatial dims follow the conv output formula
            args.emplace_back(
                    cur_param, TensorShape{b, ic / pack_size, h, w, pack_size},
                    TensorShape{oc / pack_size, ic / pack_size, f, f, pack_size,
                                pack_size},
                    TensorShape{b, oc / pack_size, (h - f + 2 * p) / s + 1,
                                (w - f + 2 * p) / s + 1, pack_size});
        } else {
            cur_param.sparse = param::ConvBias::Sparse::GROUP;
            // NOTE(review): this first group case hard-codes a 3x3 filter
            // while the dst shape is computed from f == kernel_size — for
            // kernel_size != 3 the shapes disagree; confirm intent.
            args.emplace_back(
                    cur_param,
                    TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
                    TensorShape{2, oc / pack_size, ic / pack_size, 3, 3,
                                pack_size, pack_size},
                    TensorShape{2, 2 * oc / pack_size, (h - f + 2 * p) / s + 1,
                                (w - f + 2 * p) / s + 1, pack_size});
            args.emplace_back(
                    cur_param,
                    TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
                    TensorShape{2, oc / pack_size, ic / pack_size, f, f,
                                pack_size, pack_size},
                    TensorShape{1, 2 * oc / pack_size, 1, 1, pack_size});
            args.emplace_back(
                    cur_param,
                    TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
                    TensorShape{2, oc / pack_size, ic / pack_size, f, f,
                                pack_size, pack_size},
                    TensorShape{});
        }
    } } } } } } } } }
    // clang-format on
    return args;
}
//! Int8 NCHW4 cases with batch/oc values that are not multiples of the
//! usual tile sizes (7, 4, 1, ...), intended to exercise boundary
//! handling in the kernels.
std::vector<TestArg> get_int8_nchw4_args_check_bounds(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {7, 8, 4, 1}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 8, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
//! Int8 NCHW4 cases with small batches (4..12) and a dense sweep of
//! widths (8..16), per-channel bias only.
std::vector<TestArg> get_int8_nchw4_args_small_batch(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {12, 8, 4}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 8, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 9, 10, 11, 12, 13, 14, 15, 16}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
//! Int8 NCHW4 cases with small input channel counts (4 or 12) and large
//! output channel counts, per-channel bias only.
std::vector<TestArg> get_int8_nchw4_small_channel_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {4, 12}) {
    for (size_t oc : {128, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format =
                param::ConvBias::Format::NCHW4;
        cur_param.sparse =
                param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h =
                cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(
                cur_param,
                TensorShape{b, ic / 4, h, w, 4},
                TensorShape{oc, ic / 4, f, f,
                            4},
                TensorShape{1, oc / 4, 1, 1,
                            4});
    } } } } } } } } }
    // clang-format on
    return args;
}
//! Int8 NCHW4 cases combining small input channels (4 or 12) with
//! non-tile-aligned batch/oc values, to exercise boundary handling.
std::vector<TestArg> get_int8_nchw4_small_channel_args_check_bounds(
        size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {8, 7, 4, 1}) {
    for (size_t ic : {4, 12}) {
    for (size_t oc : {16, 8, 12, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
  549. std::vector<TestArg> get_int8_chwn4_args(size_t kernel_size) {
  550. auto args = get_int8_nchw4_args(kernel_size);
  551. for (auto& arg : args) {
  552. convert_arg_from_nchw4_to_chwn4(arg);
  553. }
  554. return args;
  555. }
  556. std::vector<TestArg> get_int8_chwn4_args_check_bounds(size_t kernel_size) {
  557. auto args = get_int8_nchw4_args_check_bounds(kernel_size);
  558. for (auto& arg : args) {
  559. convert_arg_from_nchw4_to_chwn4(arg);
  560. }
  561. return args;
  562. }
  563. std::vector<TestArg> get_int8_chwn4_small_channel_args(size_t kernel_size) {
  564. auto args = get_int8_nchw4_small_channel_args(kernel_size);
  565. for (auto& arg : args) {
  566. convert_arg_from_nchw4_to_chwn4(arg);
  567. }
  568. return args;
  569. }
  570. std::vector<TestArg> get_int8_chwn4_small_channel_args_check_bounds(
  571. size_t kernel_size) {
  572. auto args = get_int8_nchw4_small_channel_args_check_bounds(kernel_size);
  573. for (auto& arg : args) {
  574. convert_arg_from_nchw4_to_chwn4(arg);
  575. }
  576. return args;
  577. }
  578. std::vector<TestArg> get_int8_chwn4_args_small_batch(size_t kernel_size) {
  579. auto args = get_int8_nchw4_args_small_batch(kernel_size);
  580. for (auto& arg : args) {
  581. convert_arg_from_nchw4_to_chwn4(arg);
  582. }
  583. return args;
  584. }
//! Int8 NCHW4 cases sized for the tensor-core code path (per the
//! function name): fixed batch 64, oc 128, ic in {32, 64}.
std::vector<TestArg> get_int8_nchw4_tensorcore_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
        size_t b = 64, oc = 128;
        for (size_t ic : {32, 64}) {
        for (size_t h : {8}) {
        for (size_t w : {11}) {
        for (int p : {static_cast<int>(kernel_size / 2), 0}) {
        for (size_t s : {1, 2}) {
            size_t f = kernel_size;
            cur_param.mode = mode;
            cur_param.nonlineMode = nlmode;
            cur_param.format = param::ConvBias::Format::NCHW4;
            cur_param.sparse = param::ConvBias::Sparse::DENSE;
            cur_param.pad_h = cur_param.pad_w = p;
            cur_param.stride_h = cur_param.stride_w = s;
            //! bias channel
            args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                              TensorShape{oc, ic / 4, f, f, 4},
                              TensorShape{1, oc / 4, 1, 1, 4});
        } } } } }
    } }
    // clang-format on
    return args;
}
  614. std::vector<TestArg> get_int8_chwn4_tensorcore_args(size_t kernel_size) {
  615. auto args = get_int8_nchw4_tensorcore_args(kernel_size);
  616. for (auto& arg : args) {
  617. convert_arg_from_nchw4_to_chwn4(arg);
  618. }
  619. return args;
  620. }
  621. void check_conv_bias(DType src_dtype, DType filter_dtype, DType bias_dtype,
  622. DType dst_dtype, Handle* handle, const char* algo,
  623. param::ConvBias::Format format,
  624. const std::vector<TestArg>& args) {
  625. megdnn_assert(src_dtype.enumv() == filter_dtype.enumv());
  626. Checker<ConvBiasForward> checker(handle);
  627. if (algo) {
  628. checker.set_before_exec_callback(
  629. ConvBiasAlgoChecker<ConvBiasForward>(algo));
  630. }
  631. std::unique_ptr<RNG> rng;
  632. std::unique_ptr<RNG> bias_rng;
  633. std::unique_ptr<RNG> const_rng;
  634. // TODO: check range of rng
  635. if (src_dtype.enumv() == DTypeEnum::QuantizedS8) {
  636. rng = std::make_unique<UniformIntRNG>(-3, 3);
  637. const_rng = std::make_unique<UniformIntRNG>(1, 1);
  638. megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
  639. bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
  640. checker.set_epsilon(1 + 1e-3)
  641. .set_max_avg_error(1e-1)
  642. .set_max_avg_biased_error(1e-1);
  643. } else if (src_dtype.enumv() == DTypeEnum::Float16) {
  644. rng = std::make_unique<NormalRNG>(2.f);
  645. megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float16);
  646. bias_rng = std::make_unique<NormalRNG>(2.f);
  647. checker.set_epsilon(1e-2);
  648. } else if (src_dtype.enumv() == DTypeEnum::Float32) {
  649. rng = std::make_unique<NormalRNG>(2.f);
  650. megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float32);
  651. bias_rng = std::make_unique<NormalRNG>(2.f);
  652. }
  653. megdnn_assert(rng != nullptr && bias_rng != nullptr);
  654. checker.set_rng(0, rng.get())
  655. .set_rng(1, rng.get())
  656. .set_rng(2, rng.get())
  657. .set_rng(3, rng.get());
  658. if (args.empty()) {
  659. std::vector<TestArg> default_args;
  660. using Param = param::ConvBias;
  661. using Format = Param::Format;
  662. if (format == Format::NCHW4) {
  663. default_args = get_int8_nchw4_args(3);
  664. } else if (format == Format::CHWN4) {
  665. default_args = get_int8_chwn4_args(3);
  666. }
  667. for (auto&& arg : default_args) {
  668. checker.set_dtype(0, src_dtype)
  669. .set_dtype(1, filter_dtype)
  670. .set_dtype(2, bias_dtype)
  671. .set_dtype(4, dst_dtype)
  672. .set_param(arg.param)
  673. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  674. }
  675. } else {
  676. for (auto&& arg : args) {
  677. checker.set_dtype(0, src_dtype)
  678. .set_dtype(1, filter_dtype)
  679. .set_dtype(2, bias_dtype)
  680. .set_dtype(4, dst_dtype)
  681. .set_param(arg.param)
  682. .execs({arg.src, arg.filter, arg.bias, {}, {}});
  683. }
  684. }
  685. }
  686. #if MEGDNN_WITH_BENCHMARK
  687. std::vector<conv_bias::TestArg> get_winograd_benchmark_args(size_t kernel,
  688. size_t pack_size) {
  689. std::vector<conv_bias::TestArg> args;
  690. auto pack = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
  691. size_t p) {
  692. if (ic % pack_size != 0 || oc % pack_size != 0)
  693. return;
  694. if (w + 2 * p < kernel || h + 2 * p < kernel)
  695. return;
  696. param::ConvBias param;
  697. param.stride_h = 1;
  698. param.stride_w = 1;
  699. param.pad_h = p;
  700. param.pad_w = p;
  701. args.push_back(conv_bias::TestArg{param,
  702. TensorShape{1, ic, h, w},
  703. TensorShape{oc, ic, kernel, kernel},
  704. {1, oc, 1, 1}});
  705. };
  706. for (size_t ic : {8, 16, 32, 64}) {
  707. for (size_t oc : {8, 16, 32, 64}) {
  708. pack(oc, ic, 56, 56, kernel, kernel / 2);
  709. pack(oc, ic, 128, 128, kernel, kernel / 2);
  710. pack(oc, ic, 256, 256, kernel, kernel / 2);
  711. }
  712. }
  713. //! conv in vgg16
  714. pack(512, 512, 15, 15, kernel, kernel / 2);
  715. pack(512, 256, 15, 15, kernel, kernel / 2);
  716. pack(256, 256, 29, 29, kernel, kernel / 2);
  717. pack(256, 128, 29, 29, kernel, kernel / 2);
  718. pack(128, 128, 57, 57, kernel, kernel / 2);
  719. pack(128, 64, 57, 57, kernel, kernel / 2);
  720. pack(64, 64, 123, 123, kernel, kernel / 2);
  721. pack(64, 24, 123, 123, kernel, kernel / 2);
  722. pack(24, 24, 224, 224, kernel, kernel / 2);
  723. //! conv in resnet18
  724. pack(64, 64, 56, 56, kernel, kernel / 2);
  725. pack(128, 128, 28, 28, kernel, kernel / 2);
  726. pack(256, 256, 14, 14, kernel, kernel / 2);
  727. pack(512, 512, 7, 7, kernel, kernel / 2);
  728. return args;
  729. }
//! Benchmark the winograd ConvBias algorithm selected by \p algo_name
//! against the default (non-winograd) Convolution on the shapes from
//! get_winograd_benchmark_args, printing per-case time, Gflops and speedup.
void benchmark_winograd(const char* algo_name, Handle* handle, size_t kernel,
                        size_t pack_size) {
    auto&& args = get_winograd_benchmark_args(kernel, pack_size);
    using namespace conv_bias;
    constexpr size_t RUN = 10;  //! iterations averaged per measurement
    Benchmarker<Convolution> benchmark(handle);
    benchmark.set_display(false);
    benchmark.set_times(RUN);
    Benchmarker<ConvBias> benchmark_winograd(handle);
    benchmark_winograd.set_display(false);
    benchmark_winograd.set_times(RUN);
    for (auto&& arg : args) {
        TensorLayout dst_layout;
        auto opr = handle->create_operator<ConvBias>();
        opr->param() = arg.param;
        //! deduce the output layout only to compute the theoretical
        //! flop count below
        opr->deduce_layout({arg.src, dtype::Float32()},
                           {arg.filter, dtype::Float32()},
                           {arg.bias, dtype::Float32()}, {}, dst_layout);
        //! dst.nr_elems * IC * FH * FW * 2 (one multiply + one add),
        //! converted to Gflop and scaled by 1e3 so dividing by a time in
        //! milliseconds yields Gflops
        float computations = dst_layout.total_nr_elems() * arg.filter[1] *
                             arg.filter[2] * arg.filter[3] * 2.0 /
                             (1024 * 1024 * 1024) * 1e3;
        //! baseline: plain Convolution with the same geometry (no bias)
        param::Convolution conv_param;
        conv_param.pad_h = arg.param.pad_h;
        conv_param.pad_w = arg.param.pad_w;
        conv_param.stride_h = arg.param.stride_h;
        conv_param.stride_w = arg.param.stride_w;
        auto used = benchmark.set_param(conv_param)
                            .exec({arg.src, arg.filter, {}}) /
                    RUN;
        benchmark_winograd.set_param(arg.param);
        //! time only the algorithm matching algo_name
        auto used_winograd =
                algo_benchmark<ConvBias>(benchmark_winograd,
                                         {arg.src, arg.filter, {}, {}, {}},
                                         algo_name) /
                RUN;
        printf("%s %s: normal: %f ms %f Gflops winograd: %f ms %f GFlops "
               "speedup: "
               "%f\n",
               arg.src.to_string().c_str(), arg.filter.to_string().c_str(),
               used, computations / used, used_winograd,
               computations / used_winograd, used / used_winograd);
    }
}
  774. #endif // MEGDNN_WITH_BENCHMARK
  775. std::vector<conv_bias::TestArg> get_conv_bias_args(
  776. std::vector<size_t> kernel, size_t stride, bool no_pad, bool no_bias,
  777. bool no_nonlinemode, bool quantized_nlmod, bool only_broadcast_bias) {
  778. using namespace conv_bias;
  779. using Param = param::ConvBias;
  780. using NLMode = param::ConvBias::NonlineMode;
  781. std::vector<TestArg> args;
  782. auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
  783. size_t kernel, size_t stride, NLMode nlmode) {
  784. Param param;
  785. param.stride_h = stride;
  786. param.stride_w = stride;
  787. if (!no_pad) {
  788. param.pad_h = kernel / 2;
  789. param.pad_w = kernel / 2;
  790. } else {
  791. param.pad_h = 0;
  792. param.pad_w = 0;
  793. }
  794. param.nonlineMode = nlmode;
  795. args.emplace_back(param, TensorShape{n, ic, h, w},
  796. TensorShape{oc, ic, kernel, kernel}, TensorShape{});
  797. if (!no_bias) {
  798. args.emplace_back(param, TensorShape{n, ic, h, w},
  799. TensorShape{oc, ic, kernel, kernel},
  800. TensorShape{1, oc, 1, 1});
  801. if (!only_broadcast_bias) {
  802. args.emplace_back(
  803. param, TensorShape{n, ic, h, w},
  804. TensorShape{oc, ic, kernel, kernel},
  805. TensorShape{
  806. n, oc,
  807. (h + 2 * param.pad_h - kernel) / stride + 1,
  808. (w + 2 * param.pad_h - kernel) / stride + 1});
  809. }
  810. }
  811. param.sparse = param::ConvBias::Sparse::GROUP;
  812. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  813. TensorShape{2, oc, ic, kernel, kernel},
  814. TensorShape{});
  815. if (!no_bias) {
  816. if (!only_broadcast_bias) {
  817. args.emplace_back(
  818. param, TensorShape{n, 2 * ic, h, w},
  819. TensorShape{2, oc, ic, kernel, kernel},
  820. TensorShape{
  821. n, 2 * oc,
  822. (h + param.pad_h * 2 - kernel) / stride + 1,
  823. (w + param.pad_w * 2 - kernel) / stride + 1});
  824. }
  825. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  826. TensorShape{2, oc, ic, kernel, kernel},
  827. TensorShape{1, 2 * oc, 1, 1});
  828. }
  829. };
  830. std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
  831. if (!no_nonlinemode) {
  832. nonlinemode.emplace_back(NLMode::RELU);
  833. nonlinemode.emplace_back(NLMode::H_SWISH);
  834. if (!quantized_nlmod) {
  835. nonlinemode.emplace_back(NLMode::SIGMOID);
  836. }
  837. }
  838. for (size_t n : {1, 2}) {
  839. for (auto nlmode : nonlinemode) {
  840. for (size_t ic : {1, 3, 7}) {
  841. for (size_t oc : {1, 3, 7}) {
  842. for (size_t size : {8, 16, 20}) {
  843. for (size_t kern : kernel) {
  844. pack(n, oc, ic, size, size, kern, stride, nlmode);
  845. }
  846. }
  847. }
  848. }
  849. }
  850. }
  851. return args;
  852. }
  853. std::vector<megdnn::test::conv_bias::TestArg> get_conv_bias_1x1_args(
  854. bool no_bias, bool no_nonlinemode, bool quantized_nlmod,
  855. bool only_broadcast_bias) {
  856. using namespace conv_bias;
  857. using Param = param::ConvBias;
  858. using NLMode = param::ConvBias::NonlineMode;
  859. using CONVMode = param::ConvBias::Mode;
  860. std::vector<TestArg> args;
  861. auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
  862. size_t stride, NLMode nlmode, CONVMode convmode) {
  863. Param param;
  864. param.stride_h = stride;
  865. param.stride_w = stride;
  866. param.pad_h = 0;
  867. param.pad_w = 0;
  868. param.mode = convmode;
  869. param.nonlineMode = nlmode;
  870. args.emplace_back(param, TensorShape{n, ic, h, w},
  871. TensorShape{oc, ic, 1, 1}, TensorShape{});
  872. if (!no_bias) {
  873. args.emplace_back(param, TensorShape{n, ic, h, w},
  874. TensorShape{oc, ic, 1, 1},
  875. TensorShape{1, oc, 1, 1});
  876. if (!only_broadcast_bias) {
  877. args.emplace_back(param, TensorShape{n, ic, h, w},
  878. TensorShape{oc, ic, 1, 1},
  879. TensorShape{n, oc, (h - 1) / stride + 1,
  880. (w - 1) / stride + 1});
  881. }
  882. }
  883. param.sparse = param::ConvBias::Sparse::GROUP;
  884. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  885. TensorShape{2, oc, ic, 1, 1}, TensorShape{});
  886. if (!no_bias) {
  887. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  888. TensorShape{2, oc, ic, 1, 1},
  889. TensorShape{1, 2 * oc, 1, 1});
  890. if (!only_broadcast_bias) {
  891. args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
  892. TensorShape{2, oc, ic, 1, 1},
  893. TensorShape{n, 2 * oc, (h - 1) / stride + 1,
  894. (w - 1) / stride + 1});
  895. }
  896. }
  897. };
  898. std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
  899. if (!no_nonlinemode) {
  900. nonlinemode.emplace_back(NLMode::RELU);
  901. nonlinemode.emplace_back(NLMode::H_SWISH);
  902. if (!quantized_nlmod) {
  903. nonlinemode.emplace_back(NLMode::SIGMOID);
  904. }
  905. }
  906. std::vector<CONVMode> convmodes{param::ConvBias::Mode::CONVOLUTION,
  907. param::ConvBias::Mode::CROSS_CORRELATION};
  908. for (size_t n : {1, 2})
  909. for (size_t oc : {1, 9, 33})
  910. for (size_t ic : {1, 16, 64})
  911. for (size_t size : {7, 14, 28})
  912. for (auto nlmode : nonlinemode)
  913. for (auto convmode : convmodes) {
  914. pack(n, oc, ic, size, size, 1, nlmode, convmode);
  915. }
  916. return args;
  917. }
  918. void check_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle,
  919. const char* algo_name) {
  920. using namespace conv_bias;
  921. Checker<ConvBias> checker(handle);
  922. checker.set_before_exec_callback(
  923. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  924. for (auto&& arg : args) {
  925. checker.set_param(arg.param).execs(
  926. {arg.src, arg.filter, arg.bias, {}, {}});
  927. }
  928. }
  929. void checker_conv_bias_int8x8x16(std::vector<conv_bias::TestArg> args,
  930. Handle* handle, const char* algo_name) {
  931. using namespace conv_bias;
  932. Checker<ConvBias> checker(handle);
  933. checker.set_before_exec_callback(
  934. conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
  935. checker.set_dtype(0, dtype::Int8());
  936. checker.set_dtype(1, dtype::Int8());
  937. checker.set_dtype(2, dtype::Int16());
  938. checker.set_dtype(4, dtype::Int16());
  939. for (auto&& arg : args) {
  940. checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
  941. }
  942. }
  943. void winograd_algo_extra_impl(const TensorNDArray& tensors, uint32_t m,
  944. param::ConvBias param, Handle* handle,
  945. param::MatrixMul::Format format) {
  946. megdnn_assert(param.format == param::ConvBias::Format::NCHW ||
  947. param.format == param::ConvBias::Format::NCHW44);
  948. auto winograd_preprocess_opr =
  949. handle->create_operator<WinogradFilterPreprocess>();
  950. winograd_preprocess_opr->param().output_block_size = m;
  951. winograd_preprocess_opr->param().format = format;
  952. winograd_preprocess_opr->param().compute_mode =
  953. param.compute_mode;
  954. TensorLayout filter_transform_layout;
  955. winograd_preprocess_opr->deduce_layout(tensors[1].layout,
  956. filter_transform_layout);
  957. size_t winograd_preprocess_workspace_in_bytes =
  958. winograd_preprocess_opr->get_workspace_in_bytes(
  959. tensors[1].layout, filter_transform_layout);
  960. auto conv_bias_opr = handle->create_operator<ConvBias>();
  961. conv_bias_opr->param() = param;
  962. if (param.format == param::ConvBias::Format::NCHW) {
  963. conv_bias_opr->param().format = param::ConvBias::Format::NCHW_WINOGRAD;
  964. } else {
  965. conv_bias_opr->param().format =
  966. param::ConvBias::Format::NCHW44_WINOGRAD;
  967. }
  968. conv_bias_opr->param().output_block_size = m;
  969. size_t conv_bias_workspace_in_bytes = conv_bias_opr->get_workspace_in_bytes(
  970. tensors[0].layout, filter_transform_layout, tensors[2].layout,
  971. tensors[3].layout, tensors[4].layout, nullptr);
  972. WorkspaceBundle wb(nullptr, {filter_transform_layout.span().dist_byte(),
  973. conv_bias_workspace_in_bytes,
  974. winograd_preprocess_workspace_in_bytes});
  975. wb.set(malloc(wb.total_size_in_bytes()));
  976. TensorND filter_transform_tensor(wb.get(0),
  977. std::move(filter_transform_layout));
  978. winograd_preprocess_opr->exec(tensors[1], filter_transform_tensor,
  979. wb.get_workspace(2));
  980. conv_bias_opr->exec(tensors[0], filter_transform_tensor, tensors[2],
  981. tensors[3], tensors[4], nullptr, wb.get_workspace(1));
  982. free(wb.ptr());
  983. };
  984. } // namespace conv_bias
  985. } // namespace test
  986. } // namespace megdnn
  987. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台