convolution.cpp

#include "test/common/convolution.h"
#include "hcc_detail/hcc_defs_prologue.h"
#include "megdnn/opr_param_defs.h"
#include "megdnn/oprs.h"
#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/rng.h"
#include "test/common/tensor.h"
#include "test/common/workspace_wrapper.h"
#include "test/rocm/benchmarker.h"
#include "test/rocm/fixture.h"
#include "src/common/utils.h"
#include "src/rocm/utils.h"
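
// Convolution tests for the ROCm (MIOpen) backend: forward, backward-data and
// backward-filter correctness checks, plus benchmarks that are compiled only
// when MEGDNN_WITH_BENCHMARK is enabled.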

namespace megdnn {
namespace test {
namespace convolution {

std::vector<TestArg> get_args_0() {
    std::vector<TestArg> args, tmp_args;
#define ADD_ARGS(NAME)                \
    tmp_args = get_args_##NAME();     \
    args.insert(args.end(), tmp_args.begin(), tmp_args.end());
    ADD_ARGS(common)
    ADD_ARGS(padding)
    ADD_ARGS(large_channel)
    ADD_ARGS(1x1)
    ADD_ARGS(large_filter)
    ADD_ARGS(exhaustive_search)
    ADD_ARGS(4x4)
    ADD_ARGS(large_channels)
    ADD_ARGS(x86_direct_case_2)
    ADD_ARGS(cudnn_5_1_failures)
    ADD_ARGS(x86_winograd_algorithm)
    ADD_ARGS(BRAIN_481)
#undef ADD_ARGS
    return args;
}
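
// Each ADD_ARGS(NAME) invocation above appends the cases returned by
// get_args_NAME(), so get_args_0() is simply the concatenation of all the
// listed case groups.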

std::vector<TestArg> get_args_1() {
    return get_args_fallback_templated_impl();
}

std::vector<TestArg> get_args_2() {
    return get_args_fallback_non_templated_impl();
}

std::vector<TestArg> get_group_conv_args() {
    std::vector<TestArg> args;
    for (size_t batch_size : {2}) {
        for (size_t ih : {23}) {
            for (size_t iw : {ih + 1}) {
                for (size_t icpg : {2, 4, 8}) {
                    for (size_t ocpg : {4, 8}) {
                        for (size_t fh : {3, 5, 7}) {
                            for (size_t fw : {fh, fh + 1}) {
                                for (size_t ph : {0_z, size_t{fw / 2}}) {
                                    for (size_t sh : {1, 2}) {
                                        for (size_t dh : {1, 2}) {
                                            param::Convolution param;
                                            size_t groups = 2;
                                            param.sparse =
                                                    param::Convolution::Sparse::GROUP;
                                            param.mode = param::Convolution::Mode::
                                                    CROSS_CORRELATION;
                                            param.stride_h = param.stride_w = sh;
                                            param.pad_h = param.pad_w = ph;
                                            param.dilate_h = param.dilate_w = dh;
                                            args.emplace_back(
                                                    param,
                                                    TensorShape{
                                                            batch_size, icpg * groups,
                                                            ih, iw},
                                                    TensorShape{
                                                            groups, ocpg, icpg, fh,
                                                            fw});
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    return args;
}
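
// Example of one case generated above (taking the smallest value from each loop):
// batch_size = 2, ih = 23, iw = 24, icpg = 2, ocpg = 4, fh = fw = 3, ph = 0,
// sh = dh = 1 and groups = 2 give
//     src    = {batch_size, icpg * groups, ih, iw} = {2, 4, 23, 24}
//     filter = {groups, ocpg, icpg, fh, fw}        = {2, 4, 2, 3, 3}
// i.e. the grouped-convolution filter is five-dimensional.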

} // namespace convolution

TEST_F(ROCM, CONV_GROUP) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_group_conv_args();
    Checker<ConvolutionForward> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}
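
// In the forward checks here and below, the empty shape passed as the last
// element of execs({src, filter, {}}) leaves the output shape to be deduced
// from the inputs and the convolution param (an assumption about the Checker
// helper, common to all of these forward tests).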

TEST_F(ROCM, CONV_CHANNWISE) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_chanwise_args();
    Checker<ConvolutionForward> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}

TEST_F(ROCM, CONVOLUTION_FORWARD_0) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_args_0();
    Checker<ConvolutionForward> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
#if !MEGDNN_DISABLE_FLOAT16
        checker.set_dtype(0, dtype::Float16())
                .set_dtype(1, dtype::Float16())
                .set_dtype(2, dtype::Float16())
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
#endif
    }
}
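
// Note (interpretation, not stated in the tests): for the fp16 runs the inputs
// are drawn from UniformFloatRNG(scale, 2 * scale) with
// scale = 1 / sqrt(IC * FH * FW), i.e. scaled down by the filter fan-in. This
// presumably keeps each accumulated output element around unit magnitude, so
// the looser 1e-1 epsilon is enough despite fp16 rounding.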

TEST_F(ROCM, CONVOLUTION_FORWARD_1) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_args_1();
    Checker<ConvolutionForward> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
#if !MEGDNN_DISABLE_FLOAT16
        checker.set_dtype(0, dtype::Float16())
                .set_dtype(1, dtype::Float16())
                .set_dtype(2, dtype::Float16())
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
#endif
    }
}

TEST_F(ROCM, CONVOLUTION_FORWARD_2) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_args_2();
    Checker<ConvolutionForward> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
#if !MEGDNN_DISABLE_FLOAT16
        checker.set_dtype(0, dtype::Float16())
                .set_dtype(1, dtype::Float16())
                .set_dtype(2, dtype::Float16())
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
#endif
    }
}

TEST_F(ROCM, CONVOLUTION_1X1_FORWARD) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_1x1_args();
    Checker<ConvolutionForward> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}

#if MEGDNN_WITH_BENCHMARK
TEST_F(ROCM, CONVOLUTION_1X1_FORWARD_ALL_ALGOS) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), true);
    using namespace convolution;
    OprProxy<ConvolutionForward> proxy{true};
    proxy.warmup_times = 1;
    proxy.exec_times = 10;
    Benchmarker<ConvolutionForward> checker(handle_rocm());
    checker.set_times(1);
    auto get_computation = [&](TestArg arg) -> float {
        megdnn_assert(arg.param.format == param::Convolution::Format::NCHW);
        size_t N = arg.src[0], IC = arg.src[1], IH = arg.src[2], IW = arg.src[3],
               OC = arg.filter[0], FH = arg.filter[2], FW = arg.filter[3],
               SH = arg.param.stride_h, SW = arg.param.stride_w, PH = arg.param.pad_h,
               PW = arg.param.pad_w;
        size_t OH = infer_conv_shape(IH, FH, SH, PH);
        size_t OW = infer_conv_shape(IW, FW, SW, PW);
        float flops = 2.0 * N * OC * OH * OW * IC * FH * FW;
        return flops;
    };
    std::vector<TestArg> args = get_1x1_args();
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_proxy(proxy)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_param(arg.param);
        float time_in_ms = checker.execs({arg.src, arg.filter, {}});
        float flops = get_computation(arg);
        printf("inp=%s,flt=%s,flops=%.2fGflo,time = %.2f ms, perf = %.2f "
               "GFLOPS\n",
               arg.src.to_string().c_str(), arg.filter.to_string().c_str(),
               flops / 1e9, time_in_ms, flops / (1e6 * time_in_ms));
    }
}
#endif

TEST_F(ROCM, CONVOLUTION_BACKWARD_DATA_0) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_args_0();
    Checker<ConvolutionBackwardData> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        float scale = 1.0f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_rocm()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
#if !MEGDNN_DISABLE_FLOAT16
        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
#endif
    }
}
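
// Note on the layout order used with exec() above (inferred from how the layouts
// are built, not spelled out in the test): ConvolutionBackwardData takes
// (filter, diff, grad), so the forward-deduced `dst` layout plays the role of
// the output gradient and `src` is the input gradient being verified.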

TEST_F(ROCM, CONVOLUTION_BACKWARD_DATA_1) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_args_1();
    Checker<ConvolutionBackwardData> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        float scale = 1.0f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_rocm()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
#if !MEGDNN_DISABLE_FLOAT16
        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
#endif
    }
}

TEST_F(ROCM, CONVOLUTION_BACKWARD_DATA_2) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_args_2();
    Checker<ConvolutionBackwardData> checker(handle_rocm());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        float scale = 1.0f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_rocm()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
#if !MEGDNN_DISABLE_FLOAT16
        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
#endif
    }
}

TEST_F(ROCM, DISABLED_CONVOLUTION_BACKWARD_FILTER) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), false);
    using namespace convolution;
    std::vector<TestArg> args = get_args();
    Checker<ConvolutionBackwardFilter> checker(handle_rocm());
    NormalRNG default_rng;
    bool f16_checked = false;
    MEGDNN_MARK_USED_VAR(f16_checked);
    for (auto&& arg : args) {
        using Mode = param::Convolution::Mode;
        //! non-xcorr mode is not supported by MIOpen
        if (arg.param.mode == Mode::CONVOLUTION) {
            continue;
        }
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_rocm()->create_operator<Convolution>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        float scale = 1.0f / sqrt(dst[2] * dst[3]);
        UniformFloatRNG rng(scale, 2 * scale);
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{src, dst, filter});
#if !MEGDNN_DISABLE_FLOAT16
        //! FIXME: MIOpen FP16 convolution backward-weights is still buggy
#if 0
        // reducing over a large f16 array may introduce significant error
        if (dst.total_nr_elems() >= 1000 && f16_checked)
            continue;
        f16_checked = true;
        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .exec(TensorLayoutArray{src, dst, filter});
#endif
#endif
    }
}

#if MEGDNN_WITH_BENCHMARK
TEST_F(ROCM, CONV_FWD_BENCHMARK) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), true);
    auto benchmarker =
            ROCMBenchmarker<ConvolutionForward>(handle_rocm(), handle_naive(false));
    auto run = [&](size_t N, size_t OC, size_t IC, size_t IH, size_t IW, size_t SH = 1,
                   size_t SW = 1, size_t FH = 1, size_t FW = 1, size_t PH = 0,
                   size_t PW = 0, DType dtype = dtype::Float32()) {
        benchmarker.set_dtype(0, dtype).set_dtype(1, dtype).set_dtype(2, dtype);
        benchmarker.set_display(true);
        ConvolutionForward::Param param;
        param.stride_h = SH;
        param.stride_w = SW;
        param.pad_h = PH;
        param.pad_w = PW;
        benchmarker.set_param(param);
        size_t OH = (IH - FH + 2 * PH) / SH + 1;
        size_t OW = (IW - FW + 2 * PW) / SW + 1;
        // warm up: let the backend find the best algo first
        benchmarker.execs({{N, IC, IH, IW}, {OC, IC, FH, FW}, {N, OC, OH, OW}});
        // do the actual benchmark run
        auto time_ms =
                benchmarker.execs({{N, IC, IH, IW}, {OC, IC, FH, FW}, {N, OC, OH, OW}});
        auto flo = (double)N * OC * IC * OH * OW * FH * FW * 2;
        auto flops = flo / (time_ms * 1e9);
        printf("%.3fG FLO, flops %.3fTFLOPS\n", flo / 1e9, flops);
    };
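
    // Worked example of the shape / FLOP math above:
    // run(32, 24, 16, 224, 224, 2, 2, 7, 7, 3, 3) gives
    //     OH = (224 - 7 + 2 * 3) / 2 + 1 = 112, OW = 112,
    //     flo = 2 * 32 * 24 * 16 * 112 * 112 * 7 * 7 ≈ 15.1 GFLO.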
    run(32, 24, 16, 224, 224, 2, 2, 7, 7, 3, 3);
    run(32, 128, 32, 112, 112, 1, 1, 3, 3, 1, 1);
    run(32, 128, 128, 56, 56, 1, 1, 3, 3, 1, 1);
    run(32, 128, 256, 28, 28, 1, 1, 3, 3, 1, 1);
    run(32, 256, 256, 28, 28, 1, 1, 1, 1, 0, 0);
    run(32, 256, 256, 28, 28, 2, 2, 3, 3, 1, 1);
    run(32, 256, 256, 14, 14, 1, 1, 3, 3, 1, 1);
    run(32, 512, 512, 7, 7, 1, 1, 3, 3, 1, 1);
#if !MEGDNN_DISABLE_FLOAT16
    run(32, 256, 256, 56, 56, 1, 1, 1, 1, 0, 0, dtype::Float16());
#endif
}
#endif

} // namespace test
} // namespace megdnn

// vim: syntax=cpp.doxygen