You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

convolution3d.cpp 17 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. /**
  2. * \file dnn/test/cuda/convolution3d.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/common/convolution3d.h"
  12. #include "megdnn/opr_param_defs.h"
  13. #include "megdnn/oprs.h"
  14. #include "src/cuda/utils.h"
  15. #include "test/common/benchmarker.h"
  16. #include "test/common/checker.h"
  17. #include "test/common/rng.h"
  18. #include "test/common/tensor.h"
  19. #include "test/common/workspace_wrapper.h"
  20. #include "test/cuda/fixture.h"
  21. namespace megdnn {
  22. namespace test {
//! NOTE(review): this whole test is compiled out with `#if 0`; the reason is
//! not recorded here — confirm whether int8x8x32 3-D convolution is supported
//! before re-enabling.
#if 0
TEST_F(CUDA, CONVOLUTION3D_8X8X32) {
    //! Skip unless the device satisfies compute capability 6.1.
    //! NOTE(review): the skip message says "CONVOLUTION_8X8X32" while the test
    //! is CONVOLUTION3D_8X8X32, and the two string literals concatenate
    //! without a space ("device""doesn't") — cosmetic, left unchanged here.
    if (!cuda::is_compute_capability_required(6, 1)) {
        printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device"
               "doesn't support\n");
        return;
    }
    using namespace convolution3d;
    std::vector<TestArg> args;
    //! collect the regular test cases; dilated/chanwise cases below were
    //! already commented out before the test itself was disabled
    {
        auto v = get_args();
        for (auto&& a : v) {
            args.push_back(std::move(a));
        }
    }
    /*
    {
        auto v = get_dilated_args();
        for (auto &&a: v) {
            args.push_back(std::move(a));
        }
    }
    {
        auto v = get_chanwise_args();
        for (auto &&a: v) {
            args.push_back(std::move(a));
        }
    }
    */
    Checker<Convolution3DForward> checker(handle_cuda());
    //! int8 inputs drawn from [-4, 4]; `rng_same` is declared but never used
    UniformIntRNG rng(-4, 4);
    UniformIntRNG rng_same(1, 1);
    for (auto arg : args) {
        //! switch each case to NDHWC / INT8x8x32 and convert the tensor
        //! shapes from NCDHW to NDHWC to match
        arg.param.format = param::Convolution3D::Format::NDHWC;
        arg.param.data_type = param::Convolution3D::DataType::INT8x8x32;
        arg.src = cvt_src_or_dst_ncdhw2ndhwc(arg.src);
        arg.filter = cvt_filter_ncdhw2ndhwc(arg.filter);
        checker.set_dtype(0, dtype::Int8())
                .set_dtype(1, dtype::Int8())
                .set_dtype(2, dtype::Int32())
                .set_param(arg.param)
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .execs({arg.src, arg.filter, {}});
    }
}
#endif
  70. TEST_F(CUDA, CONVOLUTION3D_FORWARD) {
  71. using namespace convolution3d;
  72. std::vector<TestArg> args = get_args();
  73. /*
  74. {
  75. auto v = get_chanwise_args();
  76. for (auto&& a : v) {
  77. args.push_back(std::move(a));
  78. }
  79. }
  80. {
  81. auto v = get_dilated_args();
  82. for (auto&& a : v) {
  83. args.push_back(std::move(a));
  84. }
  85. }
  86. */
  87. bool fp16_checked = false;
  88. Checker<Convolution3DForward> checker(handle_cuda());
  89. NormalRNG default_rng;
  90. for (auto&& arg : args) {
  91. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  92. arg.filter[4]);
  93. UniformFloatRNG rng(scale, 2 * scale);
  94. checker.set_dtype(0, dtype::Float32())
  95. .set_dtype(1, dtype::Float32())
  96. .set_rng(0, &default_rng)
  97. .set_rng(1, &default_rng)
  98. .set_epsilon(1e-3)
  99. .set_param(arg.param)
  100. .execs({arg.src, arg.filter, {}});
  101. if (!fp16_checked || arg.src.total_nr_elems() >= 1000)
  102. continue;
  103. checker.set_dtype(0, dtype::Float16())
  104. .set_dtype(1, dtype::Float16())
  105. .set_rng(0, &rng)
  106. .set_rng(1, &rng)
  107. .set_epsilon(1e-1)
  108. .set_param(arg.param)
  109. .execs({arg.src, arg.filter, {}});
  110. }
  111. }
  112. TEST_F(CUDA, CONVOLUTION3D_1X1X1_FORWARD) {
  113. using namespace convolution3d;
  114. std::vector<TestArg> args = get_1x1x1_args();
  115. Checker<Convolution3DForward> checker(handle_cuda());
  116. NormalRNG default_rng;
  117. for (auto&& arg : args) {
  118. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  119. arg.filter[4]);
  120. UniformFloatRNG rng(scale, 2 * scale);
  121. checker.set_dtype(0, dtype::Float32())
  122. .set_dtype(1, dtype::Float32())
  123. .set_rng(0, &default_rng)
  124. .set_rng(1, &default_rng)
  125. .set_epsilon(1e-3)
  126. .set_param(arg.param)
  127. .execs({arg.src, arg.filter, {}});
  128. }
  129. }
  130. TEST_F(CUDA, CONVOLUTION3D_MATMUL_FORWARD) {
  131. using namespace convolution3d;
  132. std::vector<TestArg> args = get_args();
  133. Checker<Convolution3DForward> checker(handle_cuda());
  134. NormalRNG default_rng;
  135. for (auto&& arg : args) {
  136. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  137. arg.filter[4]);
  138. UniformFloatRNG rng(scale, 2 * scale);
  139. checker.set_dtype(0, dtype::Float32())
  140. .set_dtype(1, dtype::Float32())
  141. .set_rng(0, &default_rng)
  142. .set_rng(1, &default_rng)
  143. .set_param(arg.param)
  144. .execs({arg.src, arg.filter, {}});
  145. }
  146. }
  147. TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_CUDNN) {
  148. using namespace convolution3d;
  149. Checker<Convolution3DForward> checker(handle_cuda());
  150. checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>("CUDNN"));
  151. param::Convolution3D param;
  152. param.pad_d = param.pad_h = param.pad_w = 1;
  153. checker.set_dtype(0, dtype::Float32())
  154. .set_dtype(1, dtype::Float32())
  155. .set_epsilon(1e-3);
  156. //! noncontiguous case
  157. {
  158. checker.set_param(param).execl(TensorLayoutArray{
  159. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
  160. {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
  161. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
  162. }
  163. }
  164. TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_INPLACE_MATMUL) {
  165. using namespace convolution3d;
  166. Checker<Convolution3DForward> checker(handle_cuda());
  167. checker.set_before_exec_callback(
  168. AlgoChecker<Convolution3DForward>("INPLACE_MATMUL"));
  169. param::Convolution3D param;
  170. param.pad_d = param.pad_h = param.pad_w = 1;
  171. checker.set_dtype(0, dtype::Float32())
  172. .set_dtype(1, dtype::Float32())
  173. .set_epsilon(1e-3);
  174. //! noncontiguous case
  175. {
  176. checker.set_param(param).execl(TensorLayoutArray{
  177. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
  178. {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
  179. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
  180. }
  181. }
  182. TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_1x1x1) {
  183. using namespace convolution3d;
  184. Checker<Convolution3DForward> checker(handle_cuda());
  185. checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>("1x1x1"));
  186. param::Convolution3D param;
  187. checker.set_dtype(0, dtype::Float32())
  188. .set_dtype(1, dtype::Float32())
  189. .set_epsilon(1e-3);
  190. //! noncontiguous case
  191. {
  192. checker.set_param(param).execl(TensorLayoutArray{
  193. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
  194. {{5, 5, 1, 1, 1}, {5, 1, 1, 1, 1}, dtype::Float32()},
  195. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
  196. }
  197. }
  198. #if MEGDNN_WITH_BENCHMARK
  199. TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_MATMUL_BACKWARD_FILTER) {
  200. using namespace convolution3d;
  201. std::vector<TestArg> args = get_speed_test_args();
  202. Benchmarker<Convolution3DBackwardFilter> marker(handle_cuda());
  203. NormalRNG default_rng;
  204. for (auto&& arg : args) {
  205. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  206. arg.filter[4]);
  207. auto src = TensorLayout(arg.src, dtype::Float32());
  208. auto filter = TensorLayout(arg.filter, dtype::Float32());
  209. TensorLayout dst;
  210. auto opr = handle_cuda()->create_operator<Convolution3D>();
  211. opr->param() = arg.param;
  212. opr->deduce_layout(src, filter, dst);
  213. UniformFloatRNG rng(scale, 2 * scale);
  214. marker.set_dtype(0, dtype::Float32())
  215. .set_dtype(1, dtype::Float32())
  216. .set_rng(0, &default_rng)
  217. .set_rng(1, &default_rng)
  218. .set_param(arg.param)
  219. .execs({src, dst, filter});
  220. }
  221. }
  222. TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_MATMUL_FORWARD) {
  223. using namespace convolution3d;
  224. std::vector<TestArg> args = get_speed_test_args();
  225. Benchmarker<Convolution3DForward> marker(handle_cuda());
  226. NormalRNG default_rng;
  227. for (auto&& arg : args) {
  228. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  229. arg.filter[4]);
  230. UniformFloatRNG rng(scale, 2 * scale);
  231. marker.set_dtype(0, dtype::Float32())
  232. .set_dtype(1, dtype::Float32())
  233. .set_rng(0, &default_rng)
  234. .set_rng(1, &default_rng)
  235. . // set_param(arg.param).
  236. execs({arg.src, arg.filter, {}});
  237. }
  238. }
  239. TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_1X1X1_FORWARD) {
  240. using namespace convolution3d;
  241. std::vector<TestArg> args = get_1x1x1_args();
  242. Benchmarker<Convolution3DForward> marker(handle_cuda());
  243. NormalRNG default_rng;
  244. for (auto&& arg : args) {
  245. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  246. arg.filter[4]);
  247. UniformFloatRNG rng(scale, 2 * scale);
  248. marker.set_dtype(0, dtype::Float32())
  249. .set_dtype(1, dtype::Float32())
  250. .set_rng(0, &default_rng)
  251. .set_rng(1, &default_rng)
  252. .
  253. // set_param(arg.param).
  254. execs({arg.src, arg.filter, {}});
  255. }
  256. }
  257. TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_FORWARD) {
  258. using namespace convolution3d;
  259. std::vector<TestArg> args = get_args();
  260. {
  261. auto v = get_chanwise_args();
  262. for (auto&& a : v)
  263. args.push_back(std::move(a));
  264. }
  265. {
  266. auto v = get_1x1x1_args();
  267. for (auto&& a : v)
  268. args.push_back(std::move(a));
  269. }
  270. {
  271. auto v = get_dilated_args();
  272. for (auto&& a : v)
  273. args.push_back(std::move(a));
  274. }
  275. Benchmarker<Convolution3DForward> marker(handle_cuda());
  276. NormalRNG default_rng;
  277. for (auto&& arg : args) {
  278. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  279. arg.filter[4]);
  280. UniformFloatRNG rng(scale, 2 * scale);
  281. marker.set_dtype(0, dtype::Float32())
  282. .set_dtype(1, dtype::Float32())
  283. .set_rng(0, &default_rng)
  284. .set_rng(1, &default_rng)
  285. .set_param(arg.param)
  286. .execs({arg.src, arg.filter, {}});
  287. marker.set_dtype(0, dtype::Float16())
  288. .set_dtype(1, dtype::Float16())
  289. .set_rng(0, &rng)
  290. .set_rng(1, &rng)
  291. .set_param(arg.param)
  292. .execs({arg.src, arg.filter, {}});
  293. }
  294. }
  295. #endif
  296. TEST_F(CUDA, CONVOLUTION3D_BACKWARD_DATA) {
  297. using namespace convolution3d;
  298. std::vector<TestArg> args = get_args();
  299. Checker<Convolution3DBackwardData> checker(handle_cuda());
  300. NormalRNG default_rng;
  301. for (auto&& arg : args) {
  302. float scale = 1.0f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3] *
  303. arg.filter[4]);
  304. UniformFloatRNG rng(scale, 2 * scale);
  305. auto src = TensorLayout(arg.src, dtype::Float32());
  306. auto filter = TensorLayout(arg.filter, dtype::Float32());
  307. TensorLayout dst;
  308. {
  309. auto opr = handle_cuda()->create_operator<Convolution3D>();
  310. opr->param() = arg.param;
  311. opr->deduce_layout(src, filter, dst);
  312. }
  313. src.dtype = dst.dtype = filter.dtype = dtype::Float32();
  314. checker.set_rng(0, &default_rng)
  315. .set_rng(1, &default_rng)
  316. .set_epsilon(1e-3)
  317. .set_param(arg.param)
  318. .exec(TensorLayoutArray{filter, dst, src});
  319. src.dtype = dst.dtype = filter.dtype = dtype::Float16();
  320. checker.set_rng(0, &rng)
  321. .set_rng(1, &rng)
  322. .set_epsilon(1e-1)
  323. .set_param(arg.param)
  324. .exec(TensorLayoutArray{filter, dst, src});
  325. }
  326. }
  327. TEST_F(CUDA, CONVOLUTION3D_BACKWARD_FILTER) {
  328. using namespace convolution3d;
  329. std::vector<TestArg> args = get_args();
  330. Checker<Convolution3DBackwardFilter> checker(handle_cuda());
  331. NormalRNG default_rng;
  332. for (auto&& arg : args) {
  333. auto src = TensorLayout(arg.src, dtype::Float32());
  334. auto filter = TensorLayout(arg.filter, dtype::Float32());
  335. TensorLayout dst;
  336. {
  337. auto opr = handle_cuda()->create_operator<Convolution3D>();
  338. opr->param() = arg.param;
  339. opr->deduce_layout(src, filter, dst);
  340. }
  341. float scale = 1.0f / sqrt(dst[0] * dst[2] * dst[3] * dst[4]);
  342. UniformFloatRNG rng(scale, 2 * scale);
  343. src.dtype = dst.dtype = filter.dtype = dtype::Float32();
  344. checker.set_rng(0, &default_rng)
  345. .set_rng(1, &default_rng)
  346. .set_epsilon(1e-3)
  347. .set_param(arg.param)
  348. .exec(TensorLayoutArray{src, dst, filter});
  349. if (dst.total_nr_elems() >= 1000)
  350. continue;
  351. src.dtype = dst.dtype = filter.dtype = dtype::Float16();
  352. checker.set_rng(0, &rng)
  353. .set_rng(1, &rng)
  354. .set_epsilon(1e-1)
  355. .set_param(arg.param)
  356. .exec(TensorLayoutArray{src, dst, filter});
  357. }
  358. }
  359. TEST_F(CUDA, CONVOLUTION3D_MATMUL_BACKWARD_FILTER) {
  360. using namespace convolution3d;
  361. std::vector<TestArg> args = get_args();
  362. Checker<Convolution3DBackwardFilter> checker(handle_cuda());
  363. NormalRNG default_rng;
  364. for (auto&& arg : args) {
  365. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  366. arg.filter[4]);
  367. UniformFloatRNG rng(scale, 2 * scale);
  368. auto src = TensorLayout(arg.src, dtype::Float32());
  369. auto filter = TensorLayout(arg.filter, dtype::Float32());
  370. TensorLayout dst;
  371. auto opr = handle_cuda()->create_operator<Convolution3D>();
  372. opr->param() = arg.param;
  373. opr->deduce_layout(src, filter, dst);
  374. src.dtype = dst.dtype = filter.dtype = dtype::Float32();
  375. checker.set_rng(0, &default_rng)
  376. .set_rng(1, &default_rng)
  377. .set_param(arg.param)
  378. .exec(TensorLayoutArray{src, dst, filter});
  379. }
  380. }
  381. TEST_F(CUDA, CONVOLUTION3D_BACKWARD_DATA_NONCONTIG_CUDNN) {
  382. using namespace convolution3d;
  383. Checker<Convolution3DBackwardData> checker(handle_cuda());
  384. checker.set_before_exec_callback(AlgoChecker<Convolution3DBackwardData>("CUDNN"));
  385. Convolution3DBackwardData::Param param;
  386. param.pad_d = param.pad_h = param.pad_w = 1;
  387. NormalRNG default_rng;
  388. checker.set_dtype(0, dtype::Float32())
  389. .set_dtype(1, dtype::Float32())
  390. .set_rng(0, &default_rng)
  391. .set_rng(1, &default_rng)
  392. .set_epsilon(1e-3)
  393. .set_param(param);
  394. //! noncontiguous case
  395. {
  396. checker.execl(TensorLayoutArray{
  397. {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
  398. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
  399. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
  400. }
  401. }
  402. TEST_F(CUDA, CONVOLUTION3D_BACKWARD_FILTER_NONCONTIG_CUDNN) {
  403. using namespace convolution3d;
  404. Checker<Convolution3DBackwardFilter> checker(handle_cuda());
  405. checker.set_before_exec_callback(AlgoChecker<Convolution3DBackwardFilter>("CUDNN"));
  406. Convolution3DBackwardFilter::Param param;
  407. param.pad_d = param.pad_h = param.pad_w = 1;
  408. NormalRNG default_rng;
  409. checker.set_dtype(0, dtype::Float32())
  410. .set_dtype(1, dtype::Float32())
  411. .set_rng(0, &default_rng)
  412. .set_rng(1, &default_rng)
  413. .set_epsilon(1e-3)
  414. .set_param(param);
  415. //! noncontiguous case
  416. {
  417. checker.execl(TensorLayoutArray{
  418. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
  419. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
  420. {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()}});
  421. }
  422. }
  423. /*
  424. TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) {
  425. auto eps_getter = [](bool f16, int stage, const char *name) -> float {
  426. if (f16) {
  427. return stage == 2 ? 0.9 : 0.7;
  428. }
  429. if (strstr(name, "WINOGRAD_NONFUSED"))
  430. return 0.3;
  431. return 1e-3;
  432. };
  433. convolution3d::test_conv_config_combinations(handle_cuda(), false, true,
  434. true, eps_getter);
  435. }
  436. */
  437. } // namespace test
  438. } // namespace megdnn
  439. // vim: syntax=cpp.doxygen