
convolution3d.cpp 17 kB

/**
 * \file dnn/test/cuda/convolution3d.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "test/common/convolution3d.h"
#include "megdnn/opr_param_defs.h"
#include "megdnn/oprs.h"
#include "src/cuda/utils.h"
#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/rng.h"
#include "test/common/tensor.h"
#include "test/common/workspace_wrapper.h"
#include "test/cuda/fixture.h"

namespace megdnn {
namespace test {

#if 0
TEST_F(CUDA, CONVOLUTION3D_8X8X32) {
    if (!cuda::is_compute_capability_required(6, 1)) {
        printf("Skip CUDA.CONVOLUTION3D_8X8X32 test as current device "
               "doesn't support\n");
        return;
    }
    using namespace convolution3d;
    std::vector<TestArg> args;
    {
        auto v = get_args();
        for (auto&& a : v) {
            args.push_back(std::move(a));
        }
    }
    /*
    {
        auto v = get_dilated_args();
        for (auto&& a : v) {
            args.push_back(std::move(a));
        }
    }
    {
        auto v = get_chanwise_args();
        for (auto&& a : v) {
            args.push_back(std::move(a));
        }
    }
    */
    Checker<Convolution3DForward> checker(handle_cuda());
    UniformIntRNG rng(-4, 4);
    UniformIntRNG rng_same(1, 1);
    for (auto arg : args) {
        arg.param.format = param::Convolution3D::Format::NDHWC;
        arg.param.data_type = param::Convolution3D::DataType::INT8x8x32;
        arg.src = cvt_src_or_dst_ncdhw2ndhwc(arg.src);
        arg.filter = cvt_filter_ncdhw2ndhwc(arg.filter);
        checker.set_dtype(0, dtype::Int8())
                .set_dtype(1, dtype::Int8())
                .set_dtype(2, dtype::Int32())
                .set_param(arg.param)
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .execs({arg.src, arg.filter, {}});
    }
}
#endif

TEST_F(CUDA, CONVOLUTION3D_FORWARD) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_args();
    /*
    {
        auto v = get_chanwise_args();
        for (auto&& a : v) {
            args.push_back(std::move(a));
        }
    }
    {
        auto v = get_dilated_args();
        for (auto&& a : v) {
            args.push_back(std::move(a));
        }
    }
    */
    bool fp16_checked = false;
    Checker<Convolution3DForward> checker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] *
                                  arg.filter[3] * arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
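        // check the fp16 path only once, and only on a small case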
        if (fp16_checked || arg.src.total_nr_elems() >= 1000)
            continue;
        checker.set_dtype(0, dtype::Float16())
                .set_dtype(1, dtype::Float16())
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
        fp16_checked = true;
    }
}

TEST_F(CUDA, CONVOLUTION3D_1X1X1_FORWARD) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_1x1x1_args();
    Checker<Convolution3DForward> checker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] *
                                  arg.filter[3] * arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}

TEST_F(CUDA, CONVOLUTION3D_MATMUL_FORWARD) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_args();
    Checker<Convolution3DForward> checker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] *
                                  arg.filter[3] * arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}

TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_CUDNN) {
    using namespace convolution3d;
    Checker<Convolution3DForward> checker(handle_cuda());
    checker.set_before_exec_callback(
            AlgoChecker<Convolution3DForward>("CUDNN"));
    param::Convolution3D param;
    param.pad_d = param.pad_h = param.pad_w = 1;
    checker.set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_epsilon(1e-3);
    //! noncontiguous case
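    //! batch stride 40960 is twice the contiguous value (5*16*16*16 = 20480),
    //! i.e. src and dst are strided views that use every other batch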
    {
        checker.set_param(param).execl(TensorLayoutArray{
                {{4, 5, 16, 16, 16},
                 {40960, 4096, 256, 16, 1},
                 dtype::Float32()},
                {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
                {{4, 5, 16, 16, 16},
                 {40960, 4096, 256, 16, 1},
                 dtype::Float32()}});
    }
}

TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_INPLACE_MATMUL) {
    using namespace convolution3d;
    Checker<Convolution3DForward> checker(handle_cuda());
    checker.set_before_exec_callback(
            AlgoChecker<Convolution3DForward>("INPLACE_MATMUL"));
    param::Convolution3D param;
    param.pad_d = param.pad_h = param.pad_w = 1;
    checker.set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_epsilon(1e-3);
    //! noncontiguous case
    {
        checker.set_param(param).execl(TensorLayoutArray{
                {{4, 5, 16, 16, 16},
                 {40960, 4096, 256, 16, 1},
                 dtype::Float32()},
                {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
                {{4, 5, 16, 16, 16},
                 {40960, 4096, 256, 16, 1},
                 dtype::Float32()}});
    }
}

TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_1x1x1) {
    using namespace convolution3d;
    Checker<Convolution3DForward> checker(handle_cuda());
    checker.set_before_exec_callback(
            AlgoChecker<Convolution3DForward>("1x1x1"));
    param::Convolution3D param;
    checker.set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_epsilon(1e-3);
    //! noncontiguous case
    {
        checker.set_param(param).execl(TensorLayoutArray{
                {{4, 5, 16, 16, 16},
                 {40960, 4096, 256, 16, 1},
                 dtype::Float32()},
                {{5, 5, 1, 1, 1}, {5, 1, 1, 1, 1}, dtype::Float32()},
                {{4, 5, 16, 16, 16},
                 {40960, 4096, 256, 16, 1},
                 dtype::Float32()}});
    }
}

#if MEGDNN_WITH_BENCHMARK
TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_MATMUL_BACKWARD_FILTER) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_speed_test_args();
    Benchmarker<Convolution3DBackwardFilter> marker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] *
                                  arg.filter[3] * arg.filter[4]);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        auto opr = handle_cuda()->create_operator<Convolution3D>();
        opr->param() = arg.param;
        opr->deduce_layout(src, filter, dst);
        UniformFloatRNG rng(scale, 2 * scale);
        marker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_param(arg.param)
                .execs({src, dst, filter});
    }
}

TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_MATMUL_FORWARD) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_speed_test_args();
    Benchmarker<Convolution3DForward> marker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] *
                                  arg.filter[3] * arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        marker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                // .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}

TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_1X1X1_FORWARD) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_1x1x1_args();
    Benchmarker<Convolution3DForward> marker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] *
                                  arg.filter[3] * arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        marker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                // .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}

TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_FORWARD) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_args();
    {
        auto v = get_chanwise_args();
        for (auto&& a : v)
            args.push_back(std::move(a));
    }
    {
        auto v = get_1x1x1_args();
        for (auto&& a : v)
            args.push_back(std::move(a));
    }
    {
        auto v = get_dilated_args();
        for (auto&& a : v)
            args.push_back(std::move(a));
    }
    Benchmarker<Convolution3DForward> marker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] *
                                  arg.filter[3] * arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        marker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
        marker.set_dtype(0, dtype::Float16())
                .set_dtype(1, dtype::Float16())
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}
#endif

TEST_F(CUDA, CONVOLUTION3D_BACKWARD_DATA) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_args();
    Checker<Convolution3DBackwardData> checker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[0] * arg.filter[2] *
                                  arg.filter[3] * arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_cuda()->create_operator<Convolution3D>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
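        // backward data takes (filter, dst-diff, src-grad), in that order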
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
    }
}

TEST_F(CUDA, CONVOLUTION3D_BACKWARD_FILTER) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_args();
    Checker<Convolution3DBackwardFilter> checker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_cuda()->create_operator<Convolution3D>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        float scale = 1.0f / sqrt(dst[0] * dst[2] * dst[3] * dst[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
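        // backward filter takes (src, dst-diff, filter-grad), in that order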
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{src, dst, filter});
        if (dst.total_nr_elems() >= 1000)
            continue;
        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .exec(TensorLayoutArray{src, dst, filter});
    }
}

TEST_F(CUDA, CONVOLUTION3D_MATMUL_BACKWARD_FILTER) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_args();
    Checker<Convolution3DBackwardFilter> checker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] *
                                  arg.filter[3] * arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        auto opr = handle_cuda()->create_operator<Convolution3D>();
        opr->param() = arg.param;
        opr->deduce_layout(src, filter, dst);
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_param(arg.param)
                .exec(TensorLayoutArray{src, dst, filter});
    }
}

TEST_F(CUDA, CONVOLUTION3D_BACKWARD_DATA_NONCONTIG_CUDNN) {
    using namespace convolution3d;
    Checker<Convolution3DBackwardData> checker(handle_cuda());
    checker.set_before_exec_callback(
            AlgoChecker<Convolution3DBackwardData>("CUDNN"));
    Convolution3DBackwardData::Param param;
    param.pad_d = param.pad_h = param.pad_w = 1;
    NormalRNG default_rng;
    checker.set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_rng(0, &default_rng)
            .set_rng(1, &default_rng)
            .set_epsilon(1e-3)
            .set_param(param);
    //! noncontiguous case
    {
        checker.execl(TensorLayoutArray{
                {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
                {{4, 5, 16, 16, 16},
                 {40960, 4096, 256, 16, 1},
                 dtype::Float32()},
                {{4, 5, 16, 16, 16},
                 {40960, 4096, 256, 16, 1},
                 dtype::Float32()}});
    }
}

TEST_F(CUDA, CONVOLUTION3D_BACKWARD_FILTER_NONCONTIG_CUDNN) {
    using namespace convolution3d;
    Checker<Convolution3DBackwardFilter> checker(handle_cuda());
    checker.set_before_exec_callback(
            AlgoChecker<Convolution3DBackwardFilter>("CUDNN"));
    Convolution3DBackwardFilter::Param param;
    param.pad_d = param.pad_h = param.pad_w = 1;
    NormalRNG default_rng;
    checker.set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_rng(0, &default_rng)
            .set_rng(1, &default_rng)
            .set_epsilon(1e-3)
            .set_param(param);
    //! noncontiguous case
    {
        checker.execl(TensorLayoutArray{
                {{4, 5, 16, 16, 16},
                 {40960, 4096, 256, 16, 1},
                 dtype::Float32()},
                {{4, 5, 16, 16, 16},
                 {40960, 4096, 256, 16, 1},
                 dtype::Float32()},
                {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()}});
    }
}

/*
TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) {
    auto eps_getter = [](bool f16, int stage, const char* name) -> float {
        if (f16) {
            return stage == 2 ? 0.9 : 0.7;
        }
        if (strstr(name, "WINOGRAD_NONFUSED"))
            return 0.3;
        return 1e-3;
    };
    convolution3d::test_conv_config_combinations(handle_cuda(), false, true,
                                                 true, eps_getter);
}
*/

}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. If you want to run GPU programs, make sure the machine has a GPU device and the driver is installed. If you would like to try deep-learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.
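A quick way to confirm that the unified package can actually see a GPU is a minimal sketch like the following, assuming a standard pip-installed megengine and its top-level helpers megengine.is_cuda_available() and megengine.get_device_count():

import megengine

# The same wheel runs on both CPU and GPU; this only reports whether the
# bundled CUDA runtime can find a usable device with a working driver.
if megengine.is_cuda_available():
    print("GPU available:", megengine.get_device_count("gpu"), "device(s)")
else:
    print("No usable GPU found; MegEngine will run on CPU")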