You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

convolution3d.cpp 17 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. #include "test/common/convolution3d.h"
  2. #include "megdnn/opr_param_defs.h"
  3. #include "megdnn/oprs.h"
  4. #include "test/common/benchmarker.h"
  5. #include "test/common/checker.h"
  6. #include "test/common/rng.h"
  7. #include "test/common/tensor.h"
  8. #include "test/common/workspace_wrapper.h"
  9. #include "test/cuda/fixture.h"
  10. #include "test/cuda/utils.h"
  11. namespace megdnn {
  12. namespace test {
  13. #if 0
  14. TEST_F(CUDA, CONVOLUTION3D_8X8X32) {
  15. if (!check_compute_capability(6, 1)) {
  16. printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device"
  17. "doesn't support\n");
  18. return;
  19. }
  20. using namespace convolution3d;
  21. std::vector<TestArg> args;
  22. {
  23. auto v = get_args();
  24. for (auto&& a : v) {
  25. args.push_back(std::move(a));
  26. }
  27. }
  28. /*
  29. {
  30. auto v = get_dilated_args();
  31. for (auto &&a: v) {
  32. args.push_back(std::move(a));
  33. }
  34. }
  35. {
  36. auto v = get_chanwise_args();
  37. for (auto &&a: v) {
  38. args.push_back(std::move(a));
  39. }
  40. }
  41. */
  42. Checker<Convolution3DForward> checker(handle_cuda());
  43. UniformIntRNG rng(-4, 4);
  44. UniformIntRNG rng_same(1, 1);
  45. for (auto arg : args) {
  46. arg.param.format = param::Convolution3D::Format::NDHWC;
  47. arg.param.data_type = param::Convolution3D::DataType::INT8x8x32;
  48. arg.src = cvt_src_or_dst_ncdhw2ndhwc(arg.src);
  49. arg.filter = cvt_filter_ncdhw2ndhwc(arg.filter);
  50. checker.set_dtype(0, dtype::Int8())
  51. .set_dtype(1, dtype::Int8())
  52. .set_dtype(2, dtype::Int32())
  53. .set_param(arg.param)
  54. .set_rng(0, &rng)
  55. .set_rng(1, &rng)
  56. .execs({arg.src, arg.filter, {}});
  57. }
  58. }
  59. #endif
  60. TEST_F(CUDA, CONVOLUTION3D_FORWARD) {
  61. using namespace convolution3d;
  62. std::vector<TestArg> args = get_args();
  63. /*
  64. {
  65. auto v = get_chanwise_args();
  66. for (auto&& a : v) {
  67. args.push_back(std::move(a));
  68. }
  69. }
  70. {
  71. auto v = get_dilated_args();
  72. for (auto&& a : v) {
  73. args.push_back(std::move(a));
  74. }
  75. }
  76. */
  77. bool fp16_checked = false;
  78. Checker<Convolution3DForward> checker(handle_cuda());
  79. NormalRNG default_rng;
  80. for (auto&& arg : args) {
  81. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  82. arg.filter[4]);
  83. UniformFloatRNG rng(scale, 2 * scale);
  84. checker.set_dtype(0, dtype::Float32())
  85. .set_dtype(1, dtype::Float32())
  86. .set_rng(0, &default_rng)
  87. .set_rng(1, &default_rng)
  88. .set_epsilon(1e-3)
  89. .set_param(arg.param)
  90. .execs({arg.src, arg.filter, {}});
  91. if (!fp16_checked || arg.src.total_nr_elems() >= 1000)
  92. continue;
  93. checker.set_dtype(0, dtype::Float16())
  94. .set_dtype(1, dtype::Float16())
  95. .set_rng(0, &rng)
  96. .set_rng(1, &rng)
  97. .set_epsilon(1e-1)
  98. .set_param(arg.param)
  99. .execs({arg.src, arg.filter, {}});
  100. }
  101. }
  102. TEST_F(CUDA, CONVOLUTION3D_1X1X1_FORWARD) {
  103. using namespace convolution3d;
  104. std::vector<TestArg> args = get_1x1x1_args();
  105. Checker<Convolution3DForward> checker(handle_cuda());
  106. NormalRNG default_rng;
  107. for (auto&& arg : args) {
  108. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  109. arg.filter[4]);
  110. UniformFloatRNG rng(scale, 2 * scale);
  111. checker.set_dtype(0, dtype::Float32())
  112. .set_dtype(1, dtype::Float32())
  113. .set_rng(0, &default_rng)
  114. .set_rng(1, &default_rng)
  115. .set_epsilon(1e-3)
  116. .set_param(arg.param)
  117. .execs({arg.src, arg.filter, {}});
  118. }
  119. }
  120. TEST_F(CUDA, CONVOLUTION3D_MATMUL_FORWARD) {
  121. using namespace convolution3d;
  122. std::vector<TestArg> args = get_args();
  123. Checker<Convolution3DForward> checker(handle_cuda());
  124. NormalRNG default_rng;
  125. for (auto&& arg : args) {
  126. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  127. arg.filter[4]);
  128. UniformFloatRNG rng(scale, 2 * scale);
  129. checker.set_dtype(0, dtype::Float32())
  130. .set_dtype(1, dtype::Float32())
  131. .set_rng(0, &default_rng)
  132. .set_rng(1, &default_rng)
  133. .set_param(arg.param)
  134. .execs({arg.src, arg.filter, {}});
  135. }
  136. }
  137. TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_CUDNN) {
  138. using namespace convolution3d;
  139. Checker<Convolution3DForward> checker(handle_cuda());
  140. checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>("CUDNN"));
  141. param::Convolution3D param;
  142. param.pad_d = param.pad_h = param.pad_w = 1;
  143. checker.set_dtype(0, dtype::Float32())
  144. .set_dtype(1, dtype::Float32())
  145. .set_epsilon(1e-3);
  146. //! noncontiguous case
  147. {
  148. checker.set_param(param).execl(TensorLayoutArray{
  149. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
  150. {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
  151. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
  152. }
  153. }
  154. TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_INPLACE_MATMUL) {
  155. using namespace convolution3d;
  156. Checker<Convolution3DForward> checker(handle_cuda());
  157. checker.set_before_exec_callback(
  158. AlgoChecker<Convolution3DForward>("INPLACE_MATMUL"));
  159. param::Convolution3D param;
  160. param.pad_d = param.pad_h = param.pad_w = 1;
  161. checker.set_dtype(0, dtype::Float32())
  162. .set_dtype(1, dtype::Float32())
  163. .set_epsilon(1e-3);
  164. //! noncontiguous case
  165. {
  166. checker.set_param(param).execl(TensorLayoutArray{
  167. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
  168. {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
  169. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
  170. }
  171. }
  172. TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_1x1x1) {
  173. using namespace convolution3d;
  174. Checker<Convolution3DForward> checker(handle_cuda());
  175. checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>("1x1x1"));
  176. param::Convolution3D param;
  177. checker.set_dtype(0, dtype::Float32())
  178. .set_dtype(1, dtype::Float32())
  179. .set_epsilon(1e-3);
  180. //! noncontiguous case
  181. {
  182. checker.set_param(param).execl(TensorLayoutArray{
  183. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
  184. {{5, 5, 1, 1, 1}, {5, 1, 1, 1, 1}, dtype::Float32()},
  185. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
  186. }
  187. }
  188. #if MEGDNN_WITH_BENCHMARK
  189. TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_MATMUL_BACKWARD_FILTER) {
  190. using namespace convolution3d;
  191. std::vector<TestArg> args = get_speed_test_args();
  192. Benchmarker<Convolution3DBackwardFilter> marker(handle_cuda());
  193. NormalRNG default_rng;
  194. for (auto&& arg : args) {
  195. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  196. arg.filter[4]);
  197. auto src = TensorLayout(arg.src, dtype::Float32());
  198. auto filter = TensorLayout(arg.filter, dtype::Float32());
  199. TensorLayout dst;
  200. auto opr = handle_cuda()->create_operator<Convolution3D>();
  201. opr->param() = arg.param;
  202. opr->deduce_layout(src, filter, dst);
  203. UniformFloatRNG rng(scale, 2 * scale);
  204. marker.set_dtype(0, dtype::Float32())
  205. .set_dtype(1, dtype::Float32())
  206. .set_rng(0, &default_rng)
  207. .set_rng(1, &default_rng)
  208. .set_param(arg.param)
  209. .execs({src, dst, filter});
  210. }
  211. }
  212. TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_MATMUL_FORWARD) {
  213. using namespace convolution3d;
  214. std::vector<TestArg> args = get_speed_test_args();
  215. Benchmarker<Convolution3DForward> marker(handle_cuda());
  216. NormalRNG default_rng;
  217. for (auto&& arg : args) {
  218. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  219. arg.filter[4]);
  220. UniformFloatRNG rng(scale, 2 * scale);
  221. marker.set_dtype(0, dtype::Float32())
  222. .set_dtype(1, dtype::Float32())
  223. .set_rng(0, &default_rng)
  224. .set_rng(1, &default_rng)
  225. . // set_param(arg.param).
  226. execs({arg.src, arg.filter, {}});
  227. }
  228. }
  229. TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_1X1X1_FORWARD) {
  230. using namespace convolution3d;
  231. std::vector<TestArg> args = get_1x1x1_args();
  232. Benchmarker<Convolution3DForward> marker(handle_cuda());
  233. NormalRNG default_rng;
  234. for (auto&& arg : args) {
  235. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  236. arg.filter[4]);
  237. UniformFloatRNG rng(scale, 2 * scale);
  238. marker.set_dtype(0, dtype::Float32())
  239. .set_dtype(1, dtype::Float32())
  240. .set_rng(0, &default_rng)
  241. .set_rng(1, &default_rng)
  242. .
  243. // set_param(arg.param).
  244. execs({arg.src, arg.filter, {}});
  245. }
  246. }
  247. TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_FORWARD) {
  248. using namespace convolution3d;
  249. std::vector<TestArg> args = get_args();
  250. {
  251. auto v = get_chanwise_args();
  252. for (auto&& a : v)
  253. args.push_back(std::move(a));
  254. }
  255. {
  256. auto v = get_1x1x1_args();
  257. for (auto&& a : v)
  258. args.push_back(std::move(a));
  259. }
  260. {
  261. auto v = get_dilated_args();
  262. for (auto&& a : v)
  263. args.push_back(std::move(a));
  264. }
  265. Benchmarker<Convolution3DForward> marker(handle_cuda());
  266. NormalRNG default_rng;
  267. for (auto&& arg : args) {
  268. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  269. arg.filter[4]);
  270. UniformFloatRNG rng(scale, 2 * scale);
  271. marker.set_dtype(0, dtype::Float32())
  272. .set_dtype(1, dtype::Float32())
  273. .set_rng(0, &default_rng)
  274. .set_rng(1, &default_rng)
  275. .set_param(arg.param)
  276. .execs({arg.src, arg.filter, {}});
  277. marker.set_dtype(0, dtype::Float16())
  278. .set_dtype(1, dtype::Float16())
  279. .set_rng(0, &rng)
  280. .set_rng(1, &rng)
  281. .set_param(arg.param)
  282. .execs({arg.src, arg.filter, {}});
  283. }
  284. }
  285. #endif
  286. TEST_F(CUDA, CONVOLUTION3D_BACKWARD_DATA) {
  287. using namespace convolution3d;
  288. std::vector<TestArg> args = get_args();
  289. Checker<Convolution3DBackwardData> checker(handle_cuda());
  290. NormalRNG default_rng;
  291. for (auto&& arg : args) {
  292. float scale = 1.0f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3] *
  293. arg.filter[4]);
  294. UniformFloatRNG rng(scale, 2 * scale);
  295. auto src = TensorLayout(arg.src, dtype::Float32());
  296. auto filter = TensorLayout(arg.filter, dtype::Float32());
  297. TensorLayout dst;
  298. {
  299. auto opr = handle_cuda()->create_operator<Convolution3D>();
  300. opr->param() = arg.param;
  301. opr->deduce_layout(src, filter, dst);
  302. }
  303. src.dtype = dst.dtype = filter.dtype = dtype::Float32();
  304. checker.set_rng(0, &default_rng)
  305. .set_rng(1, &default_rng)
  306. .set_epsilon(1e-3)
  307. .set_param(arg.param)
  308. .exec(TensorLayoutArray{filter, dst, src});
  309. src.dtype = dst.dtype = filter.dtype = dtype::Float16();
  310. checker.set_rng(0, &rng)
  311. .set_rng(1, &rng)
  312. .set_epsilon(1e-1)
  313. .set_param(arg.param)
  314. .exec(TensorLayoutArray{filter, dst, src});
  315. }
  316. }
  317. TEST_F(CUDA, CONVOLUTION3D_BACKWARD_FILTER) {
  318. using namespace convolution3d;
  319. std::vector<TestArg> args = get_args();
  320. Checker<Convolution3DBackwardFilter> checker(handle_cuda());
  321. NormalRNG default_rng;
  322. for (auto&& arg : args) {
  323. auto src = TensorLayout(arg.src, dtype::Float32());
  324. auto filter = TensorLayout(arg.filter, dtype::Float32());
  325. TensorLayout dst;
  326. {
  327. auto opr = handle_cuda()->create_operator<Convolution3D>();
  328. opr->param() = arg.param;
  329. opr->deduce_layout(src, filter, dst);
  330. }
  331. float scale = 1.0f / sqrt(dst[0] * dst[2] * dst[3] * dst[4]);
  332. UniformFloatRNG rng(scale, 2 * scale);
  333. src.dtype = dst.dtype = filter.dtype = dtype::Float32();
  334. checker.set_rng(0, &default_rng)
  335. .set_rng(1, &default_rng)
  336. .set_epsilon(1e-3)
  337. .set_param(arg.param)
  338. .exec(TensorLayoutArray{src, dst, filter});
  339. if (dst.total_nr_elems() >= 1000)
  340. continue;
  341. src.dtype = dst.dtype = filter.dtype = dtype::Float16();
  342. checker.set_rng(0, &rng)
  343. .set_rng(1, &rng)
  344. .set_epsilon(1e-1)
  345. .set_param(arg.param)
  346. .exec(TensorLayoutArray{src, dst, filter});
  347. }
  348. }
  349. TEST_F(CUDA, CONVOLUTION3D_MATMUL_BACKWARD_FILTER) {
  350. using namespace convolution3d;
  351. std::vector<TestArg> args = get_args();
  352. Checker<Convolution3DBackwardFilter> checker(handle_cuda());
  353. NormalRNG default_rng;
  354. for (auto&& arg : args) {
  355. float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
  356. arg.filter[4]);
  357. UniformFloatRNG rng(scale, 2 * scale);
  358. auto src = TensorLayout(arg.src, dtype::Float32());
  359. auto filter = TensorLayout(arg.filter, dtype::Float32());
  360. TensorLayout dst;
  361. auto opr = handle_cuda()->create_operator<Convolution3D>();
  362. opr->param() = arg.param;
  363. opr->deduce_layout(src, filter, dst);
  364. src.dtype = dst.dtype = filter.dtype = dtype::Float32();
  365. checker.set_rng(0, &default_rng)
  366. .set_rng(1, &default_rng)
  367. .set_param(arg.param)
  368. .exec(TensorLayoutArray{src, dst, filter});
  369. }
  370. }
  371. TEST_F(CUDA, CONVOLUTION3D_BACKWARD_DATA_NONCONTIG_CUDNN) {
  372. using namespace convolution3d;
  373. Checker<Convolution3DBackwardData> checker(handle_cuda());
  374. checker.set_before_exec_callback(AlgoChecker<Convolution3DBackwardData>("CUDNN"));
  375. Convolution3DBackwardData::Param param;
  376. param.pad_d = param.pad_h = param.pad_w = 1;
  377. NormalRNG default_rng;
  378. checker.set_dtype(0, dtype::Float32())
  379. .set_dtype(1, dtype::Float32())
  380. .set_rng(0, &default_rng)
  381. .set_rng(1, &default_rng)
  382. .set_epsilon(1e-3)
  383. .set_param(param);
  384. //! noncontiguous case
  385. {
  386. checker.execl(TensorLayoutArray{
  387. {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
  388. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
  389. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
  390. }
  391. }
  392. TEST_F(CUDA, CONVOLUTION3D_BACKWARD_FILTER_NONCONTIG_CUDNN) {
  393. using namespace convolution3d;
  394. Checker<Convolution3DBackwardFilter> checker(handle_cuda());
  395. checker.set_before_exec_callback(AlgoChecker<Convolution3DBackwardFilter>("CUDNN"));
  396. Convolution3DBackwardFilter::Param param;
  397. param.pad_d = param.pad_h = param.pad_w = 1;
  398. NormalRNG default_rng;
  399. checker.set_dtype(0, dtype::Float32())
  400. .set_dtype(1, dtype::Float32())
  401. .set_rng(0, &default_rng)
  402. .set_rng(1, &default_rng)
  403. .set_epsilon(1e-3)
  404. .set_param(param);
  405. //! noncontiguous case
  406. {
  407. checker.execl(TensorLayoutArray{
  408. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
  409. {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
  410. {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()}});
  411. }
  412. }
  413. /*
  414. TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) {
  415. auto eps_getter = [](bool f16, int stage, const char *name) -> float {
  416. if (f16) {
  417. return stage == 2 ? 0.9 : 0.7;
  418. }
  419. if (strstr(name, "WINOGRAD_NONFUSED"))
  420. return 0.3;
  421. return 1e-3;
  422. };
  423. convolution3d::test_conv_config_combinations(handle_cuda(), false, true,
  424. true, eps_getter);
  425. }
  426. */
  427. } // namespace test
  428. } // namespace megdnn
  429. // vim: syntax=cpp.doxygen