
convolution3d.cpp 13 kB

/**
 * \file dnn/test/cuda/convolution3d.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "test/common/convolution3d.h"

#include "megdnn/opr_param_defs.h"
#include "megdnn/oprs.h"
#include "src/cuda/utils.h"
#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/rng.h"
#include "test/common/tensor.h"
#include "test/common/workspace_wrapper.h"
#include "test/cuda/fixture.h"

namespace megdnn {
namespace test {
#if 0
TEST_F(CUDA, CONVOLUTION3D_8X8X32) {
    if (!cuda::is_compute_capability_required(6, 1)) {
        printf("Skip CUDA.CONVOLUTION3D_8X8X32 test as current device "
               "doesn't support it\n");
        return;
    }
    using namespace convolution3d;
    std::vector<TestArg> args;
    {
        auto v = get_args();
        for (auto&& a : v) {
            args.push_back(std::move(a));
        }
    }
    /*
    {
        auto v = get_dilated_args();
        for (auto&& a : v) {
            args.push_back(std::move(a));
        }
    }
    {
        auto v = get_chanwise_args();
        for (auto&& a : v) {
            args.push_back(std::move(a));
        }
    }
    */
    Checker<Convolution3DForward> checker(handle_cuda());
    UniformIntRNG rng(-4, 4);
    UniformIntRNG rng_same(1, 1);
    for (auto arg : args) {
        arg.param.format = param::Convolution3D::Format::NDHWC;
        arg.param.data_type = param::Convolution3D::DataType::INT8x8x32;
        arg.src = cvt_src_or_dst_ncdhw2ndhwc(arg.src);
        arg.filter = cvt_filter_ncdhw2ndhwc(arg.filter);
        checker.set_dtype(0, dtype::Int8())
                .set_dtype(1, dtype::Int8())
                .set_dtype(2, dtype::Int32())
                .set_param(arg.param)
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .execs({arg.src, arg.filter, {}});
    }
}
#endif
TEST_F(CUDA, CONVOLUTION3D_FORWARD) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_args();
    /*
    {
        auto v = get_chanwise_args();
        for (auto&& a : v) {
            args.push_back(std::move(a));
        }
    }
    {
        auto v = get_dilated_args();
        for (auto&& a : v) {
            args.push_back(std::move(a));
        }
    }
    */
    bool fp16_checked = false;
    Checker<Convolution3DForward> checker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        // scale ~ 1/sqrt(fan_in) keeps output magnitudes around 1, which
        // matters for the reduced-precision fp16 check below
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
                                  arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
        // run the fp16 check only once, on a reasonably small case
        if (fp16_checked || arg.src.total_nr_elems() >= 1000)
            continue;
        checker.set_dtype(0, dtype::Float16())
                .set_dtype(1, dtype::Float16())
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
        fp16_checked = true;
    }
}
TEST_F(CUDA, CONVOLUTION3D_1X1X1_FORWARD) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_1x1x1_args();
    Checker<Convolution3DForward> checker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
                                  arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}
TEST_F(CUDA, CONVOLUTION3D_MATMUL_FORWARD) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_args();
    Checker<Convolution3DForward> checker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
                                  arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}
#if MEGDNN_WITH_BENCHMARK
TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_MATMUL_BACKWARD_FILTER) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_speed_test_args();
    Benchmarker<Convolution3DBackwardFilter> marker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
                                  arg.filter[4]);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        auto opr = handle_cuda()->create_operator<Convolution3D>();
        opr->param() = arg.param;
        opr->deduce_layout(src, filter, dst);
        UniformFloatRNG rng(scale, 2 * scale);
        marker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_param(arg.param)
                .execs({src, dst, filter});
    }
}
TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_MATMUL_FORWARD) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_speed_test_args();
    Benchmarker<Convolution3DForward> marker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
                                  arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        marker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                // .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}
TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_1X1X1_FORWARD) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_1x1x1_args();
    Benchmarker<Convolution3DForward> marker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
                                  arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        marker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                // .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}
TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_FORWARD) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_args();
    {
        auto v = get_chanwise_args();
        for (auto&& a : v)
            args.push_back(std::move(a));
    }
    {
        auto v = get_1x1x1_args();
        for (auto&& a : v)
            args.push_back(std::move(a));
    }
    {
        auto v = get_dilated_args();
        for (auto&& a : v)
            args.push_back(std::move(a));
    }
    Benchmarker<Convolution3DForward> marker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
                                  arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        marker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
        marker.set_dtype(0, dtype::Float16())
                .set_dtype(1, dtype::Float16())
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}});
    }
}
#endif
TEST_F(CUDA, CONVOLUTION3D_BACKWARD_DATA) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_args();
    Checker<Convolution3DBackwardData> checker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3] *
                                  arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_cuda()->create_operator<Convolution3D>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .exec(TensorLayoutArray{filter, dst, src});
    }
}
TEST_F(CUDA, CONVOLUTION3D_BACKWARD_FILTER) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_args();
    Checker<Convolution3DBackwardFilter> checker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        {
            auto opr = handle_cuda()->create_operator<Convolution3D>();
            opr->param() = arg.param;
            opr->deduce_layout(src, filter, dst);
        }
        float scale = 1.0f / sqrt(dst[0] * dst[2] * dst[3] * dst[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_epsilon(1e-3)
                .set_param(arg.param)
                .exec(TensorLayoutArray{src, dst, filter});
        if (dst.total_nr_elems() >= 1000)
            continue;
        src.dtype = dst.dtype = filter.dtype = dtype::Float16();
        checker.set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_epsilon(1e-1)
                .set_param(arg.param)
                .exec(TensorLayoutArray{src, dst, filter});
    }
}
TEST_F(CUDA, CONVOLUTION3D_MATMUL_BACKWARD_FILTER) {
    using namespace convolution3d;
    std::vector<TestArg> args = get_args();
    Checker<Convolution3DBackwardFilter> checker(handle_cuda());
    NormalRNG default_rng;
    for (auto&& arg : args) {
        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
                                  arg.filter[4]);
        UniformFloatRNG rng(scale, 2 * scale);
        auto src = TensorLayout(arg.src, dtype::Float32());
        auto filter = TensorLayout(arg.filter, dtype::Float32());
        TensorLayout dst;
        auto opr = handle_cuda()->create_operator<Convolution3D>();
        opr->param() = arg.param;
        opr->deduce_layout(src, filter, dst);
        src.dtype = dst.dtype = filter.dtype = dtype::Float32();
        checker.set_rng(0, &default_rng)
                .set_rng(1, &default_rng)
                .set_param(arg.param)
                .exec(TensorLayoutArray{src, dst, filter});
    }
}
/*
TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) {
    auto eps_getter = [](bool f16, int stage, const char* name) -> float {
        if (f16) {
            return stage == 2 ? 0.9 : 0.7;
        }
        if (strstr(name, "WINOGRAD_NONFUSED"))
            return 0.3;
        return 1e-3;
    };
    convolution3d::test_conv_config_combinations(handle_cuda(), false, true,
                                                 true, eps_getter);
}
*/

}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen
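The tests above compare megdnn's Convolution3D operators against reference results for NCDHW-layout tensors, where the filter shape is (OC, IC, FD, FH, FW); that is why `arg.filter[1] * arg.filter[2] * arg.filter[3] * arg.filter[4]` is the per-output fan-in used to derive `scale`. For readers unfamiliar with the layout convention, here is a minimal reference sketch of the forward computation (stride 1, no padding, no dilation; `conv3d_ref` is a hypothetical helper, not part of the test suite or the implementation under test):

#include <cstddef>

// Naive 3-D convolution in NCDHW layout, stride 1, no padding, no dilation.
// src: (N, IC, D, H, W); filter: (OC, IC, FD, FH, FW);
// dst: (N, OC, D-FD+1, H-FH+1, W-FW+1).
void conv3d_ref(const float* src, const float* filter, float* dst,
                size_t N, size_t IC, size_t D, size_t H, size_t W,
                size_t OC, size_t FD, size_t FH, size_t FW) {
    size_t OD = D - FD + 1, OH = H - FH + 1, OW = W - FW + 1;
    for (size_t n = 0; n < N; ++n)
    for (size_t oc = 0; oc < OC; ++oc)
    for (size_t od = 0; od < OD; ++od)
    for (size_t oh = 0; oh < OH; ++oh)
    for (size_t ow = 0; ow < OW; ++ow) {
        float acc = 0.f;  // accumulates IC*FD*FH*FW products (the fan-in)
        for (size_t ic = 0; ic < IC; ++ic)
        for (size_t fd = 0; fd < FD; ++fd)
        for (size_t fh = 0; fh < FH; ++fh)
        for (size_t fw = 0; fw < FW; ++fw) {
            size_t si =
                    (((n * IC + ic) * D + od + fd) * H + oh + fh) * W + ow + fw;
            size_t fi = (((oc * IC + ic) * FD + fd) * FH + fh) * FW + fw;
            acc += src[si] * filter[fi];
        }
        dst[(((n * OC + oc) * OD + od) * OH + oh) * OW + ow] = acc;
    }
}

Because each output element sums fan-in products, drawing inputs uniformly from [scale, 2*scale] with scale = 1/sqrt(fan_in) keeps outputs of order one, which is what makes the loose fp16 epsilon (1e-1) in the tests workable.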

The MegEngine package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU vs. GPU build to choose between. To run GPU programs, make sure the machine has a GPU and that the driver is properly installed. If you would like to try deep-learning development on a cloud GPU platform, you are welcome to visit MegStudio.
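If you are unsure whether a machine is GPU-ready, a minimal standalone probe using the CUDA runtime API is sketched below. It is independent of MegEngine and assumes the CUDA toolkit is installed so it can be built with nvcc; the file name is illustrative:

// probe.cu: report whether a usable CUDA device and driver are present.
#include <cuda_runtime.h>
#include <cstdio>

int main() {
    int count = 0;
    // Fails (or reports zero devices) when no GPU or driver is available.
    if (cudaGetDeviceCount(&count) != cudaSuccess || count == 0) {
        std::printf("no usable CUDA device found\n");
        return 1;
    }
    cudaDeviceProp prop{};
    cudaGetDeviceProperties(&prop, 0);
    // Compute capability 6.1 is, e.g., what the disabled CONVOLUTION3D_8X8X32
    // test above requires.
    std::printf("device 0: %s (compute %d.%d)\n", prop.name, prop.major,
                prop.minor);
    return 0;
}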