

/**
 * \file dnn/src/cuda/convolution3d/opr_impl.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include "./opr_impl.h"
#include "./backward_data/algo.h"
#include "./backward_filter/algo.h"
#include "./forward/algo.h"
#include "./helper.h"
#include "src/common/algo_chooser.h"
#include "src/cuda/utils.h"

using namespace megdnn;
using namespace cuda;
using namespace convolution3d;

#define TO_STRING2(v) #v
#define TO_STRING(v) TO_STRING2(v)
#define CUDNN_VERSION_STR \
    TO_STRING(CUDNN_MAJOR) \
    "." TO_STRING(CUDNN_MINOR) "." TO_STRING(CUDNN_PATCHLEVEL)
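
// CUDNN_VERSION_STR expands to the cuDNN version this file is built against as
// a string literal (e.g. "7.6.5"); it is appended to the strings returned by
// get_algorithm_set_name() below, which keeps algorithm sets built against
// different cuDNN versions distinct.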

/* ============== Convolution3DForwardImpl ============== */
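// Heuristic order used by get_algorithm_heuristic() below:
//   1. for grouped convolutions on cuDNN < 7.5.0, try the dedicated
//      channel-wise algorithm;
//   2. for small batches (<= MAX_BATCH_SIZE_FOR_1x1x1_MAT_ALGO), prefer the
//      1x1x1 algorithm when it is applicable;
//   3. otherwise ask cuDNN for an algorithm within the workspace limit;
//   4. for grouped convolutions, retry steps 2-3 on the single-group problem
//      and wrap the result via the algo2gconv map;
//   5. finally fall back to the non-cuDNN algorithms in the algo pack.
// Only algorithms whose attributes satisfy positive_attr / negative_attr are
// considered at each step.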
Convolution3DForwardImpl::Algorithm*
Convolution3DForwardImpl::get_algorithm_heuristic(
        const TensorLayout& src, const TensorLayout& filter,
        const TensorLayout& dst, size_t workspace_limit_in_bytes,
        const AlgoAttribute& positive_attr,
        const AlgoAttribute& negative_attr) {
    auto fm = check_layout_fwd(src, filter, dst);
    return get_algorithm_heuristic(src, fm, dst, workspace_limit_in_bytes,
                                   positive_attr, negative_attr);
}

Convolution3DForwardImpl::Algorithm*
Convolution3DForwardImpl::get_algorithm_heuristic(
        const TensorLayout& src, const CanonizedFilterMeta& filter,
        const TensorLayout& dst, size_t workspace_limit_in_bytes,
        const AlgoAttribute& positive_attr,
        const AlgoAttribute& negative_attr) {
    AlgoBase::SizeArgs args(this, src, filter, dst);
#if CUDNN_MAJOR < 7 || (CUDNN_MAJOR == 7 && CUDNN_MINOR < 5)
    if (args.filter_meta.group > 1) {
        // prefer the special chanwise impl, since cuDNN group convolution
        // before v7.5.0 is still slower than our implementation in many
        // channel-wise cases
        if (sm_algo_pack.chanwise.is_available_attribute(
                    args, positive_attr, negative_attr,
                    workspace_limit_in_bytes)) {
            return &sm_algo_pack.chanwise;
        }
    }
#endif
    auto prefer_1x1x1 = [&args, positive_attr, negative_attr,
                         workspace_limit_in_bytes]() {
        const size_t MAX_BATCH_SIZE_FOR_1x1x1_MAT_ALGO = 4;
        size_t batch_size = args.src_layout->shape[0];
        if (batch_size > MAX_BATCH_SIZE_FOR_1x1x1_MAT_ALGO) {
            return false;
        }
        return sm_algo_pack.a1x1x1.is_available_attribute(
                args, positive_attr, negative_attr, workspace_limit_in_bytes);
    };
    auto get_cudnn_algo =
            [this, &args, workspace_limit_in_bytes, positive_attr,
             negative_attr]() -> Convolution3DForwardImpl::AlgoBase* {
        auto cudnn_handle = cuda::cudnn_handle(this->handle());
        cudnnConvolutionFwdAlgo_t algo;
        CUDNNForwardDescs desc;
        args.init_desc(desc);
        bool got = cudnn_get_convolution_fwd_algo_helper(
                cudnn_handle, desc.src_desc.desc, desc.filter_desc.desc,
                desc.conv_desc.desc, desc.dst_desc.desc,
                workspace_limit_in_bytes, &algo, positive_attr, negative_attr);
        if (got) {
            return static_cast<AlgoBase*>(
                    megdnn::get_algo_match_attribute<Convolution3DForwardImpl>(
                            sm_algo_pack.cudnn_from_enum(algo), positive_attr,
                            negative_attr));
        } else {
            return nullptr;
        }
    };
    if (prefer_1x1x1()) {
        return &sm_algo_pack.a1x1x1;
    }
    if (is_cudnn_supported(args)) {
        if (auto algo = get_cudnn_algo())
            return algo;
    }
    if (args.filter_meta.group > 1) {
        auto orig_args = args;
        TensorLayout a, b;
        AlgoGroupConvGeneral::modify_size_args(args, a, b);
        if (prefer_1x1x1()) {
            return sm_algo_pack.algo2gconv.at(&sm_algo_pack.a1x1x1);
        }
        if (is_cudnn_supported(args)) {
            if (auto algo = get_cudnn_algo())
                return sm_algo_pack.algo2gconv.at(algo);
        }
        args = orig_args;
    }
    return megdnn::get_algo_match_attribute<Convolution3DForwardImpl>(
            sm_algo_pack.non_cudnn_algos, args, workspace_limit_in_bytes,
            "cuda conv3d fwd", positive_attr, negative_attr);
}
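
// The get_workspace_in_bytes() and exec() entry points here (and in the
// backward operators below) resolve their concrete algorithm through the
// get_algorithm() helper from src/common/algo_chooser.h; when no algorithm has
// been explicitly selected, this is expected to end up in the heuristic above.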
std::vector<Convolution3DForwardImpl::Algorithm*>
Convolution3DForwardImpl::get_all_algorithms(const TensorLayout& src,
                                             const TensorLayout& filter,
                                             const TensorLayout& dst) {
    return megdnn::get_all_algorithms<Convolution3DForwardImpl>(
            {this, src, filter, dst});
}

size_t Convolution3DForwardImpl::get_workspace_in_bytes(
        const TensorLayout& src, const TensorLayout& filter,
        const TensorLayout& dst) {
    AlgoBase::SizeArgs args(this, src, filter, dst);
    return get_algorithm(this, src, args.filter_meta, dst)
            ->get_workspace_in_bytes(args);
}

void Convolution3DForwardImpl::exec(_megdnn_tensor_in src,
                                    _megdnn_tensor_in filter,
                                    _megdnn_tensor_out dst,
                                    _megdnn_workspace workspace) {
    AlgoBase::ExecArgs args(this, src, filter, dst, workspace);
    auto algo = get_algorithm(this, src.layout, args.filter_meta, dst.layout);
    algo->check_workspace(args, workspace).exec(args);
}

const char* Convolution3DForwardImpl::get_algorithm_set_name() const {
    return "CUDACONV0+CUDNN" CUDNN_VERSION_STR;
}
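
/* ============== Convolution3DBackwardDataImpl ============== */
// The data-gradient heuristic mirrors the forward one: channel-wise special
// case for grouped convolutions, then cuDNN, then the grouped-convolution
// wrapper, then the non-cuDNN fallbacks; there is no 1x1x1 shortcut here.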
void Convolution3DBackwardDataImpl::exec(_megdnn_tensor_in filter,
                                         _megdnn_tensor_in diff,
                                         _megdnn_tensor_out grad,
                                         _megdnn_workspace workspace) {
    AlgoBase::ExecArgs args(this, filter, diff, grad, workspace);
    auto algo = get_algorithm(this, args.filter_meta, diff.layout, grad.layout);
    algo->check_workspace(args, workspace).exec(args);
}

std::vector<Convolution3DBackwardDataImpl::Algorithm*>
Convolution3DBackwardDataImpl::get_all_algorithms(const TensorLayout& filter,
                                                  const TensorLayout& diff,
                                                  const TensorLayout& grad) {
    return megdnn::get_all_algorithms<Convolution3DBackwardDataImpl>(
            {this, filter, diff, grad});
}

Convolution3DBackwardDataImpl::Algorithm*
Convolution3DBackwardDataImpl::get_algorithm_heuristic(
        const TensorLayout& filter, const TensorLayout& diff,
        const TensorLayout& grad, size_t workspace_limit_in_bytes,
        const AlgoAttribute& positive_attr,
        const AlgoAttribute& negative_attr) {
    auto fm = check_layout_fwd(grad, filter, diff);
    return get_algorithm_heuristic(fm, diff, grad, workspace_limit_in_bytes,
                                   positive_attr, negative_attr);
}

Convolution3DBackwardDataImpl::Algorithm*
Convolution3DBackwardDataImpl::get_algorithm_heuristic(
        const CanonizedFilterMeta& filter, const TensorLayout& diff,
        const TensorLayout& grad, size_t workspace_limit_in_bytes,
        const AlgoAttribute& positive_attr,
        const AlgoAttribute& negative_attr) {
    AlgoBase::SizeArgs args(this, filter, diff, grad);
    if (args.filter_meta.group > 1 &&
        sm_algo_pack.chanwise.is_available_attribute(
                args, positive_attr, negative_attr, workspace_limit_in_bytes)) {
        return &sm_algo_pack.chanwise;
    }
    auto get_cudnn_algo =
            [this, &args, workspace_limit_in_bytes, positive_attr,
             negative_attr]() -> Convolution3DBackwardDataImpl::AlgoBase* {
        auto cudnn_handle = cuda::cudnn_handle(this->handle());
        cudnnConvolutionBwdDataAlgo_t algo;
        CUDNNBwdDataDescs desc;
        args.init_desc(desc);
        bool got = cudnn_get_convolution_bwd_data_algo_helper(
                cudnn_handle, desc.filter_desc.desc, desc.diff_desc.desc,
                desc.conv_desc.desc, desc.grad_desc.desc,
                workspace_limit_in_bytes, &algo, positive_attr, negative_attr);
        if (got) {
            return static_cast<AlgoBase*>(megdnn::get_algo_match_attribute<
                                          Convolution3DBackwardDataImpl>(
                    sm_algo_pack.cudnn_from_enum(algo), positive_attr,
                    negative_attr));
        } else {
            return nullptr;
        }
    };
    if (is_cudnn_supported(args.as_fwd_args())) {
        if (auto algo = get_cudnn_algo())
            return algo;
    }
    if (args.filter_meta.group > 1) {
        auto orig_args = args;
        TensorLayout a, b;
        AlgoGroupConvGeneral::modify_size_args(args, a, b);
        if (is_cudnn_supported(args.as_fwd_args())) {
            if (auto algo = get_cudnn_algo())
                return sm_algo_pack.algo2gconv.at(algo);
        }
        args = orig_args;
    }
    return megdnn::get_algo_match_attribute<Convolution3DBackwardDataImpl>(
            sm_algo_pack.non_cudnn_algos, args, workspace_limit_in_bytes,
            "cuda conv3d bwd data", positive_attr, negative_attr);
}

size_t Convolution3DBackwardDataImpl::get_workspace_in_bytes(
        const TensorLayout& filter, const TensorLayout& diff,
        const TensorLayout& grad) {
    AlgoBase::SizeArgs args(this, filter, diff, grad);
    return get_algorithm(this, args.filter_meta, diff, grad)
            ->get_workspace_in_bytes(args);
}

const char* Convolution3DBackwardDataImpl::get_algorithm_set_name() const {
    return "CUDACONV0+CUDNN" CUDNN_VERSION_STR;
}
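
/* ============== Convolution3DBackwardFilterImpl ============== */
// Same selection scheme as the data gradient, but the filter metadata lives in
// args.grad_filter_meta, since here the filter gradient is the output being
// computed.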
void Convolution3DBackwardFilterImpl::exec(_megdnn_tensor_in src,
                                           _megdnn_tensor_in diff,
                                           _megdnn_tensor_out grad,
                                           _megdnn_workspace workspace) {
    AlgoBase::ExecArgs args(this, src, diff, grad, workspace);
    auto algo =
            get_algorithm(this, src.layout, diff.layout, args.grad_filter_meta);
    algo->check_workspace(args, workspace).exec(args);
}

std::vector<Convolution3DBackwardFilterImpl::Algorithm*>
Convolution3DBackwardFilterImpl::get_all_algorithms(const TensorLayout& src,
                                                    const TensorLayout& diff,
                                                    const TensorLayout& grad) {
    return megdnn::get_all_algorithms<Convolution3DBackwardFilterImpl>(
            {this, src, diff, grad});
}

Convolution3DBackwardFilterImpl::Algorithm*
Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
        const TensorLayout& src, const TensorLayout& diff,
        const TensorLayout& grad, size_t workspace_limit_in_bytes,
        const AlgoAttribute& positive_attr,
        const AlgoAttribute& negative_attr) {
    auto fm = check_layout_fwd(src, grad, diff);
    return get_algorithm_heuristic(src, diff, fm, workspace_limit_in_bytes,
                                   positive_attr, negative_attr);
}

Convolution3DBackwardFilterImpl::Algorithm*
Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
        const TensorLayout& src, const TensorLayout& diff,
        const CanonizedFilterMeta& grad, size_t workspace_limit_in_bytes,
        const AlgoAttribute& positive_attr,
        const AlgoAttribute& negative_attr) {
    AlgoBase::SizeArgs args(this, src, diff, grad);
    if (args.grad_filter_meta.group > 1 &&
        sm_algo_pack.chanwise.is_available_attribute(
                args, positive_attr, negative_attr, workspace_limit_in_bytes)) {
        return &sm_algo_pack.chanwise;
    }
    auto get_cudnn_algo =
            [this, &args, workspace_limit_in_bytes, positive_attr,
             negative_attr]() -> Convolution3DBackwardFilterImpl::AlgoBase* {
        auto cudnn_handle = cuda::cudnn_handle(this->handle());
        cudnnConvolutionBwdFilterAlgo_t algo;
        CUDNNBwdFilterDescs desc;
        args.init_desc(desc);
        bool got = cudnn_get_convolution_bwd_filter_algo_helper(
                cudnn_handle, desc.src_desc.desc, desc.diff_desc.desc,
                desc.conv_desc.desc, desc.grad_desc.desc,
                workspace_limit_in_bytes, &algo, positive_attr, negative_attr);
        if (got) {
            return static_cast<AlgoBase*>(megdnn::get_algo_match_attribute<
                                          Convolution3DBackwardFilterImpl>(
                    sm_algo_pack.cudnn_from_enum(algo), positive_attr,
                    negative_attr));
        } else {
            return nullptr;
        }
    };
    if (is_cudnn_supported(args.as_fwd_args())) {
        if (auto algo = get_cudnn_algo())
            return algo;
    }
    if (args.grad_filter_meta.group > 1) {
        auto orig_args = args;
        TensorLayout a, b;
        AlgoGroupConvGeneral::modify_size_args(args, a, b);
        if (is_cudnn_supported(args.as_fwd_args())) {
            if (auto algo = get_cudnn_algo())
                return sm_algo_pack.algo2gconv.at(algo);
        }
        args = orig_args;
    }
    return megdnn::get_algo_match_attribute<Convolution3DBackwardFilterImpl>(
            sm_algo_pack.non_cudnn_algos, args, workspace_limit_in_bytes,
            "cuda conv3d bwd filter", positive_attr, negative_attr);
}

size_t Convolution3DBackwardFilterImpl::get_workspace_in_bytes(
        const TensorLayout& src, const TensorLayout& diff,
        const TensorLayout& grad) {
    AlgoBase::SizeArgs args(this, src, diff, grad);
    return get_algorithm(this, src, diff, args.grad_filter_meta)
            ->get_workspace_in_bytes(args);
}

const char* Convolution3DBackwardFilterImpl::get_algorithm_set_name() const {
    return "CUDACONV0+CUDNN" CUDNN_VERSION_STR;
}

// vim: syntax=cpp.doxygen
