
helper.cpp

/**
 * \file dnn/src/cuda/conv_bias/helper.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "src/cuda/conv_bias/helper.h"
#include "src/cuda/utils.h"

namespace megdnn {
namespace cuda {
ConvBiasDesc::ConvBiasDesc() {
    cudnn_check(cudnnCreateActivationDescriptor(&act_desc));
    cudnn_check(cudnnCreateConvolutionDescriptor(&conv_desc));
#if CUDNN_VERSION >= 7000
    cudnn_check(cudnnSetConvolutionMathType(conv_desc, CUDNN_TENSOR_OP_MATH));
#endif
}

ConvBiasDesc::~ConvBiasDesc() {
    cudnn_check(cudnnDestroyConvolutionDescriptor(conv_desc));
    cudnn_check(cudnnDestroyActivationDescriptor(act_desc));
}
void ConvBiasDesc::set_conv_bias(DType data_type, const param::ConvBias& param,
                                 size_t nr_group) {
#if CUDNN_VERSION < 7100
    megdnn_throw(megdnn_mangle(
            "ConvBias(CUDNN_ACTIVATION_IDENTITY) requires cuDNN 7.1 or higher"));
#else
    cudnnConvolutionMode_t mode;
    using Param = param::ConvBias;
    switch (param.mode) {
        case Param::Mode::CROSS_CORRELATION:
            mode = CUDNN_CROSS_CORRELATION;
            break;
        case Param::Mode::CONVOLUTION:
            mode = CUDNN_CONVOLUTION;
            break;
        default:
            megdnn_throw(megdnn_mangle("conv mode must be conv or xcorr."));
    }
    cudnn_check(cudnnSetConvolutionGroupCount(conv_desc, nr_group));
    cudnnDataType_t compute_type;
    switch (data_type.category()) {
        case DTypeCategory::FLOAT:
            compute_type = CUDNN_DATA_FLOAT;
            break;
        case DTypeCategory::INT:
        case DTypeCategory::QUANTIZED:
            compute_type = CUDNN_DATA_INT32;
            break;
        default:
            megdnn_throw(megdnn_mangle("unsupported data type for conv bias"));
    }
    if (data_type.enumv() == DTypeEnum::Float16) {
        auto comp_mode = param.compute_mode;
        compute_type = get_compute_type_fp16(comp_mode);
    }
    cudnn_check(cudnnSetConvolution2dDescriptor(
            conv_desc, param.pad_h, param.pad_w, param.stride_h, param.stride_w,
            param.dilate_h, param.dilate_w, mode, compute_type));
    switch (param.nonlineMode) {
        case Param::NonlineMode::IDENTITY:
        case Param::NonlineMode::SIGMOID:
        case Param::NonlineMode::H_SWISH:
            // SIGMOID and H_SWISH are not fused into the cuDNN kernel:
            // the activation descriptor stays IDENTITY and the
            // nonlinearity is applied outside the cuDNN call.
            cudnn_check(cudnnSetActivationDescriptor(
                    act_desc, CUDNN_ACTIVATION_IDENTITY,
                    CUDNN_NOT_PROPAGATE_NAN, 0));
            break;
        case Param::NonlineMode::RELU:
            cudnn_check(cudnnSetActivationDescriptor(
                    act_desc, CUDNN_ACTIVATION_RELU, CUDNN_NOT_PROPAGATE_NAN,
                    0));
            break;
        default:
            megdnn_throw(megdnn_mangle("unsupported nonlinear mode"));
    }
#endif
}
void ConvBiasDesc::set_conv(DType data_type, const param::ConvBias& param,
                            const size_t nr_group) {
    using Param = param::ConvBias;
    cudnnConvolutionMode_t mode;
    switch (param.mode) {
        case Param::Mode::CROSS_CORRELATION:
            mode = CUDNN_CROSS_CORRELATION;
            break;
        case Param::Mode::CONVOLUTION:
            mode = CUDNN_CONVOLUTION;
            break;
        default:
            megdnn_throw(megdnn_mangle("conv mode must be conv or xcorr."));
    }
    cudnnDataType_t compute_type;
    MEGDNN_MARK_USED_VAR(compute_type);
    if (data_type.enumv() == DTypeEnum::Float32) {
        // FLOAT_CONFIG
        compute_type = CUDNN_DATA_FLOAT;
    } else if (data_type.enumv() == DTypeEnum::Float16) {
        auto comp_mode = param.compute_mode;
        compute_type = get_compute_type_fp16(comp_mode);
#if CUDNN_MAJOR >= 7
    } else if (data_type.category() == DTypeCategory::INT ||
               data_type.category() == DTypeCategory::QUANTIZED) {
        compute_type = CUDNN_DATA_INT32;
#endif
    } else {
        megdnn_throw(megdnn_mangle("unsupported data type for conv bias"));
    }
#if CUDNN_MAJOR >= 7
    cudnn_check(cudnnSetConvolutionGroupCount(conv_desc, nr_group));
#else
    megdnn_assert(nr_group == 1);
#endif
#if CUDNN_MAJOR >= 6
    cudnn_check(cudnnSetConvolution2dDescriptor(
            conv_desc, param.pad_h, param.pad_w, param.stride_h, param.stride_w,
            param.dilate_h, param.dilate_w, mode, compute_type));
#else
    // Pre-6.0 cuDNN has no compute_type argument on the 2d descriptor.
    cudnn_check(cudnnSetConvolution2dDescriptor(
            conv_desc, param.pad_h, param.pad_w, param.stride_h, param.stride_w,
            param.dilate_h, param.dilate_w, mode));
#endif
}
namespace conv_bias {

bool is_cudnn_supported(const BiasForwardSizeArgs& args) {
    if (args.src_layout->dtype == args.filter_layout->dtype &&
        args.src_layout->dtype == dtype::BFloat16()) {
        return false;
    }

    // CUDNN_STATUS_EXECUTION_FAILED on Tegra K1, so disable CUDNN
    // on Tegra K1.
    if (args.handle->is_tegra_k1())
        return false;

    // TODO: We only support NCHW format now. It seems cuDNN provides support
    // for NHWC as well.
    if (args.filter_meta.format == param::Convolution::Format::NCHW4) {
        if (args.dst_layout->dtype.enumv() != DTypeEnum::Int8 &&
            args.dst_layout->dtype.enumv() != DTypeEnum::QuantizedS8) {
            return false;
        }
    } else if (args.filter_meta.format != param::Convolution::Format::NCHW) {
        return false;
    }
    auto& fm = args.filter_meta;
    bool supported = true;
    supported &= (fm.spatial_ndim == 2);
#if CUDNN_VERSION < 7000
    supported &= (fm.group == 1);
#endif
#if CUDNN_VERSION < 7500
    supported &= (fm.dilation[0] == 1 && fm.dilation[1] == 1);
#endif
    return supported;
}
bool check_bias_share_in_channel(const TensorLayout& bias,
                                 const param::ConvBias::Format format) {
    bool share_in_channel = false;
    if (format == param::ConvBias::Format::NCHW ||
        format == param::ConvBias::Format::NCHW4_NCHW) {
        // e.g. a bias of shape (1, C, 1, 1): one value per output channel
        share_in_channel = (bias.ndim == 4 && bias[0] == 1 && bias[2] == 1 &&
                            bias[3] == 1);
    } else if (format == param::ConvBias::Format::NHWC) {
        share_in_channel = (bias.ndim == 4 && bias[0] == 1 && bias[1] == 1 &&
                            bias[2] == 1);
    } else if (format == param::ConvBias::Format::NCHW4 ||
               format == param::ConvBias::Format::NCHW8 ||
               format == param::ConvBias::Format::NCHW32 ||
               format == param::ConvBias::Format::NCHW4_NCHW32 ||
               format == param::ConvBias::Format::NCHW32_NCHW4) {
        share_in_channel = (bias.ndim == 5 && bias[0] == 1 && bias[2] == 1 &&
                            bias[3] == 1);
    } else if (format == param::ConvBias::Format::NHWCD4) {
        share_in_channel = (bias.ndim == 5 && bias[0] == 1 && bias[1] == 1 &&
                            bias[3] == 1);
    } else {
        megdnn_assert(format == param::ConvBias::Format::CHWN4);
        share_in_channel = (bias.ndim == 5 && bias[1] == 1 && bias[2] == 1 &&
                            bias[3] == 1);
    }
    return share_in_channel;
}
SmallVector<size_t> matmul_get_workspace_bundle(
        const BiasForwardSizeArgs& args) {
    auto dtype = args.src_layout->dtype;
    auto&& fm = args.filter_meta;
    megdnn_assert(fm.group == 1);
    auto N = args.src_layout->shape[0];
    auto OC = fm.ocpg, IC = fm.icpg, FH = fm.spatial[0], FW = fm.spatial[1];
    auto OH = args.dst_layout->shape[2], OW = args.dst_layout->shape[3];
    // Two buffers: one the size of the output tensor for the matmul result,
    // and one for the im2col-expanded input, (IC*FH*FW) x (OH*OW*N) elements.
    SmallVector<size_t> sizes{
            dtype.size() * args.dst_layout->total_nr_elems(),
            dtype.size() * IC * FH * FW * OH * OW * N};
    if (args.filter_meta.should_flip) {
        // An extra buffer to hold the flipped filter (see flip_filter below).
        sizes.push_back(dtype.size() * OC * IC * FH * FW);
    }
    return sizes;
}
void flip_filter(const BiasForwardSizeArgs& args, const Workspace& workspace,
                 void*& raw_ptr) {
    auto&& fm = args.filter_meta;
    megdnn_assert(fm.group == 1 && fm.spatial_ndim == 2);
    auto OC = fm.ocpg, IC = fm.icpg, FH = fm.spatial[0], FW = fm.spatial[1];
    auto dtype = fm.dtype;
    megdnn_assert(workspace.size >= dtype.size() * OC * IC * FH * FW);
    // Flip the filter along both spatial axes with a single relayout:
    // the dst data pointer starts at the last spatial element of each
    // filter, and the spatial strides are negated so the copy runs backwards.
    TensorND src{raw_ptr, {{OC, IC, FH, FW}, dtype}},
            dst{workspace.raw_ptr + (FH * FW - 1) * dtype.size(), src.layout};
    dst.layout.stride[2] = -dst.layout.stride[2];
    dst.layout.stride[3] = -dst.layout.stride[3];
    args.handle->relayout_opr()->exec(src, dst);
    raw_ptr = workspace.raw_ptr;
}

} // conv_bias
} // cuda
} // megdnn

// vim: syntax=cpp.doxygen
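
For readers outside the MegEngine tree, the descriptor setup that ConvBiasDesc wraps can be reproduced with plain cuDNN calls. Below is a minimal, self-contained sketch assuming cuDNN >= 7; the CHECK_CUDNN macro and the concrete parameter values (3x3 filter, pad 1, stride 1, 2 groups, float data, ReLU) are illustrative stand-ins, not MegEngine code. It mirrors what set_conv_bias does for a float tensor, without the megdnn error-handling helpers.

#include <cudnn.h>
#include <cstdio>
#include <cstdlib>

// Hypothetical error-check macro standing in for MegEngine's cudnn_check.
#define CHECK_CUDNN(expr)                                                 \
    do {                                                                  \
        cudnnStatus_t s = (expr);                                         \
        if (s != CUDNN_STATUS_SUCCESS) {                                  \
            std::fprintf(stderr, "cudnn error: %s at %s:%d\n",            \
                         cudnnGetErrorString(s), __FILE__, __LINE__);     \
            std::exit(1);                                                 \
        }                                                                 \
    } while (0)

int main() {
    cudnnConvolutionDescriptor_t conv_desc;
    cudnnActivationDescriptor_t act_desc;
    CHECK_CUDNN(cudnnCreateConvolutionDescriptor(&conv_desc));
    CHECK_CUDNN(cudnnCreateActivationDescriptor(&act_desc));
#if CUDNN_VERSION >= 7000
    // Allow Tensor Cores and set the group count, as the constructor
    // and set_conv_bias above do.
    CHECK_CUDNN(cudnnSetConvolutionMathType(conv_desc, CUDNN_TENSOR_OP_MATH));
    CHECK_CUDNN(cudnnSetConvolutionGroupCount(conv_desc, 2));
#endif
    // pad_h, pad_w, stride_h, stride_w, dilate_h, dilate_w
    CHECK_CUDNN(cudnnSetConvolution2dDescriptor(
            conv_desc, 1, 1, 1, 1, 1, 1,
            CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT));
    // Fused ReLU epilogue; IDENTITY would defer the nonlinearity.
    CHECK_CUDNN(cudnnSetActivationDescriptor(
            act_desc, CUDNN_ACTIVATION_RELU, CUDNN_NOT_PROPAGATE_NAN, 0));
    std::puts("descriptors configured");
    CHECK_CUDNN(cudnnDestroyActivationDescriptor(act_desc));
    CHECK_CUDNN(cudnnDestroyConvolutionDescriptor(conv_desc));
    return 0;
}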

The MegEngine installation package bundles the CUDA environment needed to run code on the GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has a GPU installed along with a working driver. If you would like to try deep-learning development on a cloud GPU platform, visit the MegStudio platform.
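
As a quick sanity check before running GPU programs, the following sketch (plain CUDA runtime API, not MegEngine-specific) confirms that the driver works and at least one device is visible:

#include <cuda_runtime.h>
#include <cstdio>

int main() {
    int n = 0;
    cudaError_t err = cudaGetDeviceCount(&n);
    if (err != cudaSuccess) {
        // Typically a missing or mismatched driver.
        std::printf("CUDA driver/runtime problem: %s\n",
                    cudaGetErrorString(err));
        return 1;
    }
    if (n == 0) {
        std::puts("no CUDA device found");
        return 1;
    }
    cudaDeviceProp prop;
    cudaGetDeviceProperties(&prop, 0);
    std::printf("%d device(s); device 0: %s (sm_%d%d)\n", n, prop.name,
                prop.major, prop.minor);
    return 0;
}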