cudnn_conv.cpp 3.4 kB

#include "src/common/conv_bias.h"
#include "src/cuda/conv_bias/algo.h"
#include "src/cuda/cudnn_wrapper.h"
#include "src/cuda/utils.h"

using namespace megdnn;
using namespace cuda;
using namespace conv_bias;
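
// AlgoCUDNNConv wraps a single cuDNN forward-convolution algorithm,
// identified by m_cudnn_enum. It exposes three entry points: is_available()
// rejects cases this path cannot handle, cudnn_get_workspace_in_bytes()
// queries the required scratch space, and cudnn_execute() launches the
// convolution.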
bool ConvBiasForwardImpl::AlgoCUDNNConv::is_available(const SizeArgs& args) const {
    if (args.filter_meta.format != Param::Format::NCHW &&
        args.filter_meta.format != Param::Format::NHWC) {
        if (!args.src_layout->is_contiguous() || !args.dst_layout->is_contiguous()) {
            return false;
        }
    }
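    // 4-bit quantized outputs are not supported by this path.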
    if (args.dst_layout->dtype.enumv() == DTypeEnum::QuantizedS4 ||
        args.dst_layout->dtype.enumv() == DTypeEnum::Quantized4Asymm) {
        return false;
    }
    // FIXME: cuDNN cannot handle the case where the initial value of the dst
    // tensor contains NaN and beta is zero, because 0.f * NaN is still NaN.
    if (args.src_layout->dtype.enumv() == DTypeEnum::QuantizedS8 &&
        args.dst_layout->dtype.enumv() == DTypeEnum::Float32 &&
        args.opr->param().format == param::ConvBias::Format::NCHW) {
        return false;
    }
    // conv_args.init_conv_desc() calls cudnnSetTensor4dDescriptorEx(), which
    // cannot handle tensors with more than INT_MAX (~2^31) total elements.
    if (args.src_layout->total_nr_elems() > INT_MAX ||
        args.dst_layout->total_nr_elems() > INT_MAX) {
        return false;
    }
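    // If the bias dtype disagrees with the destination dtype, re-deduce the
    // destination dtype from src and filter before asking cuDNN about support.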
    auto dst_layout = *args.dst_layout;
    if (dst_layout.dtype.enumv() != args.bias_layout->dtype.enumv()) {
        dst_layout.dtype = DType();
        args.opr->check_or_deduce_dtype_fwd(
                args.src_layout->dtype, args.filter_layout->dtype, dst_layout.dtype);
    }
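    // Probe cuDNN: the algorithm counts as available iff the workspace-size
    // query succeeds for the descriptors built from conv_args.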
    SizeArgs conv_args = args;
    conv_args.dst_layout = &dst_layout;
    if (!is_cudnn_supported(conv_args))
        return false;
    CUDNNForwardDescs D;
    conv_args.init_conv_desc(D);
    size_t workspace_size;
    auto status = cudnnGetConvolutionForwardWorkspaceSize(
            conv_args.handle->cudnn_handle(), D.src_desc.desc, D.filter_desc.desc,
            D.conv_desc.conv_desc, D.dst_desc.desc, m_cudnn_enum, &workspace_size);
    return status == CUDNN_STATUS_SUCCESS;
}
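
// Workspace query for the chosen algorithm. The descriptors are rebuilt via
// the same init_conv_desc() path used above, so the size reported here
// matches what cudnn_execute() will need.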
size_t ConvBiasForwardImpl::AlgoCUDNNConv::cudnn_get_workspace_in_bytes(
        const SizeArgs& args) const {
    CUDNNForwardDescs D;
    args.init_conv_desc(D);
    size_t conv_workspace_size;
    cudnn_check(cudnnGetConvolutionForwardWorkspaceSize(
            args.handle->cudnn_handle(), D.src_desc.desc, D.filter_desc.desc,
            D.conv_desc.conv_desc, D.dst_desc.desc, m_cudnn_enum,
            &conv_workspace_size));
    return conv_workspace_size;
}
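
// Launches the raw convolution with alpha = 1 and beta = 0, i.e. the dst
// tensor is overwritten rather than accumulated into. Only the convolution
// itself runs here; the bias term never appears in this call.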
void ConvBiasForwardImpl::AlgoCUDNNConv::cudnn_execute(
        const ExecArgs& args, const Workspace& workspace) const {
    CUDNNForwardDescs D;
    args.init_conv_desc(D);
    float alpha = 1.0f, beta = 0.0f;
    auto status = cudnnConvolutionForward(
            args.handle->cudnn_handle(), &alpha, D.src_desc.desc,
            args.src_tensor->raw_ptr(), D.filter_desc.desc,
            args.filter_tensor->raw_ptr(), D.conv_desc.conv_desc, m_cudnn_enum,
            workspace.raw_ptr, workspace.size, &beta, D.dst_desc.desc,
            args.dst_tensor->raw_ptr());
    megdnn_assert(
            status == CUDNN_STATUS_SUCCESS, "conv fwd failed: %s; info: %s",
            cudnnGetErrorString(status), args.to_string().c_str());
}
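
// A minimal sketch of the expected call sequence, assuming a caller that
// already holds valid SizeArgs/ExecArgs and a workspace allocation (both are
// constructed elsewhere in MegDNN; the variable names below are hypothetical):
//
//     if (algo.is_available(size_args)) {
//         size_t ws_size = algo.cudnn_get_workspace_in_bytes(size_args);
//         Workspace workspace{ws_ptr, ws_size};  // ws_ptr: caller-provided
//         algo.cudnn_execute(exec_args, workspace);
//     }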
// vim: syntax=cpp.doxygen