
group_conv.cpp 4.9 kB

/**
 * \file dnn/src/cuda/convolution/backward_data/group_conv.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */

#include "./algo.h"

using namespace megdnn;
using namespace cuda;
using namespace convolution;

namespace {

//! Build the layouts and param of the per-group dense sub-operator: drop the
//! group axis from the filter layout and divide the channel dimension of
//! diff/grad by the group count.
std::pair<TensorLayoutArray, Convolution::Param> sub_opr_config(
        const ConvolutionBackwardDataImpl::AlgoBase::SizeArgs& args) {
    TensorLayout filter_pg = *args.filter_layout;
    TensorLayout diff_pg = *args.diff_layout;
    TensorLayout grad_pg = *args.grad_layout;
    filter_pg.remove_axis_inplace(0);

    auto nr_grp = args.filter_meta.group;
    size_t c_pos = 1;  // channel axis in NCHW
    diff_pg.shape[c_pos] /= nr_grp;
    grad_pg.shape[c_pos] /= nr_grp;

    megdnn::param::Convolution param = args.opr->param();
    // the sub-operator works on a single group, i.e. a dense convolution
    param.sparse = megdnn::param::ConvBias::Sparse::DENSE;
    std::pair<TensorLayoutArray, ConvolutionBackwardDataImpl::Param> ret;
    ret.first = {filter_pg, diff_pg, grad_pg};
    ret.second = param;

    return ret;
}

//! Create the dense ConvolutionBackwardData sub-operator together with the
//! per-group layouts it should run on.
std::pair<TensorLayoutArray, std::unique_ptr<ConvolutionBackwardData>>
prepare_sub_opr(const ConvolutionBackwardDataImpl::AlgoBase::SizeArgs& args) {
    auto conv_bwd_data_opr =
            args.handle->create_operator<ConvolutionBackwardData>();
    set_execution_policy<ConvolutionBackwardData, ConvolutionBackwardData*>(
            args.opr, conv_bwd_data_opr.get());
    auto&& config = sub_opr_config(args);
    conv_bwd_data_opr->param() = config.second;

    return {config.first, std::move(conv_bwd_data_opr)};
}

}  // namespace

std::vector<Algorithm::SearchItem>
ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::get_subopr_list(
        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
    AlgoBase::SizeArgs args{
            static_cast<const ConvolutionBackwardDataImpl*>(opr), layouts[0],
            layouts[1], layouts[2]};
    auto&& config = sub_opr_config(args);

    std::string param_str;
    Algorithm::serialize_write_pod(config.second, param_str);
    return {{Algorithm::OprType::CONVOLUTION_BACKWARD_DATA, param_str,
             config.first}};
}

bool ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::is_available(
        const SizeArgs& args) const {
    // grouped BFloat16 / QuantizedS8 cases are not supported by this algorithm
    if ((args.diff_layout->dtype == args.filter_layout->dtype &&
         args.diff_layout->dtype == dtype::BFloat16()) ||
        (args.diff_layout->dtype == args.filter_layout->dtype &&
         args.diff_layout->dtype == dtype::QuantizedS8())) {
        return false;
    }

    if (args.filter_meta.group <= 1)
        return false;
    if (args.filter_meta.format != megdnn::param::Convolution::Format::NCHW) {
        return false;
    }

    auto config = prepare_sub_opr(args);
    return has_available_algo<ConvolutionBackwardDataImpl>(
            static_cast<ConvolutionBackwardDataImpl*>(config.second.get()),
            config.first[0], config.first[1], config.first[2]);
}

WorkspaceBundle
ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_bundle(
        void* ptr, const SizeArgs& args) const {
    auto config = prepare_sub_opr(args);
    size_t sizes = config.second->get_workspace_in_bytes(
            config.first[0], config.first[1], config.first[2]);
    return {ptr, {sizes}};
}

size_t
ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_in_bytes(
        const SizeArgs& args) const {
    return get_workspace_bundle(nullptr, args).total_size_in_bytes();
}

void ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::exec(
        const ExecArgs& args) const {
    auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
    {
        auto config = prepare_sub_opr(args);
        TensorND tfilter{args.filter_tensor->raw_ptr, config.first[0]};
        TensorND tdiff{args.diff_tensor->raw_ptr, config.first[1]};
        TensorND tgrad{args.grad_tensor->raw_ptr, config.first[2]};

        size_t c_pos = 1;
        auto&& fm = args.filter_meta;

        // per-group byte strides of the filter, diff and grad tensors
        auto strd_flt = fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] *
                        tfilter.layout.dtype.size(),
             strd_diff = tdiff.layout.stride[c_pos] * fm.ocpg *
                         tdiff.layout.dtype.size(),
             strd_grad = tgrad.layout.stride[c_pos] * fm.icpg *
                         tgrad.layout.dtype.size();

        // run the dense sub-operator once per group, advancing each raw
        // pointer to the next group's slice
        auto grp = args.filter_meta.group;
        for (uint32_t g = 0; g < grp; ++g) {
            config.second->exec(tfilter, tdiff, tgrad, bundle.get_workspace(0));
            incr_voidp(tfilter.raw_ptr, strd_flt);
            incr_voidp(tdiff.raw_ptr, strd_diff);
            incr_voidp(tgrad.raw_ptr, strd_grad);
        }
    }
}

// vim: syntax=cpp.doxygen
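
The heart of this algorithm is the decomposition in exec(): a grouped backward-data convolution runs as `group` independent dense convolutions, with the three raw pointers advanced by fixed per-group byte strides between iterations. Below is a minimal standalone sketch of that stride arithmetic, assuming contiguous NCHW layouts; all sizes and names are illustrative, not MegEngine API.

#include <cstddef>
#include <cstdio>

int main() {
    // Hypothetical shapes: 4 groups, 8 input / 16 output channels per
    // group, a 3x3 filter, 30x30 diff and 32x32 grad spatial extents,
    // float32 elements.
    const size_t group = 4, icpg = 8, ocpg = 16, fh = 3, fw = 3;
    const size_t diff_hw = 30 * 30, grad_hw = 32 * 32;
    const size_t elem = sizeof(float);

    // Per-group byte strides, mirroring strd_flt / strd_diff / strd_grad
    // above: for a contiguous NCHW tensor the stride at the channel axis
    // equals H * W elements.
    const size_t strd_flt = icpg * ocpg * fh * fw * elem;
    const size_t strd_diff = ocpg * diff_hw * elem;
    const size_t strd_grad = icpg * grad_hw * elem;

    // Each iteration would run one dense backward-data convolution on the
    // g-th slice; here we only print the byte offsets the pointers take.
    for (size_t g = 0; g < group; ++g) {
        std::printf("group %zu: filter +%zu B, diff +%zu B, grad +%zu B\n",
                    g, g * strd_flt, g * strd_diff, g * strd_grad);
    }
    return 0;
}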

The MegEngine installation package ships with the CUDA environment needed to run code on a GPU, so there is no separate CPU and GPU build. To run GPU programs, make sure the machine has a GPU device and its driver installed. If you would like to try deep learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.