You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

group_conv.cpp 4.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. /**
  2. * \file dnn/src/cuda/convolution3d/backward_data/group_conv.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "./algo.h"
  12. using namespace megdnn;
  13. using namespace cuda;
  14. using namespace convolution3d;
  15. namespace {
  16. std::pair<TensorLayoutArray, Convolution3DBackwardDataImpl::Param>
  17. sub_opr_config(const Convolution3DBackwardDataImpl::AlgoBase::SizeArgs& args) {
  18. TensorLayout filter_pg = *args.filter_layout;
  19. TensorLayout diff_pg = *args.diff_layout;
  20. TensorLayout grad_pg = *args.grad_layout;
  21. filter_pg.remove_axis_inplace(0);
  22. auto nr_grp = args.filter_meta.group;
  23. size_t c_pos = 1;
  24. diff_pg.shape[c_pos] /= nr_grp;
  25. grad_pg.shape[c_pos] /= nr_grp;
  26. megdnn::param::Convolution3D param = args.opr->param();
  27. param.sparse = megdnn::param::Convolution3D::Sparse::DENSE;
  28. std::pair<TensorLayoutArray, Convolution3DBackwardDataImpl::Param> ret;
  29. ret.first = {filter_pg, diff_pg, grad_pg};
  30. ret.second = param;
  31. return ret;
  32. }
  33. std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DBackwardData>>
  34. prepare_sub_opr(const Convolution3DBackwardDataImpl::AlgoBase::SizeArgs& args) {
  35. auto conv3d_backdata_opr =
  36. args.handle->create_operator<Convolution3DBackwardData>();
  37. set_execution_policy<Convolution3DBackwardData, Convolution3DBackwardData*>(
  38. args.opr, conv3d_backdata_opr.get());
  39. auto&& config = sub_opr_config(args);
  40. conv3d_backdata_opr->param() = config.second;
  41. return {config.first, std::move(conv3d_backdata_opr)};
  42. }
  43. } // namespace
  44. std::vector<Algorithm::SearchItem>
  45. Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_subopr_list(
  46. const TensorLayoutArray& layouts, const OperatorBase* opr) const {
  47. AlgoBase::SizeArgs args{
  48. static_cast<const Convolution3DBackwardDataImpl*>(opr), layouts[0],
  49. layouts[1], layouts[2]};
  50. auto&& config = sub_opr_config(args);
  51. std::string param_str;
  52. Algorithm::serialize_write_pod(config.second, param_str);
  53. return {{Algorithm::OprType::CONVOLUTION3D_BACKWARD_DATA, param_str,
  54. config.first}};
  55. }
  56. bool Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::is_available(
  57. const SizeArgs &args) const {
  58. if (args.filter_meta.group <= 1)
  59. return false;
  60. if (args.filter_meta.format != Param::Format::NCDHW) {
  61. return false;
  62. }
  63. auto config = prepare_sub_opr(args);
  64. return has_available_algo<Convolution3DBackwardDataImpl>(
  65. static_cast<Convolution3DBackwardDataImpl*>(config.second.get()),
  66. config.first[0], config.first[1], config.first[2]);
  67. }
  68. WorkspaceBundle
  69. Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_bundle(
  70. void* ptr, const SizeArgs& args) const {
  71. auto config = prepare_sub_opr(args);
  72. size_t sizes = config.second->get_workspace_in_bytes(
  73. config.first[0], config.first[1], config.first[2]);
  74. return {ptr, {sizes}};
  75. }
  76. size_t
  77. Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_in_bytes(
  78. const SizeArgs& args) const {
  79. return get_workspace_bundle(nullptr, args).total_size_in_bytes();
  80. }
void Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::exec(
        const ExecArgs& args) const {
    // Run the dense sub operator once per group, sliding the filter/diff/grad
    // base pointers by one group's worth of bytes between iterations.
    auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
    {
        auto config = prepare_sub_opr(args);
        // Per-group views sharing the original buffers; only raw_ptr advances.
        TensorND tfilter{args.filter_tensor->raw_ptr, config.first[0]};
        TensorND tdiff{args.diff_tensor->raw_ptr, config.first[1]};
        TensorND tgrad{args.grad_tensor->raw_ptr, config.first[2]};
        size_t c_pos = 1;  // channel axis (NCDHW layout, checked in is_available)
        auto grp = args.filter_meta.group;
        auto&& fm = args.filter_meta;
        // Byte strides between consecutive groups. The filter stride is the
        // dense per-group filter element count times the element size; the
        // diff/grad strides step over ocpg/icpg channels via the channel
        // stride of the (possibly non-contiguous) layout.
        // NOTE(review): all three declarators share one `auto` deduction, so
        // the initializer types must agree — keep the expressions as-is.
        auto strd_flt = (fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] *
                         fm.spatial[2] * tfilter.layout.dtype.size()),
             strd_diff = (tdiff.layout.stride[c_pos] * fm.ocpg *
                          tdiff.layout.dtype.size()),
             strd_grad = (tgrad.layout.stride[c_pos] * fm.icpg *
                          tgrad.layout.dtype.size());
        for (uint32_t g = 0; g < grp; ++g) {
            // All groups reuse the same single-group workspace chunk.
            config.second->exec(tfilter, tdiff, tgrad, bundle.get_workspace(0));
            incr_voidp(tfilter.raw_ptr, strd_flt);
            incr_voidp(tdiff.raw_ptr, strd_diff);
            incr_voidp(tgrad.raw_ptr, strd_grad);
        }
    }
}
  106. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台