You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

group_conv.cpp 8.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. /**
  2. * \file dnn/src/cuda/conv_bias/group_conv.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include <utility>
  12. #include "src/common/conv_bias.h"
  13. #include "src/cuda/conv_bias/algo.h"
  14. using namespace megdnn;
  15. using namespace cuda;
  16. using namespace conv_bias;
  17. namespace {
  18. std::pair<TensorLayoutArray, ConvBiasForwardImpl::Param> sub_opr_config(
  19. const ConvBiasForwardImpl::AlgoBase::SizeArgs& args) {
  20. TensorLayout src_pg = *args.src_layout;
  21. TensorLayout filter_pg = *args.filter_layout;
  22. TensorLayout bias_pg = *args.bias_layout;
  23. TensorLayout z_pg = *args.z_layout;
  24. TensorLayout dst_pg = *args.dst_layout;
  25. auto nr_grp = args.filter_meta.group;
  26. size_t c_pos;
  27. if (args.filter_meta.format == megdnn::param::ConvBias::Format::NCHW ||
  28. args.filter_meta.format == megdnn::param::ConvBias::Format::NCHW4) {
  29. c_pos = 1;
  30. } else {
  31. megdnn_assert(args.filter_meta.format ==
  32. megdnn::param::ConvBias::Format::NHWC,
  33. "invalid conv format");
  34. c_pos = 3;
  35. }
  36. filter_pg.remove_axis_inplace(0);
  37. src_pg.shape[c_pos] /= nr_grp;
  38. bias_pg.ndim = 0;
  39. dst_pg.shape[c_pos] /= nr_grp;
  40. megdnn::param::ConvBias param = args.opr->param();
  41. param.sparse = megdnn::param::ConvBias::Sparse::DENSE;
  42. param.nonlineMode =
  43. megdnn::param::ConvBias::NonlineMode::IDENTITY;
  44. std::pair<TensorLayoutArray, ConvBiasForwardImpl::Param> ret;
  45. ret.first = {src_pg, filter_pg, bias_pg, z_pg, dst_pg};
  46. ret.second = param;
  47. return ret;
  48. }
  49. std::pair<TensorLayoutArray, std::unique_ptr<ConvBiasForward>> prepare_sub_opr(
  50. const ConvBiasForwardImpl::AlgoBase::SizeArgs& args) {
  51. auto convbias_opr = args.handle->create_operator<ConvBias>();
  52. set_execution_policy<ConvBiasForward, ConvBiasForward*>(
  53. args.opr, convbias_opr.get());
  54. auto&& config = sub_opr_config(args);
  55. convbias_opr->param() = config.second;
  56. return {config.first, std::move(convbias_opr)};
  57. }
  58. } // namespace
  59. std::vector<Algorithm::SearchItem>
  60. ConvBiasForwardImpl::AlgoGroupConvGeneral::get_subopr_list(
  61. const TensorLayoutArray& layouts, const OperatorBase* opr) const {
  62. AlgoBase::SizeArgs args{static_cast<const ConvBiasForwardImpl*>(opr),
  63. layouts[0],
  64. layouts[1],
  65. layouts[2],
  66. layouts[3],
  67. layouts[4]};
  68. auto&& config = sub_opr_config(args);
  69. std::string param_str;
  70. Algorithm::serialize_write_pod(config.second, param_str);
  71. return {{Algorithm::OprType::CONVBIAS_FORWARD, param_str, config.first}};
  72. }
  73. bool ConvBiasForwardImpl::AlgoGroupConvGeneral::is_available(
  74. const SizeArgs& args) const {
  75. if (args.src_layout->dtype == args.filter_layout->dtype &&
  76. args.src_layout->dtype == dtype::BFloat16()) {
  77. return false;
  78. }
  79. if (args.z_layout->ndim > 0 || args.filter_meta.group <= 1)
  80. return false;
  81. auto&& param = args.opr->param();
  82. if (param.format == param::ConvBias::Format::NCHW8 ||
  83. param.format == param::ConvBias::Format::CHWN4 ||
  84. param.format == param::ConvBias::Format::NCHW32)
  85. return false;
  86. auto dst_layout = *args.dst_layout;
  87. if (dst_layout.dtype.enumv() != args.bias_layout->dtype.enumv()) {
  88. dst_layout.dtype = DType();
  89. args.opr->check_or_deduce_dtype_fwd(args.src_layout->dtype,
  90. args.filter_layout->dtype,
  91. dst_layout.dtype);
  92. }
  93. auto conv_args = args;
  94. conv_args.dst_layout = &dst_layout;
  95. auto config = prepare_sub_opr(conv_args);
  96. AlgoBase::SizeArgs sub_args{
  97. static_cast<ConvBiasForwardImpl*>(config.second.get()),
  98. config.first[0],
  99. config.first[1],
  100. config.first[2],
  101. config.first[3],
  102. config.first[4]};
  103. bool ret = has_available_algo<ConvBiasForwardImpl>(sub_args);
  104. return ret;
  105. }
  106. WorkspaceBundle ConvBiasForwardImpl::AlgoGroupConvGeneral::get_workspace_bundle(
  107. void* ptr, const SizeArgs& args) const {
  108. auto dst_layout = *args.dst_layout;
  109. SmallVector<size_t> sizes;
  110. if (dst_layout.dtype.enumv() != args.bias_layout->dtype.enumv()) {
  111. dst_layout.dtype = DType();
  112. args.opr->check_or_deduce_dtype_fwd(args.src_layout->dtype,
  113. args.filter_layout->dtype,
  114. dst_layout.dtype);
  115. sizes.push_back(dst_layout.span().dist_byte());
  116. }
  117. auto conv_args = args;
  118. conv_args.dst_layout = &dst_layout;
  119. auto config = prepare_sub_opr(conv_args);
  120. size_t mm_ws = config.second->get_workspace_in_bytes(
  121. config.first[0], config.first[1], config.first[2],
  122. config.first[3], config.first[4], nullptr);
  123. sizes.insert(sizes.begin(), mm_ws);
  124. return {ptr, std::move(sizes)};
  125. }
  126. size_t ConvBiasForwardImpl::AlgoGroupConvGeneral::get_workspace_in_bytes(
  127. const SizeArgs& args) const {
  128. return get_workspace_bundle(nullptr, args).total_size_in_bytes();
  129. }
//! Run the grouped convolution by invoking the dense sub operator once per
//! group, advancing the src/filter/dst pointers by a per-group byte stride
//! between invocations, then apply bias and nonlinearity on the full result.
void ConvBiasForwardImpl::AlgoGroupConvGeneral::exec(
        const ExecArgs& args) const {
    auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
    auto conv_dst_tensor = *args.dst_tensor;
    if (args.dst_layout->dtype.enumv() != args.bias_layout->dtype.enumv()) {
        // conv result is produced in the dtype deduced from src/filter and
        // staged in the last workspace chunk; it is converted to the final
        // dst dtype by handle_bias_and_nonlinear below
        conv_dst_tensor.raw_ptr = bundle.get(bundle.nr_workspace() - 1);
        conv_dst_tensor.layout.dtype = DType();
        args.opr->check_or_deduce_dtype_fwd(args.src_layout->dtype,
                                            args.filter_layout->dtype,
                                            conv_dst_tensor.layout.dtype);
    }
    {
        // build the single-group sub problem (layouts + configured operator)
        auto sub_args = args;
        sub_args.dst_tensor = &conv_dst_tensor;
        sub_args.dst_layout = &conv_dst_tensor.layout;
        auto config = prepare_sub_opr(sub_args);
        TensorND tsrc{args.src_tensor->raw_ptr, config.first[0]};
        TensorND tfilter{args.filter_tensor->raw_ptr, config.first[1]};
        TensorND tbias{args.bias_tensor->raw_ptr, config.first[2]};
        TensorND tz{args.z_tensor->raw_ptr, config.first[3]};
        TensorND tdst{conv_dst_tensor.raw_ptr, config.first[4]};
        // channel axis position: 1 for NCHW/NCHW4, 3 for NHWC
        size_t c_pos;
        if (args.filter_meta.format == Param::Format::NCHW ||
            args.filter_meta.format == Param::Format::NCHW4) {
            c_pos = 1;
        } else {
            megdnn_assert(args.filter_meta.format == Param::Format::NHWC,
                          "invalid conv format");
            c_pos = 3;
        }
        auto grp = args.filter_meta.group;
        auto&& fm = args.filter_meta;
        // byte distance between consecutive groups for src, dst and filter
        auto strd_src = tsrc.layout.stride[c_pos] * fm.icpg *
                        tsrc.layout.dtype.size(),
             strd_dst = tdst.layout.stride[c_pos] * fm.ocpg *
                        tdst.layout.dtype.size(),
             strd_flt = fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] *
                        tfilter.layout.dtype.size();
        if (args.filter_meta.format == Param::Format::NCHW4) {
            // NCHW4: the channel-axis stride counts 4-channel packed
            // elements, so the byte stride above is 4x too large —
            // presumably icpg/ocpg count unpacked channels; TODO confirm
            strd_src >>= 2;
            strd_dst >>= 2;
        }
        for (uint32_t g = 0; g < grp; ++g) {
            // dense conv on group g, then advance all three pointers
            config.second->exec(tsrc, tfilter, tbias,
                tz, tdst, nullptr, bundle.get_workspace(0));
            incr_voidp(tsrc.raw_ptr, strd_src);
            incr_voidp(tdst.raw_ptr, strd_dst);
            incr_voidp(tfilter.raw_ptr, strd_flt);
        }
    }
    // apply bias and the requested nonlinearity, writing (and if necessary
    // dtype-converting) conv_dst_tensor into the user-provided dst tensor
    handle_bias_and_nonlinear(args.handle, args.nonlinear_mode,
                              &conv_dst_tensor, args.dst_tensor,
                              args.bias_tensor);
}
  184. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台