You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

algo.h 7.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. /**
  2. * \file dnn/src/cuda/convolution3d/backward_data/algo.h
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
  12. #pragma once
  13. #include <unordered_map>
  14. #include "src/cuda/convolution3d/helper.h"
  15. #include "src/common/algo_base.h"
  16. #include "src/common/metahelper.h"
  17. namespace megdnn {
  18. namespace cuda {
/*!
 * \brief base class for convolution3d backward-data algos
 *
 * All the algo impls should try to support non-contiguous batch dim, for group
 * conv execution.
 */
class Convolution3DBackwardDataImpl::AlgoBase : public Algorithm {
protected:
    // protected non-virtual dtor: concrete algos are never deleted through an
    // AlgoBase pointer
    ~AlgoBase() = default;

public:
    //! type tags consumed by MEGDNN_DECL_ALGO_TYPE in the concrete classes
    enum class AlgoType : uint32_t {
        CUDA_GROUP_CONV_GENERAL,
        CUDA_CUDNN,
        CUDA_CHANWISE,
    };
    using Mapper = std::unordered_map<AlgorithmDesc, AlgoBase*>;

    AlgoBase() : Algorithm() { m_handle_type = Handle::HandleType::CUDA; }

    //! description of a backward-data problem, used for availability and
    //! workspace queries (no tensor data involved)
    struct SizeArgs {
        HandleImpl* handle;
        CanonizedFilterMeta filter_meta;
        const TensorLayout *diff_layout, *grad_layout;
        Convolution3DBackwardDataImpl* opr;

        std::string to_string() const;

        //! populate cuDNN descriptors from the layouts and operator param
        void init_desc(convolution3d::CUDNNBwdDataDescs& desc) const {
            desc.set(filter_meta, *diff_layout, *grad_layout, opr->param());
        }
        SizeArgs(Convolution3DBackwardDataImpl* opr, const TensorLayout& filter,
                 const TensorLayout& diff, const TensorLayout& grad);
        SizeArgs(Convolution3DBackwardDataImpl* opr,
                 const CanonizedFilterMeta& filter, const TensorLayout& diff,
                 const TensorLayout& grad);

        //! view this backward-data problem as a forward problem: grad takes
        //! the place of src and diff the place of dst
        convolution3d::ForwardSizeArgs as_fwd_args() const {
            return {handle, grad_layout, filter_meta, diff_layout,
                    opr->param().data_type};
        }
    };

    //! SizeArgs plus the concrete tensors and workspace needed to execute
    struct ExecArgs : public SizeArgs {
        const TensorND *filter_tensor, *diff_tensor, *grad_tensor;
        Workspace workspace;

        ExecArgs(Convolution3DBackwardDataImpl* opr, _megdnn_tensor_in filter,
                 _megdnn_tensor_in diff, _megdnn_tensor_out grad,
                 _megdnn_workspace workspace);
    };

    virtual bool is_available(const SizeArgs& args) const = 0;
    virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0;
    virtual void exec(const ExecArgs& args) const = 0;

    //! available and fits into a workspace of at most \p limit bytes;
    //! note: workspace size is only queried if the algo is available
    bool is_available_wk(const SizeArgs& args, size_t limit) {
        return is_available(args) && get_workspace_in_bytes(args) <= limit;
    }

    //! available, within \p limit, and carrying the required attribute
    bool is_available_attribute(
            const SizeArgs& args,
            const AlgoAttribute& attr = AlgoAttribute::REPRODUCIBLE,
            size_t limit = std::numeric_limits<size_t>::max()) {
        return contain_attribute(attr) && is_available_wk(args, limit);
    }

    //! assert the provided workspace is large enough; returns *this so the
    //! call can be chained
    AlgoBase& check_workspace(const SizeArgs& args,
                              const Workspace& workspace) {
        auto req = get_workspace_in_bytes(args);
        megdnn_assert(req <= workspace.size,
                      "conv bwd data algo %s: "
                      "required workspace %zu bytes, got %zu",
                      name(), req, workspace.size);
        return *this;
    }

    //! overridden by AlgoCUDNN; lets callers treat cuDNN algos specially
    virtual bool is_cudnn() const { return false; }
};
  85. class Convolution3DBackwardDataImpl::AlgoCUDNN final : public AlgoBase {
  86. cudnnConvolutionBwdDataAlgo_t m_cudnn_enum;
  87. CudnnAlgoPack::Attr m_attr;
  88. public:
  89. AlgoCUDNN(cudnnConvolutionBwdDataAlgo_t cudnn_enum)
  90. : m_cudnn_enum(cudnn_enum) {
  91. megdnn_assert(CudnnAlgoPack::conv3d_bwd_data_algos().find(cudnn_enum) !=
  92. CudnnAlgoPack::conv3d_bwd_data_algos().end());
  93. m_attr = CudnnAlgoPack::conv3d_bwd_data_algos().at(cudnn_enum);
  94. }
  95. bool is_available(const SizeArgs& args) const override;
  96. size_t get_workspace_in_bytes(const SizeArgs& args) const override;
  97. void exec(const ExecArgs& args) const override;
  98. const char* name() const override { return m_attr.name.c_str(); }
  99. AlgoAttribute attribute() const override {
  100. auto ret = static_cast<AlgoAttribute>(0);
  101. if (m_attr.is_reproducible) {
  102. ret |= AlgoAttribute::REPRODUCIBLE;
  103. }
  104. return ret;
  105. }
  106. cudnnConvolutionBwdDataAlgo_t cudnn_enum() const { return m_cudnn_enum; }
  107. bool is_cudnn() const override { return true; }
  108. MEGDNN_DECL_ALGO_TYPE(CUDA_CUDNN)
  109. std::string param() const override {
  110. std::string ret;
  111. serialize_write_pod(m_cudnn_enum, ret);
  112. return ret;
  113. }
  114. };
//! channel-wise convolution3d backward-data algo
class Convolution3DBackwardDataImpl::AlgoChanwise final : public AlgoBase {
public:
    bool is_available(const SizeArgs& args) const override;
    size_t get_workspace_in_bytes(const SizeArgs& args) const override;
    void exec(const ExecArgs& args) const override;
    const char* name() const override { return "CHANNEL_WISE"; }
    MEGDNN_DECL_ALGO_TYPE(CUDA_CHANWISE)
    //! always reproducible
    AlgoAttribute attribute() const override {
        return AlgoAttribute::REPRODUCIBLE;
    }
};
  126. //! implement group conv by another algo
  127. class Convolution3DBackwardDataImpl::AlgoGroupConvGeneral final
  128. : public AlgoBase {
  129. AlgoBase* m_impl;
  130. std::string m_name;
  131. public:
  132. AlgoGroupConvGeneral(AlgoBase* impl);
  133. bool is_available(const SizeArgs& args) const override;
  134. size_t get_workspace_in_bytes(const SizeArgs& args) const override;
  135. void exec(const ExecArgs& args) const override;
  136. const char* name() const override { return m_name.c_str(); }
  137. static void modify_size_args(SizeArgs& args, TensorLayout& diff_pg,
  138. TensorLayout& grad_pg);
  139. AlgoAttribute attribute() const override {
  140. auto ret = static_cast<AlgoAttribute>(0);
  141. if (m_impl->contain_attribute(AlgoAttribute::REPRODUCIBLE)) {
  142. ret |= AlgoAttribute::REPRODUCIBLE;
  143. }
  144. return ret;
  145. }
  146. MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL)
  147. std::string param() const override {
  148. std::string ret;
  149. serialize_write_pod(m_impl->name(), ret);
  150. return ret;
  151. }
  152. };
//! container owning all backward-data algo instances and lookup tables
class Convolution3DBackwardDataImpl::AlgoPack : NonCopyableObj {
    // defined in cudnn.cpp
    void fill_cudnn_algos();

    AlgoBase::Mapper m_all_algos_map;

public:
    AlgoPack();

    std::vector<AlgoCUDNN> cudnn;
    AlgoChanwise chanwise;
    std::vector<AlgoGroupConvGeneral> gconv;
    //! map from a wrapped algo to its group-conv wrapper
    std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv;

    std::vector<AlgoBase*>
            //! all algorithms
            all_algos,
            //! non-cudnn algos, used for heuristic if cudnn is not supported
            non_cudnn_algos;

    //! look up the AlgoCUDNN wrapper for a raw cuDNN algo enum
    AlgoCUDNN* cudnn_from_enum(cudnnConvolutionBwdDataAlgo_t algo);

    const AlgoBase::Mapper& all_algos_map() const { return m_all_algos_map; }
};
  171. } // namespace cuda
  172. } // namespace megdnn
  173. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台