You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

algo.h 6.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. /**
  2. * \file dnn/src/rocm/convolution/backward_data/algo.h
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #pragma once
  12. #include "src/rocm/convolution/helper.h"
  13. #include "src/common/utils.h"
  14. #include "src/common/algo_base.h"
  15. #include "src/common/metahelper.h"
  16. #include <unordered_map>
  17. namespace megdnn {
  18. namespace rocm {
  19. /*!
  20. * \brief base class for convolution algos
  21. *
  22. */
  23. class ConvolutionBackwardDataImpl::AlgoBase : public Algorithm {
  24. protected:
  25. ~AlgoBase() = default;
  26. public:
  27. enum class AlgoType : uint32_t {
  28. ROCM_MIOPEN,
  29. ROCM_MATMUL,
  30. ROCM_CHANWISE
  31. };
  32. using Mapper = std::unordered_map<AlgorithmDesc, AlgoBase*>;
  33. AlgoBase() : Algorithm() { m_handle_type = Handle::HandleType::ROCM; }
  34. struct SizeArgs {
  35. HandleImpl* handle;
  36. CanonizedFilterMeta filter_meta;
  37. const TensorLayout *diff_layout, *grad_layout;
  38. ConvolutionBackwardDataImpl* opr;
  39. std::string to_string() const;
  40. convolution::MIOpenCacheKey to_miopen_algo_cache_key() const;
  41. void init_desc(convolution::MIOpenBwdDataDescs& desc) const {
  42. desc.set(filter_meta, *diff_layout, *grad_layout, opr->param());
  43. }
  44. SizeArgs(ConvolutionBackwardDataImpl* opr, const TensorLayout& filter,
  45. const TensorLayout& diff, const TensorLayout& grad);
  46. SizeArgs(ConvolutionBackwardDataImpl* opr,
  47. const CanonizedFilterMeta& filter, const TensorLayout& diff,
  48. const TensorLayout& grad);
  49. convolution::ForwardSizeArgs as_fwd_args() const {
  50. return {handle, grad_layout, filter_meta, diff_layout};
  51. }
  52. };
  53. struct ExecArgs : public SizeArgs {
  54. const TensorND *filter_tensor, *diff_tensor, *grad_tensor;
  55. Workspace workspace;
  56. ExecArgs(ConvolutionBackwardDataImpl* opr, _megdnn_tensor_in filter,
  57. _megdnn_tensor_in diff, _megdnn_tensor_out grad,
  58. _megdnn_workspace workspace);
  59. };
  60. virtual bool is_available(const SizeArgs& args) const = 0;
  61. virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0;
  62. virtual void exec(const ExecArgs& args) const = 0;
  63. bool is_available_wk(const SizeArgs& args, size_t limit) {
  64. return is_available(args) && get_workspace_in_bytes(args) <= limit;
  65. }
  66. bool is_available_reproducible(
  67. const SizeArgs& args, bool reproducible = true,
  68. size_t limit = std::numeric_limits<size_t>::max()) {
  69. return (!reproducible ||
  70. contain_attribute(AlgoAttribute::REPRODUCIBLE)) &&
  71. is_available_wk(args, limit);
  72. }
  73. AlgoBase& check_workspace(const SizeArgs& args,
  74. const Workspace& workspace) {
  75. auto req = get_workspace_in_bytes(args);
  76. megdnn_assert(req <= workspace.size,
  77. "conv bwd data algo %s: "
  78. "required workspace %zu bytes, got %zu",
  79. name(), req, workspace.size);
  80. return *this;
  81. }
  82. virtual bool is_miopen() const { return false; }
  83. };
  84. class ConvolutionBackwardDataImpl::AlgoMIOpen final : public AlgoBase {
  85. bool m_is_reproducible;
  86. const char* m_name;
  87. miopenConvBwdDataAlgorithm_t find_best_algo(const ExecArgs& args);
  88. public:
  89. AlgoMIOpen() = delete;
  90. AlgoMIOpen(bool is_reproducible) : m_is_reproducible(is_reproducible) {}
  91. bool is_available(const SizeArgs& args) const override;
  92. size_t get_workspace_in_bytes(const SizeArgs& args) const override;
  93. void exec(const ExecArgs& args) const override;
  94. AlgoAttribute attribute() const override {
  95. auto ret = static_cast<AlgoAttribute>(0);
  96. if (m_is_reproducible) {
  97. ret |= AlgoAttribute::REPRODUCIBLE;
  98. }
  99. return ret;
  100. }
  101. const char* name() const override {
  102. return "MIOpenConvolutionBackwardData";
  103. }
  104. bool is_miopen() const override { return true; }
  105. MEGDNN_DECL_ALGO_TYPE(ROCM_MIOPEN)
  106. std::string param() const override {
  107. std::string ret;
  108. serialize_write_pod(m_is_reproducible, ret);
  109. return ret;
  110. }
  111. static convolution::MIOpenCache<SizeArgs, miopenConvBwdDataAlgorithm_t>
  112. sm_miopen_algo_cache;
  113. static convolution::MIOpenCache<SizeArgs, size_t> sm_miopen_ws_cache;
  114. };
  115. class ConvolutionBackwardDataImpl::AlgoMatmul final : public AlgoBase {
  116. template <typename T>
  117. static void exec_internal(const ExecArgs& args);
  118. public:
  119. bool is_available(const SizeArgs& args) const override;
  120. size_t get_workspace_in_bytes(const SizeArgs& args) const override;
  121. void exec(const ExecArgs& args) const override;
  122. const char* name() const override { return "MATMUL"; }
  123. MEGDNN_DECL_ALGO_TYPE(ROCM_MATMUL)
  124. AlgoAttribute attribute() const override {
  125. return AlgoAttribute::REPRODUCIBLE;
  126. }
  127. };
  128. class ConvolutionBackwardDataImpl::AlgoChanwise final : public AlgoBase {
  129. public:
  130. bool is_available(const SizeArgs& args) const override;
  131. size_t get_workspace_in_bytes(const SizeArgs& args) const override;
  132. void exec(const ExecArgs& args) const override;
  133. const char* name() const override { return "CHANNEL_WISE"; }
  134. MEGDNN_DECL_ALGO_TYPE(ROCM_CHANWISE)
  135. AlgoAttribute attribute() const override {
  136. return AlgoAttribute::REPRODUCIBLE;
  137. }
  138. };
  139. class ConvolutionBackwardDataImpl::AlgoPack : NonCopyableObj {
  140. // defined in miopen.cpp
  141. void fill_miopen_algos();
  142. AlgoBase::Mapper m_all_algos_map;
  143. public:
  144. AlgoPack();
  145. AlgoMIOpen miopen{true};
  146. AlgoMatmul matmul;
  147. AlgoChanwise chanwise;
  148. std::vector<AlgoBase*>
  149. //! all algorithms
  150. all_algos, miopen_algos, non_miopen_algos;
  151. const AlgoBase::Mapper& all_algos_map() const { return m_all_algos_map; }
  152. };
  153. } // namespace rocm
  154. } // namespace megdnn
  155. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台