You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

opr_impl.h 4.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. /**
  2. * \file dnn/src/cuda/local_share/opr_impl.h
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #pragma once
  12. #include "megdnn/oprs.h"
  13. #include "src/cuda/utils.h"
  14. namespace megdnn {
  15. namespace cuda {
  16. class LocalShareForwardImpl : public LocalShareForward {
  17. public:
  18. using LocalShareForward::LocalShareForward;
  19. void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
  20. _megdnn_tensor_out dst, _megdnn_workspace workspace) override;
  21. size_t get_workspace_in_bytes(const TensorLayout& src,
  22. const TensorLayout& filter,
  23. const TensorLayout& dst) override;
  24. const char* get_algorithm_set_name() const override;
  25. class AlgoBase;
  26. class AlgoCHWNBatchSizeAware;
  27. class AlgoCHWNBatchSizeAwareSmallImage;
  28. class AlgoBatchedMatMul;
  29. class AlgoPack;
  30. static const AlgoPack& algo_pack() { return sm_algo_pack; }
  31. Algorithm* get_algorithm_from_desc(const AlgorithmDesc& desc) override;
  32. protected:
  33. std::vector<Algorithm*> get_all_algorithms(
  34. const TensorLayout& filter, const TensorLayout& diff,
  35. const TensorLayout& grad) override;
  36. std::vector<Algorithm*> get_all_algorithms_safe(
  37. const TensorLayout& src, const TensorLayout& filter,
  38. const TensorLayout& dst) override;
  39. Algorithm* get_algorithm_heuristic(
  40. const TensorLayout& src, const TensorLayout& filter,
  41. const TensorLayout& dst, size_t workspace_limit_in_bytes,
  42. const AlgoAttribute& positive_attr,
  43. const AlgoAttribute& negative_attr) override;
  44. private:
  45. static AlgoPack sm_algo_pack;
  46. };
  47. class LocalShareBackwardDataImpl : public LocalShareBackwardData {
  48. public:
  49. using LocalShareBackwardData::LocalShareBackwardData;
  50. void exec(_megdnn_tensor_in filter, _megdnn_tensor_in diff,
  51. _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
  52. size_t get_workspace_in_bytes(const TensorLayout& filter,
  53. const TensorLayout& diff,
  54. const TensorLayout& grad) override;
  55. const char* get_algorithm_set_name() const override;
  56. class AlgoBase;
  57. class AlgoImplicitGemm;
  58. class AlgoBatchedMatMul;
  59. class AlgoPack;
  60. static const AlgoPack& algo_pack() { return sm_algo_pack; }
  61. Algorithm* get_algorithm_from_desc(const AlgorithmDesc& desc) override;
  62. protected:
  63. std::vector<Algorithm*> get_all_algorithms(
  64. const TensorLayout& filter, const TensorLayout& diff,
  65. const TensorLayout& grad) override;
  66. std::vector<Algorithm*> get_all_algorithms_safe(
  67. const TensorLayout& filter, const TensorLayout& diff,
  68. const TensorLayout& grad) override;
  69. Algorithm* get_algorithm_heuristic(
  70. const TensorLayout& filter, const TensorLayout& diff,
  71. const TensorLayout& grad, size_t workspace_limit_in_bytes,
  72. const AlgoAttribute& positive_attr,
  73. const AlgoAttribute& negative_attr) override;
  74. private:
  75. static AlgoPack sm_algo_pack;
  76. };
  77. class LocalShareBackwardFilterImpl : public LocalShareBackwardFilter {
  78. public:
  79. using LocalShareBackwardFilter::LocalShareBackwardFilter;
  80. void exec(_megdnn_tensor_in src, _megdnn_tensor_in diff,
  81. _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
  82. size_t get_workspace_in_bytes(const TensorLayout& src,
  83. const TensorLayout& diff,
  84. const TensorLayout& grad) override;
  85. const char* get_algorithm_set_name() const override;
  86. class AlgoBase;
  87. class AlgoImplicitGemm;
  88. class AlgoBatchedMatMul;
  89. class AlgoPack;
  90. static const AlgoPack& algo_pack() { return sm_algo_pack; }
  91. Algorithm* get_algorithm_from_desc(const AlgorithmDesc& desc) override;
  92. protected:
  93. std::vector<Algorithm*> get_all_algorithms(
  94. const TensorLayout& src, const TensorLayout& diff,
  95. const TensorLayout& grad) override;
  96. std::vector<Algorithm*> get_all_algorithms_safe(
  97. const TensorLayout& src, const TensorLayout& diff,
  98. const TensorLayout& grad) override;
  99. Algorithm* get_algorithm_heuristic(
  100. const TensorLayout& src, const TensorLayout& diff,
  101. const TensorLayout& grad, size_t workspace_limit_in_bytes,
  102. const AlgoAttribute& positive_attr,
  103. const AlgoAttribute& negative_attr) override;
  104. private:
  105. static AlgoPack sm_algo_pack;
  106. };
  107. } // namespace cuda
  108. } // namespace megdnn
  109. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台