You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

opr_impl.h 3.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. /**
  2. * \file dnn/src/arm_common/pooling/opr_impl.h
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
  12. #pragma once
  13. #include "megdnn/oprs/base.h"
  14. #include "src/fallback/pooling/opr_impl.h"
  15. namespace megdnn {
  16. namespace arm_common {
  17. class PoolingImpl final : public fallback::PoolingImpl {
  18. public:
  19. using fallback::PoolingImpl::PoolingImpl;
  20. void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
  21. _megdnn_workspace workspace) override;
  22. size_t get_workspace_in_bytes(const TensorLayout&,
  23. const TensorLayout&) override;
  24. static size_t constexpr MAX_SPATIAL_DIM = 2;
  25. struct PoolingKernSizeParam {
  26. uint32_t n, ic;
  27. std::array<uint32_t, MAX_SPATIAL_DIM> isz, osz;
  28. std::array<uint32_t, MAX_SPATIAL_DIM> padding, filter, stride;
  29. DType src_type, dst_type;
  30. Handle* handle;
  31. Param::Format format;
  32. Mode mode;
  33. };
  34. struct PoolingKernParam : public PoolingKernSizeParam {
  35. void* src_ptr;
  36. void* dst_ptr;
  37. void* workspace_ptr;
  38. size_t workspace_size;
  39. template <typename T>
  40. const T* src() const {
  41. src_type.assert_is_compatible_ctype<T>();
  42. return static_cast<const T*>(src_ptr);
  43. }
  44. template <typename T>
  45. T* dst() const {
  46. dst_type.assert_is_compatible_ctype<T>();
  47. return static_cast<T*>(dst_ptr);
  48. }
  49. template <typename T>
  50. T* workspace() const {
  51. return static_cast<T*>(workspace_ptr);
  52. }
  53. };
  54. PoolingKernSizeParam make_pooling_kern_szie_param(
  55. fallback::PoolingImpl* opr, const TensorLayout& src,
  56. const TensorLayout& dst);
  57. PoolingKernParam make_pooling_kern_param(fallback::PoolingImpl* opr,
  58. _megdnn_tensor_in src,
  59. _megdnn_tensor_out dst,
  60. _megdnn_workspace workspace);
  61. class AlgoBase : public detail::Algorithm {
  62. public:
  63. virtual ~AlgoBase() = default;
  64. virtual bool usable(const PoolingKernSizeParam& param) const = 0;
  65. virtual void exec(const PoolingKernParam& param) const = 0;
  66. uint32_t type() const override { return INVALID_ALGO_TYPE; };
  67. };
  68. private:
  69. class AlgoFilterxModexStride1;
  70. class AlgoFilter2ModexStride2;
  71. class AlgoFilter3MaxStride2;
  72. class AlgoFilter3AverageStride2;
  73. class AlgoFilter4MaxStride2;
  74. class AlgoFilter5MaxStride2;
  75. class AlgoInt8Filter2MaxStride2;
  76. class AlgoInt8Filter3MaxStride2;
  77. class AlgoFilter2ModexStridexNCHW44;
  78. class AlgoFilter3ModexStridexNCHW44;
  79. class AlgoFilter4ModexStridexNCHW44;
  80. class AlgoFilter5ModexStridexNCHW44;
  81. class AlgoFp32ModexStridexNCHW44;
  82. class AlgoPack;
  83. };
  84. } // namespace arm_common
  85. } // namespace megdnn
  86. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台