You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

opr_impl.h 2.7 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. /**
  2. * \file dnn/src/arm_common/conv_bias/opr_impl.h
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
  12. #pragma once
  13. #include "src/common/utils.h"
  14. #include "src/fallback/conv_bias/opr_impl.h"
  15. namespace megdnn {
  16. namespace arm_common {
  17. class ConvBiasImpl : public fallback::ConvBiasImpl { // arm_common ConvBias operator; derives from the fallback implementation
  18. public:
  19. using fallback::ConvBiasImpl::ConvBiasImpl; // inherit the base class constructors
  20. using FallbackConvBiasImpl = fallback::ConvBiasImpl; // short alias for the base class
  21. using NCBKernIndex = fallback::ConvBiasImpl::NCBKernIndex; // re-export the base class's NCBKernIndex type
  22. bool is_thread_safe() const override { return true; } // unconditionally reports true
  23. SmallVector<AlgoBase*> algo_pack() override; // declared only; definition is not in this header
  24. bool is_matmul_quantized_prefer( // declared only; NOTE(review): presumably a matmul-vs-direct preference hint for quantized kernels - confirm in the .cpp
  25. const ConvBiasImpl::NCBKernSizeParam& ncb_param) const override;
  26. class AlgoPack; // forward declaration; defined elsewhere
  27. protected:
  28. static void* const sm_arm_common_algo_type; // static member; defined elsewhere. NOTE(review): looks like a type tag for this algorithm set - confirm
  29. const char* get_algorithm_set_name() const override; // declared only; definition is not in this header
  30. private:
  31. class AlgoS8DirectStride1; // from here to the end of the class: forward declarations of private nested algorithm classes, all defined elsewhere
  32. class AlgoS8DirectStride2;
  33. class AlgoS8DirectNCHW44;
  34. class AlgoS8DirectNCHWNCHW44;
  35. class AlgoQU8DirectStride1;
  36. class AlgoQU8DirectStride2;
  37. class AlgoFP32WinogradF23_4x4;
  38. class AlgoFP32WinogradF63;
  39. class AlgoFP32WinogradF63_4x4;
  40. class AlgoFP32WinogradF54;
  41. class AlgoFP32WinogradF45;
  42. class AlgoFP32WinogradF23_4x4_NCHW44;
  43. class AlgoFP32WinogradF63_4x4_NCHW44;
  44. class AlgoS8ChanWiseStride1NCHW44;
  45. class AlgoS8ChanWiseStride2NCHW44;
  46. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC // the next four declarations exist only when this macro evaluates to non-zero
  47. class AlgoFP16WinogradF23;
  48. class AlgoFP16WinogradF45;
  49. class AlgoFP16WinogradF63;
  50. class AlgoFP16WinogradF23_8x8;
  51. #endif
  52. #if __ARM_FEATURE_DOTPROD // the next six declarations exist only when this macro evaluates to non-zero
  53. class AlgoDotS8DirectNCHWNCHW44;
  54. class AlgoDotS8DirectStride1;
  55. class AlgoDotS8DirectStride2;
  56. class AlgoDotU8DirectStride1;
  57. class AlgoDotU8DirectStride2;
  58. class AlgoDotS8Direct_NCHW44;
  59. #endif
  60. class AlgoF32Direct;
  61. class AlgoF32DirectStride1;
  62. class AlgoF32DirectStride2;
  63. class AlgoF32DirectNCHWNCHW44;
  64. class AlgoF32ChannelWiseNCHW44;
  65. class AlgoF32DirectNCHW44;
  66. class AlgoI8x8x16Direct;
  67. class AlgoI8x8x16Stride2;
  68. class AlgoI8x8x16Stride2Filter2;
  69. class AlgoS8WinogradF23_8x8;
  70. class AlgoS8CF32WinogradF23_4x4_NCHW44;
  71. class AlgoS8WinogradF23_8x8_NCHW44;
  72. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC // second group guarded by the same FP16 macro as the Winograd FP16 declarations
  73. class AlgoF16Direct;
  74. class AlgoF16DirectStride1;
  75. #endif
  76. };
  77. } // namespace arm_common
  78. } // namespace megdnn
  79. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台