You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

opr_impl.h 2.7 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. /**
  2. * \file dnn/src/arm_common/conv_bias/opr_impl.h
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
  12. #pragma once
  13. #include "src/common/utils.h"
  14. #include "src/fallback/conv_bias/opr_impl.h"
  15. namespace megdnn {
  16. namespace arm_common {
  17. class ConvBiasImpl : public fallback::ConvBiasImpl { // arm_common ConvBiasImpl; derives from fallback::ConvBiasImpl and overrides the hooks declared below
  18. public:
  19. using fallback::ConvBiasImpl::ConvBiasImpl; // inherit the base class constructors
  20. using FallbackConvBiasImpl = fallback::ConvBiasImpl; // alias for the base class
  21. using NCBKernIndex = fallback::ConvBiasImpl::NCBKernIndex; // re-export the base class's NCBKernIndex type under this class
  22. bool is_thread_safe() const override { return true; } // always reports true
  23. SmallVector<AlgoBase*> algo_pack() override; // declared only; the definition is not in this header
  24. bool is_matmul_quantized_prefer(
  25. const ConvBiasImpl::NCBKernSizeParam& ncb_param) override; // declared only; returns bool for the given ncb_param
  26. class AlgoPack; // forward declaration of a public nested class; defined elsewhere
  27. protected:
  28. static void* const sm_arm_common_algo_type; // static member declaration; its value is defined outside this header
  29. const char* get_algorithm_set_name() const override; // declared only; the definition is not in this header
  30. private:
  31. class AlgoS8DirectStride1; // from here on: forward declarations of private nested classes, defined elsewhere
  32. class AlgoS8DirectStride1NCHW44; // NOTE(review): names suggest one class per conv-bias algorithm variant; confirm against the definitions
  33. class AlgoS8DirectStride2;
  34. class AlgoS8DirectStride2NCHW44;
  35. class AlgoS8DirectNCHWNCHW44;
  36. class AlgoQU8DirectStride1;
  37. class AlgoQU8DirectStride2;
  38. class AlgoFP32WinogradF23_4x4;
  39. class AlgoFP32WinogradF63;
  40. class AlgoFP32WinogradF63_4x4;
  41. class AlgoFP32WinogradF54;
  42. class AlgoFP32WinogradF45;
  43. class AlgoFP32WinogradF23_4x4_NCHW44;
  44. class AlgoFP32WinogradF63_4x4_NCHW44;
  45. class AlgoS8ChanWiseStride1NCHW44;
  46. class AlgoS8ChanWiseStride2NCHW44;
  47. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  48. class AlgoFP16WinogradF23; // this group is declared only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC evaluates to non-zero
  49. class AlgoFP16WinogradF45;
  50. class AlgoFP16WinogradF63;
  51. class AlgoFP16WinogradF23_8x8;
  52. #endif
  53. #if __ARM_FEATURE_DOTPROD
  54. class AlgoDotS8DirectNCHWNCHW44; // this group is declared only when __ARM_FEATURE_DOTPROD evaluates to non-zero
  55. class AlgoDotS8DirectStride1;
  56. class AlgoDotS8DirectStride2;
  57. class AlgoDotU8DirectStride1;
  58. class AlgoDotU8DirectStride2;
  59. class AlgoDotS8Direct_NCHW44;
  60. #endif
  61. class AlgoF32Direct; // unconditional declarations resume here
  62. class AlgoF32DirectStride1;
  63. class AlgoF32DirectStride2;
  64. class AlgoF32DirectNCHWNCHW44;
  65. class AlgoF32ChannelWiseNCHW44;
  66. class AlgoF32DirectNCHW44;
  67. class AlgoI8x8x16Direct;
  68. class AlgoI8x8x16Stride2;
  69. class AlgoI8x8x16Stride2Filter2;
  70. class AlgoS8WinogradF23_8x8;
  71. class AlgoS8CF32WinogradF23_4x4_NCHW44;
  72. class AlgoS8WinogradF23_8x8_NCHW44;
  73. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  74. class AlgoF16Direct; // second group guarded by __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  75. class AlgoF16DirectStride1;
  76. #endif
  77. };
  78. } // namespace arm_common
  79. } // namespace megdnn
  80. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台