You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

cutlass_convolution_wrapper.cuh 3.6 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. /**
  2. * \file dnn/src/cuda/conv_bias/cutlass_convolution_wrapper.cuh
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
  12. #pragma once
  13. #include "cutlass/gemm/gemm.h"
  14. #include "src/cuda/convolution_helper/parameter.cuh"
  15. #include "src/cuda/utils.cuh"
  16. namespace megdnn {
  17. namespace cuda {
  18. namespace cutlass_wrapper {
  19. using GemmCoord = cutlass::gemm::GemmCoord;
  20. template <typename Convolution>
  21. void cutlass_convolution_wrapper(
  22. const typename Convolution::ElementSrc* d_src,
  23. const typename Convolution::ElementFilter* d_filter,
  24. const typename Convolution::ElementBias* d_bias,
  25. const typename Convolution::ElementDst* d_z,
  26. typename Convolution::ElementDst* d_dst, int* workspace,
  27. typename Convolution::ConvolutionParameter const& conv_param,
  28. typename Convolution::EpilogueOutputOp::Params const& epilogue,
  29. cudaStream_t stream);
  30. template <bool NeedLoadFromConstMem>
  31. void do_conv_bias_int8_implicit_gemm_imma_ncdiv32hw32(
  32. const int8_t* d_src, const int8_t* d_filter, const int32_t* d_bias,
  33. const int8_t* d_z, int8_t* d_dst, int* workspace,
  34. const convolution::ConvParam& param, uint32_t nonlinear_mode,
  35. float alpha, float beta, float gamma, float scale,
  36. const GemmCoord& threadblock_shape, const GemmCoord& warp_shape,
  37. cudaStream_t stream);
  38. template <bool NeedLoadFromConstMem>
  39. void do_conv_bias_int8_implicit_gemm_imma_ncdiv32hw32_ncdiv4hw4(
  40. const int8_t* d_src, const int8_t* d_filter, const int32_t* d_bias,
  41. const int8_t* d_z, int8_t* d_dst, int* workspace,
  42. const convolution::ConvParam& param, uint32_t nonlinear_mode,
  43. float alpha, float beta, float gamma, float scale,
  44. const GemmCoord& threadblock_shape, const GemmCoord& warp_shape,
  45. cudaStream_t stream);
  46. template <bool NeedLoadFromConstMem>
  47. void do_conv_bias_int8_implicit_gemm_dp4a_ncdiv4hw4(
  48. const int8_t* d_src, const int8_t* d_filter, const int32_t* d_bias,
  49. const int8_t* d_z, int8_t* d_dst, int* workspace,
  50. const convolution::ConvParam& param, uint32_t nonlinear_mode,
  51. float alpha, float beta, float gamma, float scale,
  52. const GemmCoord& threadblock_shape, const GemmCoord& warp_shape,
  53. cudaStream_t stream);
  54. template <bool NeedLoadFromConstMem>
  55. void do_conv_bias_int8_implicit_gemm_dp4a_ncdiv4hw4_nchw(
  56. const int8_t* d_src, const int8_t* d_filter, const float* d_bias,
  57. const float* d_z, float* d_dst, int* workspace,
  58. const convolution::ConvParam& param, uint32_t nonlinear_mode,
  59. float alpha, float beta, float gamma, float scale,
  60. const GemmCoord& threadblock_shape, const GemmCoord& warp_shape,
  61. cudaStream_t stream);
  62. template <bool NeedLoadFromConstMem>
  63. void do_conv_bias_int8_implicit_gemm_dp4a_ncdiv4hw4_ncdiv32hw32(
  64. const int8_t* d_src, const int8_t* d_filter, const int32_t* d_bias,
  65. const int8_t* d_z, int8_t* d_dst, int* workspace,
  66. const convolution::ConvParam& param, uint32_t nonlinear_mode,
  67. float alpha, float beta, float gamma, float scale,
  68. const GemmCoord& threadblock_shape, const GemmCoord& warp_shape,
  69. cudaStream_t stream);
  70. } // namespace cutlass_wrapper
  71. } // namespace cuda
  72. } // namespace megdnn
  73. // vim: syntax=cuda.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台