You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

cublasLt_wrapper.h 3.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. /**
  2. * \file dnn/src/cuda/matrix_mul/cublasLt_wrapper.h
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #pragma once
  12. #include <cuda.h>
  13. #include "./algos.h"
  14. #include "megdnn/basic_types.h"
  15. #include "megdnn/oprs/nn.h"
  16. #include "src/common/utils.h"
  17. #include "src/cuda/utils.h"
  18. #if CUDA_VERSION >= 10010
  19. #include <cublasLt.h>
  20. namespace megdnn {
  21. namespace cuda {
  22. struct CUBLASLTMatmulDesc {
  23. struct SizeArgs {
  24. using MMSizeArgs = MatrixMulForwardImpl::AlgoBase::SizeArgs;
  25. HandleImpl* handle;
  26. bool transposeA, transposeB;
  27. TensorLayout layout_a, layout_b, layout_c;
  28. std::string to_string() const;
  29. SizeArgs(HandleImpl* handle, bool transposeA, bool transposeB,
  30. const TensorLayout& A, const TensorLayout& B,
  31. const TensorLayout& C)
  32. : handle(handle),
  33. transposeA(transposeA),
  34. transposeB(transposeB),
  35. layout_a(A),
  36. layout_b(B),
  37. layout_c(C){};
  38. explicit SizeArgs(const MMSizeArgs& args)
  39. : layout_a(args.layout_a),
  40. layout_b(args.layout_b),
  41. layout_c(args.layout_c) {
  42. handle = concrete_handle(args.opr->handle());
  43. auto&& param = args.opr->param();
  44. transposeA = param.transposeA;
  45. transposeB = param.transposeB;
  46. };
  47. };
  48. bool is_batched;
  49. cublasLtMatmulDesc_t matmul_desc;
  50. cudaDataType_t dt_a, dt_b, dt_c;
  51. cublasComputeType_t dt_compute;
  52. cublasLtMatrixLayout_t layout_a, layout_b, layout_c;
  53. cublasLtMatrixLayout_t layout_trans_a, layout_trans_b, layout_trans_c;
  54. size_t workspace_a, workspace_b, workspace_c;
  55. CUBLASLTMatmulDesc(const SizeArgs& args, bool batched = false)
  56. : matmul_desc(nullptr),
  57. layout_a(nullptr),
  58. layout_b(nullptr),
  59. layout_c(nullptr),
  60. layout_trans_a(nullptr),
  61. layout_trans_b(nullptr),
  62. layout_trans_c(nullptr),
  63. workspace_a(0),
  64. workspace_b(0),
  65. workspace_c(0) {
  66. is_batched = batched;
  67. set(args, batched);
  68. }
  69. ~CUBLASLTMatmulDesc();
  70. void set(const SizeArgs& args, bool batched = false);
  71. void reset();
  72. bool get_algorithm_heuristic(const SizeArgs& args, size_t ws_limit,
  73. cublasLtMatmulAlgo_t& algo);
  74. WorkspaceBundle get_workspace_bundle(const SizeArgs& args,
  75. const cublasLtMatmulAlgo_t& algo);
  76. bool is_available(const SizeArgs& args, size_t ws_limit);
  77. };
  78. } // namespace cuda
  79. } // namespace megdnn
  80. #endif
  81. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台