You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

algo.cpp 2.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. /**
  2. * \file dnn/src/cuda/batched_matrix_mul/algo.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "./algo.h"
  12. #include <cuda.h>
  13. #include "src/cuda/utils.h"
  14. #if CUDA_VERSION >= 10010
  15. #include <cublasLt.h>
  16. #endif
  17. using namespace megdnn;
  18. using namespace cuda;
  19. BatchedMatrixMulForwardImpl::AlgoPack BatchedMatrixMulForwardImpl::sm_algo_pack;
  20. std::string BatchedMatrixMulForwardImpl::AlgoBase::SizeArgs::to_string() const {
  21. auto&& param = opr->param();
  22. size_t m = layout_a.shape[0], n = layout_b.shape[1],
  23. k = layout_a.shape[param.transposeA ? 0 : 1];
  24. MEGDNN_MARK_USED_VAR(m);
  25. MEGDNN_MARK_USED_VAR(n);
  26. MEGDNN_MARK_USED_VAR(k);
  27. return ssprintf(
  28. "A={%zux%zu},B={%zux%zu},C={%zux%zu},Transpose A=%d,Transpose "
  29. "B=%d,ldA=%zu,ldB=%zu,ldC=%zu",
  30. m, k, k, n, m, n, param.transposeA, param.transposeB,
  31. layout_a.stride[0], layout_b.stride[0], layout_c.stride[0]);
  32. }
  33. BatchedMatrixMulForwardImpl::AlgoBase::SizeArgs::SizeArgs(
  34. BatchedMatrixMulForwardImpl* o, const TensorLayout& A,
  35. const TensorLayout& B, const TensorLayout& C)
  36. : opr(o), layout_a(A), layout_b(B), layout_c(C){};
  37. BatchedMatrixMulForwardImpl::AlgoBase::ExecArgs::ExecArgs(
  38. BatchedMatrixMulForwardImpl* o, _megdnn_tensor_in A,
  39. _megdnn_tensor_in B, _megdnn_tensor_in C, _megdnn_workspace workspace)
  40. : SizeArgs(o, A.layout, B.layout, C.layout),
  41. tensor_a{A},
  42. tensor_b{B},
  43. tensor_c{C},
  44. workspace{workspace} {}
  45. BatchedMatrixMulForwardImpl::AlgoPack::AlgoPack() {
  46. all_algos.push_back(&cublas);
  47. #if CUDA_VERSION >= 10010
  48. all_algos.push_back(&cublasLt);
  49. #endif
  50. all_algos.push_back(&int8x8x32);
  51. all_algos.push_back(&brute_force);
  52. for (auto&& algo : all_algos) {
  53. m_all_algos_map.emplace(algo->info().desc, algo);
  54. }
  55. }
  56. MEGDNN_DEF_GET_ALGO_FROM_DESC(BatchedMatrixMulForwardImpl)
  57. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台