You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

opr_impl.cpp 2.5 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. /**
  2. * \file dnn/src/rocm/batched_matrix_mul/opr_impl.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
  12. #include "./opr_impl.h"
  13. #include "./algos.h"
  14. #include "hcc_detail/hcc_defs_prologue.h"
  15. #include "src/common/algo_chooser.h"
  16. #include "src/common/utils.cuh"
  17. #include "src/rocm/handle.h"
  18. #include "src/rocm/utils.h"
  19. using namespace megdnn;
  20. using namespace rocm;
  21. std::vector<BatchedMatrixMulForwardImpl::Algorithm*>
  22. BatchedMatrixMulForwardImpl::get_all_algorithms(const TensorLayout& A,
  23. const TensorLayout& B,
  24. const TensorLayout& C) {
  25. AlgoBase::SizeArgs args{this, A, B, C};
  26. return megdnn::get_all_algorithms<BatchedMatrixMulForwardImpl>(args);
  27. }
  28. BatchedMatrixMulForwardImpl::Algorithm*
  29. BatchedMatrixMulForwardImpl::get_algorithm_heuristic(
  30. const TensorLayout& A, const TensorLayout& B, const TensorLayout& C,
  31. size_t workspace_limit_in_bytes,
  32. const AlgoAttribute& positive_attr,
  33. const AlgoAttribute& negative_attr) {
  34. AlgoBase::SizeArgs args{this, A, B, C};
  35. if (sm_algo_pack.blas.is_available_attribute(
  36. args, positive_attr, negative_attr, workspace_limit_in_bytes)) {
  37. return &sm_algo_pack.blas;
  38. }
  39. return megdnn::get_algo_match_attribute<BatchedMatrixMulForwardImpl>(
  40. sm_algo_pack.all_algos, args, workspace_limit_in_bytes,
  41. "batched matrix mul forward", positive_attr, negative_attr);
  42. }
  43. size_t BatchedMatrixMulForwardImpl::get_workspace_in_bytes(
  44. const TensorLayout& A, const TensorLayout& B, const TensorLayout& C) {
  45. return get_dnn_workspace(this, A, B, C);
  46. }
  47. void BatchedMatrixMulForwardImpl::exec(_megdnn_tensor_in A, _megdnn_tensor_in B,
  48. _megdnn_tensor_out C,
  49. _megdnn_workspace workspace) {
  50. check_exec(A.layout, B.layout, C.layout, workspace.size);
  51. AlgoBase::ExecArgs args(this, A, B, C, workspace);
  52. auto&& algo = get_algorithm(this, A.layout, B.layout, C.layout);
  53. algo->exec(args);
  54. }
  55. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台