
1x1x1.cpp 3.0 kB

/**
 * \file dnn/src/cuda/convolution3d/forward/1x1x1.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "./algo.h"
#include "src/cuda/handle.h"
#include "src/cuda/utils.cuh"

using namespace megdnn;
using namespace cuda;
using namespace convolution3d;

bool Convolution3DForwardImpl::Algo1x1x1::is_available(const SizeArgs& args) const {
    auto&& fm = args.filter_meta;
    const size_t MAX_WORKSPACE_SIZE = 2147483648;  // 2 * 1024^3
    if (get_workspace_in_bytes(args) > MAX_WORKSPACE_SIZE) {
        return false;
    }
    // Only a pointwise (1x1x1) convolution in NCDHW layout with unit stride,
    // unit dilation, no padding and a single group can be lowered to matmul.
    return fm.format == Param::Format::NCDHW &&
           (fm.dtype_enum == DTypeEnum::Float32 ||
            fm.dtype_enum == DTypeEnum::Float16) &&
           fm.spatial_ndim == 3 && fm.group == 1 && fm.dilation[0] == 1 &&
           fm.dilation[1] == 1 && fm.dilation[2] == 1 && fm.spatial[0] == 1 &&
           fm.spatial[1] == 1 && fm.spatial[2] == 1 && fm.padding[0] == 0 &&
           fm.padding[1] == 0 && fm.padding[2] == 0 && fm.stride[0] == 1 &&
           fm.stride[1] == 1 && fm.stride[2] == 1;
}

void Convolution3DForwardImpl::Algo1x1x1::extract_matmul_layouts(
        const SizeArgs& args, TensorLayout& A, TensorLayout& B, TensorLayout& C) {
    auto&& fm = args.filter_meta;
    // A: filter viewed as (OC, IC); B: one batch of the source viewed as
    // (IC, D*H*W); C: the matching batch of the destination.
    A = {{fm.ocpg, fm.icpg}, DType::from_enum(fm.dtype_enum)};
    B.ndim = 2;
    B.shape[0] = args.src_layout->shape[1];
    B.shape[1] = args.src_layout->shape[2] * args.src_layout->shape[3] *
                 args.src_layout->shape[4];
    B.stride[0] = args.src_layout->stride[1];
    B.stride[1] = 1;
    B.dtype = args.src_layout->dtype;
    C = {{args.dst_layout->shape[1], B.shape[1]}, args.dst_layout->dtype};
}

size_t Convolution3DForwardImpl::Algo1x1x1::get_workspace_in_bytes(
        const SizeArgs& args) const {
    TensorLayout A, B, C;
    extract_matmul_layouts(args, A, B, C);
    return args.handle->matmul_opr()->get_workspace_in_bytes(A, B, C);
}

void Convolution3DForwardImpl::Algo1x1x1::exec(const ExecArgs& args) const {
    TensorND A, B, C;
    extract_matmul_layouts(args, A.layout, B.layout, C.layout);
    A.reset_ptr(args.filter_tensor->raw_ptr());
    B.reset_ptr(args.src_tensor->raw_ptr());
    C.reset_ptr(args.dst_tensor->raw_ptr());
    size_t batch = args.src_layout->shape[0];
    auto mm = args.handle->matmul_opr();
    // Byte strides that advance the src/dst pointers to the next batch element.
    auto strd_B = args.src_layout->stride[0] * args.src_layout->dtype.size(),
         strd_C = args.dst_layout->stride[0] * args.dst_layout->dtype.size();
    // One matrix multiplication per batch element.
    for (size_t i = 0; i < batch; ++i) {
        mm->exec(A, B, C, args.workspace);
        incr_refp(B.get_ref_ptr(), strd_B);
        incr_refp(C.get_ref_ptr(), strd_C);
    }
}

// vim: syntax=cpp.doxygen
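
The algorithm above relies on a simple identity: with a 1x1x1 kernel, unit stride, no padding, unit dilation and a single group, every output voxel is dst[n][oc][p] = sum over ic of filter[oc][ic] * src[n][ic][p], where p ranges over the D*H*W spatial positions. Per batch element this is exactly the matrix product of the (OC x IC) filter with the source viewed as an (IC x D*H*W) matrix, which is what extract_matmul_layouts sets up and exec runs once per batch. The standalone sketch below (plain CPU code with arbitrary sizes, not part of MegEngine) illustrates that equivalence.

// Illustrative only: check that a 1x1x1 3D convolution equals a per-batch
// matrix multiplication of the (OC x IC) filter with the (IC x D*H*W) input.
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <vector>

int main() {
    const size_t N = 2, IC = 3, OC = 4, D = 5, H = 6, W = 7;
    const size_t S = D * H * W;  // flattened spatial size

    std::vector<float> src(N * IC * S), filter(OC * IC);
    for (auto& v : src) v = static_cast<float>(std::rand()) / RAND_MAX;
    for (auto& v : filter) v = static_cast<float>(std::rand()) / RAND_MAX;

    // Reference: direct 1x1x1 convolution in NCDHW layout.
    std::vector<float> dst_conv(N * OC * S, 0.f);
    for (size_t n = 0; n < N; ++n)
        for (size_t oc = 0; oc < OC; ++oc)
            for (size_t ic = 0; ic < IC; ++ic)
                for (size_t p = 0; p < S; ++p)
                    dst_conv[(n * OC + oc) * S + p] +=
                            filter[oc * IC + ic] * src[(n * IC + ic) * S + p];

    // Same result via per-batch matmul: C = A(OC x IC) * B(IC x S).
    std::vector<float> dst_mm(N * OC * S, 0.f);
    for (size_t n = 0; n < N; ++n) {
        const float* B = &src[n * IC * S];
        float* C = &dst_mm[n * OC * S];
        for (size_t oc = 0; oc < OC; ++oc)
            for (size_t p = 0; p < S; ++p) {
                float acc = 0.f;
                for (size_t ic = 0; ic < IC; ++ic)
                    acc += filter[oc * IC + ic] * B[ic * S + p];
                C[oc * S + p] = acc;
            }
    }

    for (size_t i = 0; i < dst_conv.size(); ++i)
        if (std::fabs(dst_conv[i] - dst_mm[i]) > 1e-5f) {
            std::printf("mismatch at %zu\n", i);
            return 1;
        }
    std::printf("1x1x1 convolution and per-batch matmul agree\n");
    return 0;
}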

The MegEngine installation package already bundles the CUDA environment needed to run code on the GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has a GPU installed along with a working driver. If you would like to try deep-learning development on a cloud GPU platform, you are welcome to visit the MegStudio platform.
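
As a quick, MegEngine-independent sanity check that a GPU and driver are actually visible, the CUDA runtime can be queried directly. The small program below is only an illustration of that check (compile with nvcc).

// Illustrative driver/device check using the CUDA runtime API.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
    int count = 0;
    cudaError_t err = cudaGetDeviceCount(&count);
    if (err != cudaSuccess) {
        std::printf("CUDA not usable: %s\n", cudaGetErrorString(err));
        return 1;
    }
    std::printf("found %d CUDA device(s)\n", count);
    for (int i = 0; i < count; ++i) {
        cudaDeviceProp prop;
        cudaGetDeviceProperties(&prop, i);
        std::printf("  device %d: %s, compute capability %d.%d\n", i, prop.name,
                    prop.major, prop.minor);
    }
    return 0;
}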