
bfloat16.cpp

/**
 * \file dnn/src/cuda/conv_bias/bfloat16.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "src/cuda/conv_bias/algo.h"
#include "src/cuda/handle.h"
#include "src/cuda/utils.cuh"
#include "src/cuda/utils.h"

using namespace megdnn;
using namespace cuda;
using namespace conv_bias;
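
// AlgoBFloat16 wraps another conv_bias algorithm: bfloat16 operands are
// converted to float32, the wrapped float32 algorithm does the actual
// computation, and the result is converted back to bfloat16. The wrapped
// algorithm's name is embedded in this one ("BFLOAT16:<impl name>").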
ConvBiasForwardImpl::AlgoBFloat16::AlgoBFloat16(
        ConvBiasForwardImpl::AlgoBase* algorithm)
        : m_impl(algorithm) {
    megdnn_assert_internal(algorithm);
    m_name = ssprintf("BFLOAT16:%s", m_impl->name());
}
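
// Build a float32 version of the size args for the wrapped algorithm: copy
// every layout, switch any BFloat16 dtype to Float32, and configure the
// temporary operator with the caller's param (compute mode forced to DEFAULT)
// and the wrapped algorithm as its execution policy.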
ConvBiasForwardImpl::AlgoBase::SizeArgs
ConvBiasForwardImpl::AlgoBFloat16::float_args(
        const SizeArgs& args, ConvBiasForwardImpl* opr, TensorLayout& fsrc,
        TensorLayout& ffilter, TensorLayout& fbias, TensorLayout& fz,
        TensorLayout& fdst) const {
    fsrc = *args.src_layout;
    ffilter = *args.filter_layout;
    fbias = *args.bias_layout;
    fz = *args.z_layout;
    fdst = *args.dst_layout;
    auto change_dtype = [](TensorLayout& layout) {
        if (layout.dtype == dtype::BFloat16()) {
            layout.dtype = dtype::Float32();
        }
    };
    change_dtype(fsrc);
    change_dtype(ffilter);
    change_dtype(fbias);
    change_dtype(fz);
    change_dtype(fdst);
    opr->param() = args.opr->param();
    opr->param().compute_mode = Param::ComputeMode::DEFAULT;
    opr->execution_policy() = {m_impl->desc(), {}};
    return SizeArgs(opr, fsrc, ffilter, fbias, fz, fdst);
}
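
// This wrapper is applicable only when src and filter are both BFloat16 and
// the wrapped algorithm can handle the converted float32 layouts.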
bool ConvBiasForwardImpl::AlgoBFloat16::is_available(
        const SizeArgs& args) const {
    TensorLayout fsrc, ffilter, fbias, fz, fdst;
    auto convbias_opr = args.handle->create_operator<ConvBias>();
    SizeArgs fargs = float_args(
            args, static_cast<ConvBiasForwardImpl*>(convbias_opr.get()), fsrc,
            ffilter, fbias, fz, fdst);
    return args.src_layout->dtype == args.filter_layout->dtype &&
           args.src_layout->dtype == dtype::BFloat16() &&
           m_impl->is_available(fargs);
}
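
// Workspace layout: one buffer for each tensor whose dtype actually changes
// (to hold its float32 copy), followed by the workspace needed by the wrapped
// algorithm itself.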
WorkspaceBundle ConvBiasForwardImpl::AlgoBFloat16::get_workspace_bundle(
        void* ptr, const SizeArgs& args) const {
    TensorLayout fsrc, ffilter, fbias, fz, fdst;
    auto convbias_opr = args.handle->create_operator<ConvBias>();
    SizeArgs fargs = float_args(
            args, static_cast<ConvBiasForwardImpl*>(convbias_opr.get()), fsrc,
            ffilter, fbias, fz, fdst);
    SmallVector<size_t> sizes;
    auto get_workspace = [&sizes](const TensorLayout& src,
                                  const TensorLayout& dst) {
        if (src.dtype != dst.dtype) {
            sizes.push_back(dst.span().dist_byte());
        }
    };
    get_workspace(*args.src_layout, fsrc);
    get_workspace(*args.filter_layout, ffilter);
    get_workspace(*args.bias_layout, fbias);
    get_workspace(*args.z_layout, fz);
    get_workspace(*args.dst_layout, fdst);
    sizes.push_back(m_impl->get_workspace_in_bytes(fargs));
    return {ptr, std::move(sizes)};
}

size_t ConvBiasForwardImpl::AlgoBFloat16::get_workspace_in_bytes(
        const SizeArgs& args) const {
    return get_workspace_bundle(nullptr, args).total_size_in_bytes();
}
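
// Convert bfloat16 operands to float32 in workspace buffers, run the wrapped
// conv_bias algorithm in float32, then convert the result back to bfloat16.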
void ConvBiasForwardImpl::AlgoBFloat16::exec(const ExecArgs& args) const {
    TensorND fsrc_tensor = *args.src_tensor;
    TensorND ffilter_tensor = *args.filter_tensor;
    TensorND fbias_tensor = *args.bias_tensor;
    TensorND fz_tensor = *args.z_tensor;
    TensorND fdst_tensor = *args.dst_tensor;
    auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
    CompTypeCvter<dtype::BFloat16, dtype::Float32> cvter(args.handle, &bundle);
    {
        cvter.src_to_comp_type(*args.src_tensor, fsrc_tensor)
                .src_to_comp_type(*args.filter_tensor, ffilter_tensor)
                .src_to_comp_type(*args.bias_tensor, fbias_tensor)
                .src_to_comp_type(*args.z_tensor, fz_tensor)
                .src_to_comp_type(*args.dst_tensor, fdst_tensor);
    }
    {
        auto convbias_opr = args.handle->create_operator<ConvBias>();
        convbias_opr->param() = args.opr->param();
        convbias_opr->param().compute_mode = Param::ComputeMode::DEFAULT;
        convbias_opr->execution_policy() = {m_impl->desc(), {}};
        convbias_opr->exec(
                fsrc_tensor, ffilter_tensor, fbias_tensor, fz_tensor, fdst_tensor,
                nullptr, cvter.workspace());
    }
    { cvter.comp_to_dst_type(fdst_tensor, *args.dst_tensor); }
}

// vim: syntax=cpp.doxygen

The MegEngine installation package bundles the CUDA environment needed to run code on the GPU, so there is no separate CPU or GPU build. If you want to run GPU programs, make sure the machine has a GPU device and that the driver is installed. If you would like to try deep-learning development on a cloud GPU platform, you are welcome to visit the MegStudio platform.
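
As a quick sanity check that a GPU device and a working driver are actually visible before running GPU programs, the CUDA runtime can be queried directly. The sketch below is illustrative only and is not part of MegEngine or of the file above; it assumes the CUDA toolkit headers and runtime library are available on the machine.

// check_gpu.cpp -- standalone illustrative sketch (not part of MegEngine):
// ask the CUDA runtime how many devices are visible; this fails if no GPU
// is installed or the driver is missing or broken.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
    int device_count = 0;
    cudaError_t err = cudaGetDeviceCount(&device_count);
    if (err != cudaSuccess) {
        std::printf("CUDA runtime error: %s\n", cudaGetErrorString(err));
        return 1;
    }
    std::printf("visible CUDA device(s): %d\n", device_count);
    return device_count > 0 ? 0 : 1;
}

It can be compiled with nvcc, or with a host compiler as long as the binary is linked against the CUDA runtime library.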