You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

strategy_onlypacka.cpp 8.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. /**
  2. * \file dnn/src/fallback/conv_bias/im2col/algos.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "src/fallback/conv_bias/im2col/strategy_base.h"
  12. #include "src/fallback/convolution/img2col_helper.h"
  13. namespace megdnn {
  14. template <typename src_ctype, typename bias_ctype, typename dst_ctype,
  15. typename op_ctype, typename op_dtype,
  16. megdnn::PostprocessMode postprocess_mode>
  17. void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
  18. postprocess_mode, PackMode::ONLY_PACKA>::
  19. packA_kern(WorkspaceBundle bundle,
  20. const fallback::ConvBiasImpl::NCBKernParam& param,
  21. fallback::MatrixMulImpl::KernSizeParam matmulparam,
  22. fallback::MatrixMulImpl::AlgoBase* matmul_algo,
  23. const fallback::ConvBiasImpl::NCBKernIndex& ncb_index,
  24. size_t) {
  25. bundle.set(param.workspace_ptr);
  26. fallback::MatrixMulImpl::KernParam matmul_param;
  27. static_cast<fallback::MatrixMulImpl::KernSizeParam&>(matmul_param) =
  28. matmulparam;
  29. size_t OC = param.filter_meta.ocpg;
  30. size_t oc_tile_size = matmul_param.M;
  31. size_t group_id = ncb_index.ndrange_id[0];
  32. size_t output_block_oc_size =
  33. std::min(oc_tile_size, OC - ncb_index.ndrange_id[1] * oc_tile_size);
  34. size_t oc_cur_index = ncb_index.ndrange_id[1] * oc_tile_size;
  35. size_t packA_group_size =
  36. bundle.get_size(BUNDLE_PACKA_INDEX) / param.filter_meta.group;
  37. size_t a_panel_offset = ncb_index.ndrange_id[1] *
  38. matmul_algo->get_bundle(matmul_param).get_size(0);
  39. int8_t* a_panel = static_cast<int8_t*>(bundle.get(BUNDLE_PACKA_INDEX)) +
  40. group_id * packA_group_size + a_panel_offset;
  41. matmul_param.A_ptr =
  42. const_cast<src_ctype*>(param.filter<src_ctype>(group_id)) +
  43. oc_cur_index * matmul_param.K;
  44. matmul_param.M = output_block_oc_size;
  45. matmul_algo->pack_A(matmul_param, a_panel, 0_z, 0_z);
  46. }
  47. template <typename src_ctype, typename bias_ctype, typename dst_ctype,
  48. typename op_ctype, typename op_dtype,
  49. megdnn::PostprocessMode postprocess_mode>
  50. void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
  51. postprocess_mode, PackMode::ONLY_PACKA>::
  52. exec_matmul(const fallback::ConvBiasImpl::NCBKernParam& param,
  53. const StrategyParam& sparam, WorkspaceBundle bundle,
  54. WorkspaceBundle bundle_thread,
  55. fallback::MatrixMulImpl::KernParam matmul_param,
  56. fallback::MatrixMulImpl::AlgoBase* matmul_algo,
  57. const fallback::ConvBiasImpl::NCBKernIndex& ncb_index) {
  58. size_t packA_group_size =
  59. bundle.get_size(BUNDLE_PACKA_INDEX) / param.filter_meta.group;
  60. size_t a_panel_offset = ncb_index.ndrange_id[3] *
  61. matmul_algo->get_bundle(matmul_param).get_size(0);
  62. a_panel_offset = sparam.group_id * packA_group_size + a_panel_offset;
  63. void* matmul_dst = get_matmul_dst_ptr(param, bundle_thread, sparam);
  64. src_ctype* a_panel = reinterpret_cast<src_ctype*>(
  65. reinterpret_cast<uintptr_t>(bundle.get(BUNDLE_PACKA_INDEX)) +
  66. a_panel_offset);
  67. src_ctype* b_panel = nullptr;
  68. src_ctype* im2col_dst = static_cast<src_ctype*>(
  69. bundle_thread.get(THREAD_BUNDLE_IM2COL_INDEX));
  70. matmul_param.M = sparam.output_block_oc_size;
  71. matmul_param.N = sparam.output_block_size;
  72. matmul_param.LDB = sparam.output_block_size;
  73. matmul_param.LDC = sparam.output_block_size;
  74. matmul_param.B_ptr = im2col_dst;
  75. matmul_param.C_ptr = matmul_dst;
  76. auto matmul_kern = matmul_algo->get_kern_naked(matmul_param);
  77. matmul_kern(matmul_param, a_panel, b_panel);
  78. }
  79. template <typename src_ctype, typename bias_ctype, typename dst_ctype,
  80. typename op_ctype, typename op_dtype,
  81. megdnn::PostprocessMode postprocess_mode>
  82. void Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
  83. postprocess_mode, PackMode::ONLY_PACKA>::
  84. exec_im2col(WorkspaceBundle bundle, WorkspaceBundle bundle_thread,
  85. const StrategyParam& sparam,
  86. const fallback::ConvBiasImpl::NCBKernParam& param,
  87. fallback::MatrixMulImpl::KernParam matmul_param,
  88. fallback::MatrixMulImpl::AlgoBase* matmul_algo) {
  89. MEGDNN_MARK_USED_VAR(matmul_param);
  90. MEGDNN_MARK_USED_VAR(matmul_algo);
  91. size_t sh = param.filter_meta.stride[0];
  92. size_t sw = param.filter_meta.stride[1];
  93. size_t oc = param.filter_meta.ocpg;
  94. size_t oh = param.osz[0];
  95. size_t ow = param.osz[1];
  96. size_t ic = param.filter_meta.icpg;
  97. size_t ih = param.isz[0] + param.filter_meta.padding[0] * 2;
  98. size_t iw = param.isz[1] + param.filter_meta.padding[1] * 2;
  99. size_t fh = param.filter_meta.spatial[0];
  100. size_t fw = param.filter_meta.spatial[1];
  101. size_t is_xcorr = !param.filter_meta.should_flip;
  102. size_t input_offset =
  103. ih * iw * ic *
  104. (sparam.group_id + param.filter_meta.group * sparam.batch_id) *
  105. sizeof(src_ctype);
  106. src_ctype* src2 = reinterpret_cast<src_ctype*>(
  107. reinterpret_cast<uintptr_t>(bundle.get(BUNDLE_PADDING_INDEX)) +
  108. input_offset);
  109. bool is_phpwzero = param.filter_meta.padding[0] == 0 &&
  110. param.filter_meta.padding[1] == 0;
  111. if (is_phpwzero) {
  112. src2 = const_cast<src_ctype*>(
  113. param.src<src_ctype>(sparam.batch_id, sparam.group_id));
  114. }
  115. src_ctype* im2col_dst = static_cast<src_ctype*>(
  116. bundle_thread.get(THREAD_BUNDLE_IM2COL_INDEX));
  117. if (sh == 1 && sw == 1) {
  118. if (is_xcorr) {
  119. img2col<true>(src2, im2col_dst, oc, oh, ow, ic, ih, iw, fh, fw,
  120. sparam.ohw_cur_index, sparam.output_block_size);
  121. } else {
  122. img2col<false>(src2, im2col_dst, oc, oh, ow, ic, ih, iw, fh, fw,
  123. sparam.ohw_cur_index, sparam.output_block_size);
  124. }
  125. } else {
  126. if (is_xcorr) {
  127. img2col_stride<true>(src2, im2col_dst, oc, oh, ow, ic, ih, iw, fh,
  128. fw, sh, sw, sparam.ohw_cur_index,
  129. sparam.output_block_size);
  130. } else {
  131. img2col_stride<false>(src2, im2col_dst, oc, oh, ow, ic, ih, iw, fh,
  132. fw, sh, sw, sparam.ohw_cur_index,
  133. sparam.output_block_size);
  134. }
  135. }
  136. }
  137. template <typename src_ctype, typename bias_ctype, typename dst_ctype,
  138. typename op_ctype, typename op_dtype,
  139. megdnn::PostprocessMode postprocess_mode>
  140. void* Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
  141. postprocess_mode, PackMode::ONLY_PACKA>::
  142. get_matmul_dst_ptr(const fallback::ConvBiasImpl::NCBKernParam& param,
  143. const WorkspaceBundle& bundle_thread,
  144. const StrategyParam& sparam) {
  145. if (sparam.is_dst_8bit || !sparam.is_ohw_size_bigger) {
  146. return static_cast<bias_ctype*>(
  147. bundle_thread.get(THREAD_BUNDLE_MATMULDST_INDEX));
  148. } else {
  149. bias_ctype* dst =
  150. param.dst<bias_ctype>(sparam.batch_id, sparam.group_id) +
  151. sparam.oc_cur_index * sparam.ohw;
  152. return static_cast<void*>(dst);
  153. }
  154. }
//! Explicitly instantiate the ONLY_PACKA strategy for the single type
//! combination this pack mode supports: float32 src/bias/dst with
//! float post-processing.
#define INSTANTIAL_CLASS(_src_ctype, _bias_ctype, _dst_ctype, _op_ctype, \
                         _op_dtype, _postprocess_mode)                   \
    template class Strategy<_src_ctype, _bias_ctype, _dst_ctype, _op_ctype, \
                            _op_dtype, _postprocess_mode,                   \
                            PackMode::ONLY_PACKA>;
INSTANTIAL_CLASS(dt_float32, dt_float32, dt_float32, dt_float32, dt_float32,
                 megdnn::PostprocessMode::FLOAT)
#undef INSTANTIAL_CLASS
  163. } // namespace megdnn
  164. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台