
convolution.cpp

/**
 * \file dnn/src/naive/convolution/convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "./opr_impl.h"
#include "./helper.h"
#include "src/naive/handle.h"
#include "src/common/utils.h"
#include "megdnn/dtype.h"
#include "megdnn/tensor_iter.h"
#include <cstring>
#include "midout.h"

MIDOUT_DECL(megdnn_naive_conv_fwd)

using namespace megdnn;
using namespace naive;

void ConvolutionForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
                                  _megdnn_tensor_out dst,
                                  const PreprocessedFilter* preprocessed_filter,
                                  _megdnn_workspace workspace) {
    MIDOUT_BEGIN(megdnn_naive_conv_fwd) {
        auto filter_meta = check_exec(src.layout, filter.layout, dst.layout,
                                      workspace.size, preprocessed_filter);
        using ComputeMode = Param::ComputeMode;
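        // DISPATCH_CMODE matches the (src dtype, dst dtype, compute mode)
        // triple against the current operator parameters; on a match it
        // enqueues the templated naive kernel on the CPU handle and returns.
        // DISPATCH is the common case with ComputeMode::DEFAULT.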
#define DISPATCH_CMODE(in_dt, out_dt, in_ct, out_ct, comp_ct, cmode)      \
    do {                                                                  \
        using namespace dtype;                                            \
        if (src.layout.dtype.enumv() == DTypeTrait<in_dt>::enumv &&       \
            dst.layout.dtype.enumv() == DTypeTrait<out_dt>::enumv &&      \
            param().compute_mode == cmode) {                              \
            MEGDNN_DISPATCH_CPU_KERN_OPR(                                 \
                    (convolution::forward<in_ct, in_ct, out_ct, comp_ct>( \
                            src, filter, dst, filter_meta)););            \
            return;                                                       \
        }                                                                 \
    } while (0);
#define DISPATCH(in_dt, out_dt, in_ct, out_ct, comp_ct) \
    DISPATCH_CMODE(in_dt, out_dt, in_ct, out_ct, comp_ct, ComputeMode::DEFAULT)
#define cb(dt)                                                     \
    DISPATCH(dt, dt, DTypeTrait<dt>::ctype, DTypeTrait<dt>::ctype, \
             DTypeTrait<dt>::ctype)
        MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
#undef cb
        DISPATCH(Int8, Int16, dt_int8, dt_int16, dt_int16);
        DISPATCH(Int8, Int32, dt_int8, dt_int32, dt_int32);
        DISPATCH(QuantizedS8, QuantizedS32, dt_int8, dt_int32, dt_int32);
        MEGDNN_INC_FLOAT16(DISPATCH_CMODE(Float16, Float16, dt_float16,
                                          dt_float16, dt_float32,
                                          ComputeMode::FLOAT32));
        MEGDNN_INC_FLOAT16(DISPATCH_CMODE(BFloat16, BFloat16, dt_bfloat16,
                                          dt_bfloat16, dt_float32,
                                          ComputeMode::FLOAT32));
        DISPATCH(Quantized8Asymm, QuantizedS32, dt_quint8, dt_qint32,
                 dt_qint32);
        DISPATCH(QuantizedS8, QuantizedS8, dt_int8, dt_int8, dt_int32);
#undef DISPATCH
        megdnn_throw(ssprintf("unsupported Conv(%s, %s) -> %s with cmode = %d",
                              src.layout.dtype.name(),
                              filter.layout.dtype.name(),
                              dst.layout.dtype.name(),
                              static_cast<int>(param().compute_mode)));
    }
    MIDOUT_END();
}

size_t ConvolutionBackwardDataImpl::get_workspace_in_bytes(
        const TensorLayout& filter, const TensorLayout& diff,
        const TensorLayout& grad) {
    size_t workspace_size = 0;
    auto flt_dt = filter.dtype.enumv();
    auto grad_dt = grad.dtype.enumv();
    auto diff_dt = diff.dtype.enumv();
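    // Low-precision inputs need scratch space: fp16/bf16 gradients are
    // accumulated in an fp32 buffer, and int8/quantized-int8 gradients in a
    // QuantizedS32 buffer, before being converted back to the grad dtype.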
#if !MEGDNN_DISABLE_FLOAT16
    if (flt_dt == DTypeEnum::Float16 || flt_dt == DTypeEnum::BFloat16) {
        megdnn_assert(flt_dt == grad_dt && flt_dt == diff_dt);
        workspace_size = grad.span().dist_elem() * dtype::Float32().size();
    }
#endif
    if ((flt_dt == DTypeEnum::Int8 || flt_dt == DTypeEnum::QuantizedS8) &&
        (diff_dt == DTypeEnum::Int8 || diff_dt == DTypeEnum::QuantizedS8) &&
        (grad_dt == DTypeEnum::Int8 || grad_dt == DTypeEnum::QuantizedS8)) {
        workspace_size =
                TensorLayout{grad, dtype::QuantizedS32()}.span().dist_byte();
    }
    return workspace_size;
}

void ConvolutionBackwardDataImpl::exec(_megdnn_tensor_in filter,
                                       _megdnn_tensor_in diff,
                                       _megdnn_tensor_out grad,
                                       _megdnn_workspace workspace) {
    auto filter_meta = check_exec(
            filter.layout, diff.layout, grad.layout, workspace.size);
    using ComputeMode = Param::ComputeMode;
    auto cmode = param().compute_mode;
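    // Dispatch order: native float types with ComputeMode::DEFAULT first,
    // then mixed-precision fp16/bf16 with fp32 accumulation, then the
    // int8/quantized paths below.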
#define cb(dt)                                                              \
    do {                                                                    \
        if (filter.layout.dtype == dt() && cmode == ComputeMode::DEFAULT) { \
            using ctype = DTypeTrait<dt>::ctype;                            \
            MEGDNN_DISPATCH_CPU_KERN_OPR(                                   \
                    (convolution::backward_data<ctype, ctype, ctype>(       \
                            filter, diff, grad, filter_meta)););            \
            return;                                                         \
        }                                                                   \
    } while (0);
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
#undef cb
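    // Mixed-precision path: run the kernel with fp32 accumulation into the
    // workspace buffer (sized by get_workspace_in_bytes above), then convert
    // the result back into the low-precision grad tensor.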
#if !MEGDNN_DISABLE_FLOAT16
    if (filter.layout.dtype == dtype::Float16() &&
        cmode == ComputeMode::FLOAT32) {
        TensorND grad_fp32;
        grad_fp32.layout = grad.layout;
        grad_fp32.layout.dtype = dtype::Float32();
        grad_fp32.raw_ptr = workspace.raw_ptr;
        auto&& type_cvt = handle()->create_operator<TypeCvt>();
        type_cvt->exec(grad, grad_fp32);
        MEGDNN_DISPATCH_CPU_KERN_OPR(
                (convolution::backward_data<dt_float16, dt_float16, dt_float32>(
                        filter, diff, grad_fp32, filter_meta)););
        type_cvt->exec(grad_fp32, grad);
        return;
    }
    if (filter.layout.dtype == dtype::BFloat16() &&
        cmode == ComputeMode::FLOAT32) {
        TensorND grad_fp32;
        grad_fp32.layout = grad.layout;
        grad_fp32.layout.dtype = dtype::Float32();
        grad_fp32.raw_ptr = workspace.raw_ptr;
        auto&& type_cvt = handle()->create_operator<TypeCvt>();
        type_cvt->exec(grad, grad_fp32);
        MEGDNN_DISPATCH_CPU_KERN_OPR(
                (convolution::backward_data<dt_bfloat16, dt_bfloat16, dt_float32>(
                        filter, diff, grad_fp32, filter_meta)););
        type_cvt->exec(grad_fp32, grad);
        return;
    }
#endif
    auto flt_dt = filter.layout.dtype.enumv();
    auto grad_dt = grad.layout.dtype.enumv();
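    // Quantized path: when grad itself is int8, accumulate into a
    // QuantizedS32 workspace tensor whose scale is the product of the filter
    // and diff scales, then TypeCvt the result back into grad.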
    if ((flt_dt == DTypeEnum::Int8 || flt_dt == DTypeEnum::QuantizedS8) &&
        (grad_dt == DTypeEnum::Int8 || grad_dt == DTypeEnum::QuantizedS8)) {
        auto res = grad;
        auto resf_s = filter.layout.dtype.param<dtype::QuantizedS8>().scale *
                      diff.layout.dtype.param<dtype::QuantizedS8>().scale;
        res = TensorND{workspace.raw_ptr,
                       TensorLayout{grad.layout, dtype::QuantizedS32(resf_s)}};
        MEGDNN_DISPATCH_CPU_KERN_OPR(
                (convolution::backward_data<dt_qint8, dt_qint8, dt_qint32>(
                        filter, diff, res, filter_meta)););
        handle()->create_operator<TypeCvt>()->exec(res, grad);
        return;
    }
    if ((flt_dt == DTypeEnum::Int8 || flt_dt == DTypeEnum::QuantizedS8) &&
        (grad_dt == DTypeEnum::Int32 || grad_dt == DTypeEnum::QuantizedS32)) {
        MEGDNN_DISPATCH_CPU_KERN_OPR(
                (convolution::backward_data<dt_int8, dt_int8, dt_int32>(
                        filter, diff, grad, filter_meta)););
        return;
    }
    if (flt_dt == DTypeEnum::Quantized8Asymm &&
        grad_dt == DTypeEnum::QuantizedS32) {
        MEGDNN_DISPATCH_CPU_KERN_OPR(
                (convolution::backward_data<dt_quint8, dt_quint8, dt_qint32>(
                        filter, diff, grad, filter_meta)););
        return;
    }
    megdnn_throw(ssprintf(
            "unsupported ConvolutionBackwardData(%s, %s) -> %s with cmode = %d",
            filter.layout.dtype.name(), diff.layout.dtype.name(),
            grad.layout.dtype.name(), static_cast<int>(cmode)));
}

size_t ConvolutionBackwardFilterImpl::get_workspace_in_bytes(
        const TensorLayout& src, const TensorLayout& diff,
        const TensorLayout& grad) {
    size_t workspace_size = 0;
#if !MEGDNN_DISABLE_FLOAT16
    auto src_dt = src.dtype.enumv();
    auto grad_dt = grad.dtype.enumv();
    auto diff_dt = diff.dtype.enumv();
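    // As in the backward-data case, fp16/bf16 filter gradients are
    // accumulated in an fp32 workspace buffer of one float per grad element.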
    if (src_dt == DTypeEnum::Float16 || src_dt == DTypeEnum::BFloat16) {
        megdnn_assert(src_dt == grad_dt && src_dt == diff_dt);
        workspace_size = grad.span().dist_elem() * dtype::Float32().size();
    }
#endif
    return workspace_size;
}

void ConvolutionBackwardFilterImpl::exec(_megdnn_tensor_in src,
                                         _megdnn_tensor_in diff,
                                         _megdnn_tensor_out grad,
                                         _megdnn_workspace workspace) {
    auto filter_meta = check_exec(
            src.layout, diff.layout, grad.layout, workspace.size);
    using ComputeMode = Param::ComputeMode;
    auto cmode = param().compute_mode;
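    // Same dispatch structure as backward data: native float types first,
    // then the fp16/bf16 mixed-precision paths. No int8 path is provided for
    // the filter gradient, so anything else is an internal error.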
#define cb(dt)                                                            \
    do {                                                                  \
        if (src.layout.dtype == dt() && cmode == ComputeMode::DEFAULT) {  \
            using ctype = DTypeTrait<dt>::ctype;                          \
            MEGDNN_DISPATCH_CPU_KERN(                                     \
                    static_cast<HandleImpl*>(handle()),                   \
                    convolution::backward_filter<                         \
                            ctype MEGDNN_COMMA ctype MEGDNN_COMMA ctype>( \
                            src, diff, grad, filter_meta););              \
            return;                                                       \
        }                                                                 \
    } while (0);
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
#undef cb
#if !MEGDNN_DISABLE_FLOAT16
    if (src.layout.dtype == dtype::Float16() && cmode == ComputeMode::FLOAT32) {
        TensorND grad_fp32;
        grad_fp32.layout = grad.layout;
        grad_fp32.layout.dtype = dtype::Float32();
        grad_fp32.raw_ptr = workspace.raw_ptr;
        auto&& type_cvt = handle()->create_operator<TypeCvt>();
        type_cvt->exec(grad, grad_fp32);
        MEGDNN_DISPATCH_CPU_KERN_OPR(
                (convolution::backward_filter<dt_float16, dt_float16,
                                              dt_float32>(src, diff, grad_fp32,
                                                          filter_meta)););
        type_cvt->exec(grad_fp32, grad);
        return;
    }
    if (src.layout.dtype == dtype::BFloat16() &&
        cmode == ComputeMode::FLOAT32) {
        TensorND grad_fp32;
        grad_fp32.layout = grad.layout;
        grad_fp32.layout.dtype = dtype::Float32();
        grad_fp32.raw_ptr = workspace.raw_ptr;
        auto&& type_cvt = handle()->create_operator<TypeCvt>();
        type_cvt->exec(grad, grad_fp32);
        MEGDNN_DISPATCH_CPU_KERN_OPR(
                (convolution::backward_filter<dt_bfloat16, dt_bfloat16,
                                              dt_float32>(src, diff, grad_fp32,
                                                          filter_meta)););
        type_cvt->exec(grad_fp32, grad);
        return;
    }
#endif
    megdnn_assert_internal(0);
}
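
// Algorithm query interface: the naive backend exposes exactly one default
// algorithm per operator. The heuristic simply returns it, asserting that it
// carries the REPRODUCIBLE attribute when the caller requires one.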
std::vector<ConvolutionForward::Algorithm*>
ConvolutionForwardImpl::get_all_algorithms(
        const TensorLayout&, const TensorLayout&, const TensorLayout&) {
    return {static_cast<HandleImpl*>(handle())->default_conv_fwd_algo()};
}

ConvolutionForward::Algorithm* ConvolutionForwardImpl::get_algorithm_heuristic(
        const TensorLayout& /* src */, const TensorLayout& /* filter */,
        const TensorLayout& /* dst */, size_t /* workspace_limit_in_bytes */,
        bool reproducible) {
    auto algo = static_cast<HandleImpl*>(handle())->default_conv_fwd_algo();
    if (reproducible) {
        megdnn_assert(algo->contain_attribute(AlgoAttribute::REPRODUCIBLE),
                      "require reproducible algorithm, but heuristic "
                      "algorithm(%s) is not reproducible",
                      algo->name());
    }
    return algo;
}

ConvolutionForward::Algorithm* ConvolutionForwardImpl::get_algorithm_from_desc(
        const AlgorithmDesc& desc) {
    Algorithm* ret = static_cast<HandleImpl*>(handle())->default_conv_fwd_algo();
    megdnn_assert(desc == ret->info().desc);
    return ret;
}

std::vector<ConvolutionBackwardData::Algorithm*>
ConvolutionBackwardDataImpl::get_all_algorithms(
        const TensorLayout&, const TensorLayout&, const TensorLayout&) {
    return {static_cast<HandleImpl*>(handle())->default_conv_bwd_data_algo()};
}

ConvolutionBackwardData::Algorithm*
ConvolutionBackwardDataImpl::get_algorithm_heuristic(
        const TensorLayout& /* filter */, const TensorLayout& /* diff */,
        const TensorLayout& /* grad */, size_t /* workspace_limit_in_bytes */,
        bool reproducible) {
    auto algo = static_cast<HandleImpl*>(handle())->default_conv_bwd_data_algo();
    if (reproducible) {
        megdnn_assert(algo->contain_attribute(AlgoAttribute::REPRODUCIBLE),
                      "require reproducible algorithm, but heuristic "
                      "algorithm(%s) is not reproducible",
                      algo->name());
    }
    return algo;
}

ConvolutionBackwardData::Algorithm*
ConvolutionBackwardDataImpl::get_algorithm_from_desc(
        const AlgorithmDesc& desc) {
    Algorithm* ret =
            static_cast<HandleImpl*>(handle())->default_conv_bwd_data_algo();
    megdnn_assert(desc == ret->info().desc);
    return ret;
}

std::vector<ConvolutionBackwardFilter::Algorithm*>
ConvolutionBackwardFilterImpl::get_all_algorithms(
        const TensorLayout&, const TensorLayout&, const TensorLayout&) {
    return {static_cast<HandleImpl*>(handle())->default_conv_bwd_filter_algo()};
}

ConvolutionBackwardFilter::Algorithm*
ConvolutionBackwardFilterImpl::get_algorithm_heuristic(
        const TensorLayout& /* src */, const TensorLayout& /* diff */,
        const TensorLayout& /* grad */, size_t /* workspace_limit_in_bytes */,
        bool reproducible) {
    auto algo =
            static_cast<HandleImpl*>(handle())->default_conv_bwd_filter_algo();
    if (reproducible) {
        megdnn_assert(algo->contain_attribute(AlgoAttribute::REPRODUCIBLE),
                      "require reproducible algorithm, but heuristic "
                      "algorithm(%s) is not reproducible",
                      algo->name());
    }
    return algo;
}

ConvolutionBackwardFilter::Algorithm*
ConvolutionBackwardFilterImpl::get_algorithm_from_desc(
        const AlgorithmDesc& desc) {
    Algorithm* ret =
            static_cast<HandleImpl*>(handle())->default_conv_bwd_filter_algo();
    megdnn_assert(desc == ret->info().desc);
    return ret;
}

const char* ConvolutionForwardImpl::get_algorithm_set_name() const {
    return "DEFAULT";
}

const char* ConvolutionBackwardDataImpl::get_algorithm_set_name() const {
    return "DEFAULT";
}

const char* ConvolutionBackwardFilterImpl::get_algorithm_set_name() const {
    return "DEFAULT";
}

// vim: syntax=cpp.doxygen
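
For orientation, here is approximately what one DISPATCH line in ConvolutionForwardImpl::exec expands to after macro substitution. This is an illustrative sketch derived from the DISPATCH_CMODE definition above, not code that appears in the file:

// Sketch: expansion of DISPATCH(Int8, Int16, dt_int8, dt_int16, dt_int16).
// Illustrative only; the real code is generated by the preprocessor.
do {
    using namespace dtype;
    if (src.layout.dtype.enumv() == DTypeTrait<Int8>::enumv &&
        dst.layout.dtype.enumv() == DTypeTrait<Int16>::enumv &&
        param().compute_mode == ComputeMode::DEFAULT) {
        // in_ct is used for both the src and filter element types.
        MEGDNN_DISPATCH_CPU_KERN_OPR(
                (convolution::forward<dt_int8, dt_int8, dt_int16, dt_int16>(
                        src, filter, dst, filter_meta)););
        return;
    }
} while (0);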

The MegEngine installation package ships with the CUDA environment needed to run code on a GPU, so there is no separate CPU and GPU build. To run GPU programs, make sure the machine has a GPU and its driver installed. If you would like to try deep-learning development on a cloud GPU computing platform, visit the MegStudio platform.