
opr_impl.cpp

/**
 * \file dnn/src/cuda/fake_quant/opr_impl.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include "./opr_impl.h"
#include "./kern.cuh"
#include "src/common/utils.h"

namespace megdnn {
namespace cuda {
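
// Forward pass: quantize-then-dequantize `input` into `output` elementwise.
// `scale` and `zero_point` are broadcast to the input's layout; contiguous
// tensors take the fast path below, everything else falls back to
// exec_noncontig().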
void FakeQuantForwardImpl::exec(_megdnn_tensor_in input,
                                _megdnn_tensor_in scale,
                                _megdnn_tensor_in zero_point,
                                _megdnn_tensor_out output,
                                _megdnn_workspace workspace) {
    check_exec(input.layout, scale.layout, zero_point.layout, output.layout,
               workspace.size);
    if (!input.layout.is_contiguous() || !output.layout.is_contiguous()) {
        return exec_noncontig(input, scale, zero_point, output);
    }
    ElemwiseOpParamN<2> ele_param;
    ele_param[0] = scale;
    ele_param[0].layout = ele_param[0].layout.broadcast(input.layout);
    ele_param[1] = zero_point;
    ele_param[1].layout = ele_param[1].layout.broadcast(input.layout);
    ele_param.init_from_given_tensor();
    auto stream = cuda_stream(handle());
#define cb(DType)                                                          \
    if (input.layout.dtype == DType()) {                                   \
        using T = typename DTypeTrait<DType>::ctype;                       \
        run_elemwise<FakeQuantKernOp<T>, T, 2>(ele_param, stream,          \
                                               {input, output, m_param});  \
        return;                                                            \
    }
    cb(megdnn::dtype::Float32)
#undef cb
}
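
// Non-contiguous fallback for the forward pass: `output` and `input` are
// passed as elementwise operands so the kernel can honor their strided
// layouts directly instead of requiring dense memory.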
void FakeQuantForwardImpl::exec_noncontig(_megdnn_tensor_in input,
                                          _megdnn_tensor_in scale,
                                          _megdnn_tensor_in zero_point,
                                          _megdnn_tensor_out output) {
    ElemwiseOpParamN<4> ele_param;
    ele_param[0] = output;
    ele_param[1] = input;
    ele_param[2] = scale;
    ele_param[2].layout = ele_param[2].layout.broadcast(input.layout);
    ele_param[3] = zero_point;
    ele_param[3].layout = ele_param[3].layout.broadcast(input.layout);
    ele_param.init_from_given_tensor();
    auto stream = cuda_stream(handle());
#define cb(DType)                                                           \
    if (input.layout.dtype == DType()) {                                    \
        using T = typename DTypeTrait<DType>::ctype;                        \
        run_elemwise<FakeQuantKernOpNonContig<T>, T, 4>(ele_param, stream,  \
                                                        {m_param});         \
        return;                                                             \
    }
    cb(megdnn::dtype::Float32)
#undef cb
}
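
// Backward pass: computes `grad` from `diff` and `input`, again with `scale`
// and `zero_point` broadcast to the input layout and a contiguous fast path.
// The gradient rule itself lives in FakeQuantBwdKernOp (kern.cuh).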
void FakeQuantBackwardImpl::exec(_megdnn_tensor_in diff,
                                 _megdnn_tensor_in input,
                                 _megdnn_tensor_in scale,
                                 _megdnn_tensor_in zero_point,
                                 _megdnn_tensor_out grad,
                                 _megdnn_workspace workspace) {
    check_exec(diff.layout, input.layout, scale.layout, zero_point.layout,
               grad.layout, workspace.size);
    if (!input.layout.is_contiguous() || !diff.layout.is_contiguous() ||
        !grad.layout.is_contiguous()) {
        return exec_noncontig(diff, input, scale, zero_point, grad);
    }
    ElemwiseOpParamN<2> ele_param;
    ele_param[0] = scale;
    ele_param[0].layout = ele_param[0].layout.broadcast(input.layout);
    ele_param[1] = zero_point;
    ele_param[1].layout = ele_param[1].layout.broadcast(input.layout);
    ele_param.init_from_given_tensor();
    auto m_param = param();
    auto stream = cuda_stream(handle());
#define cb(DType)                                                  \
    if (grad.layout.dtype == DType()) {                            \
        using T = typename DTypeTrait<DType>::ctype;               \
        run_elemwise<FakeQuantBwdKernOp<T>, T, 2>(                 \
                ele_param, stream, {diff, input, grad, m_param});  \
        return;                                                    \
    }
    cb(megdnn::dtype::Float32)
#undef cb
}
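
// Non-contiguous fallback for the backward pass: `grad`, `diff` and `input`
// keep their own layouts and ride along as elementwise operands.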
void FakeQuantBackwardImpl::exec_noncontig(_megdnn_tensor_in diff,
                                           _megdnn_tensor_in input,
                                           _megdnn_tensor_in scale,
                                           _megdnn_tensor_in zero_point,
                                           _megdnn_tensor_out grad) {
    ElemwiseOpParamN<5> ele_param;
    ele_param[0] = grad;
    ele_param[1] = diff;
    ele_param[2] = input;
    ele_param[3] = scale;
    ele_param[3].layout = ele_param[3].layout.broadcast(input.layout);
    ele_param[4] = zero_point;
    ele_param[4].layout = ele_param[4].layout.broadcast(input.layout);
    ele_param.init_from_given_tensor();
    auto m_param = param();
    auto stream = cuda_stream(handle());
#define cb(DType)                                                              \
    if (grad.layout.dtype == DType()) {                                        \
        using T = typename DTypeTrait<DType>::ctype;                           \
        run_elemwise<FakeQuantBwdKernOpNonContig<T>, T, 5>(ele_param, stream,  \
                                                           {m_param});         \
        return;                                                                \
    }
    cb(megdnn::dtype::Float32)
#undef cb
}
}  // namespace cuda
}  // namespace megdnn
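
For reference, here is a minimal host-side sketch of the fake-quantization math the kernels dispatched above are expected to implement (the real device code lives in kern.cuh). The standalone function names and the explicit qmin/qmax parameters are illustrative assumptions, not part of the MegEngine API; in the operator they come from its param:

#include <algorithm>
#include <cmath>

// Hypothetical sketch of the forward op: quantize one value, clamp it to the
// representable range, then dequantize it again.
float fake_quant_forward(float x, float scale, float zero_point, float qmin,
                         float qmax) {
    float q = std::round(x / scale) + zero_point;  // quantize
    q = std::min(std::max(q, qmin), qmax);         // clamp
    return (q - zero_point) * scale;               // dequantize
}

// Hypothetical sketch of the backward op: a straight-through estimator that
// passes the incoming gradient only where the value was not clipped.
float fake_quant_backward(float diff, float x, float scale, float zero_point,
                          float qmin, float qmax) {
    float q = std::round(x / scale) + zero_point;
    return (q >= qmin && q <= qmax) ? diff : 0.f;
}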

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has a GPU and that the driver is installed. If you would like to try deep-learning development on a cloud GPU platform, you are welcome to visit MegStudio.