You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

opr_impl.cpp 4.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. /**
  2. * \file dnn/src/naive/images2neibs/opr_impl.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "src/naive/images2neibs/opr_impl.h"
  12. #include "src/common/utils.h"
  13. #include "src/naive/handle.h"
  14. #include <cstring>
  15. namespace megdnn {
  16. namespace naive {
  17. template <typename T>
  18. void Images2NeibsForwardImpl::exec_internal(_megdnn_tensor_in src,
  19. _megdnn_tensor_out dst)
  20. {
  21. int N = src.layout.shape[0], C = src.layout.shape[1],
  22. IH = src.layout.shape[2], IW = src.layout.shape[3];
  23. auto sptr = src.ptr<T>();
  24. auto dptr = dst.ptr<T>();
  25. size_t idx = 0;
  26. int window_h = static_cast<int>(param().window_h);
  27. int window_w = static_cast<int>(param().window_w);
  28. int pad_h = static_cast<int>(param().pad_h);
  29. int pad_w = static_cast<int>(param().pad_w);
  30. int stride_h = static_cast<int>(param().stride_h);
  31. int stride_w = static_cast<int>(param().stride_w);
  32. int dilate_h = static_cast<int>(param().dilate_h);
  33. int dilate_w = static_cast<int>(param().dilate_w);
  34. int equ_window_h = dilate_h * (window_h-1) + 1;
  35. int equ_window_w = dilate_w * (window_w-1) + 1;
  36. for (int n = 0; n < N; ++n)
  37. for (int c = 0; c < C; ++c)
  38. {
  39. int ih = -pad_h;
  40. for (; ih+equ_window_h <= IH+pad_h; ih += stride_h) {
  41. int iw = -pad_w;
  42. for (; iw+equ_window_w <= IW+pad_w; iw += stride_w) {
  43. for (int kh = 0; kh < window_h; ++kh)
  44. for (int kw = 0; kw < window_w; ++kw)
  45. {
  46. int ih2 = ih+dilate_h*kh, iw2 = iw+dilate_w*kw;
  47. dptr[idx*window_h*window_w + kh*window_w + kw] =
  48. ih2 >= 0 && ih2 < IH &&
  49. iw2 >= 0 && iw2 < IW ?
  50. sptr[n*C*IH*IW + c*IH*IW + ih2*IW + iw2] : 0.0f;
  51. }
  52. ++idx;
  53. }
  54. }
  55. }
  56. }
  57. void Images2NeibsForwardImpl::exec(_megdnn_tensor_in src,
  58. _megdnn_tensor_out dst,
  59. _megdnn_workspace workspace)
  60. {
  61. check_exec(src.layout, dst.layout, workspace.size);
  62. #define cb(DType) \
  63. if (src.layout.dtype.enumv() == DTypeTrait<DType>::enumv) { \
  64. MEGDNN_DISPATCH_CPU_KERN_OPR( \
  65. exec_internal<typename DTypeTrait<DType>::ctype>(src, dst); \
  66. ); \
  67. return; \
  68. }
  69. MEGDNN_FOREACH_COMPUTING_DTYPE(cb);
  70. #undef cb
  71. megdnn_assert_internal(0);
  72. }
  73. template <typename T>
  74. void Images2NeibsBackwardImpl::exec_internal(_megdnn_tensor_in diff,
  75. _megdnn_tensor_out grad)
  76. {
  77. int N = grad.layout.shape[0], C = grad.layout.shape[1],
  78. IH = grad.layout.shape[2], IW = grad.layout.shape[3];
  79. auto sptr = grad.ptr<T>();
  80. auto dptr = diff.ptr<T>();
  81. size_t idx = 0;
  82. int window_h = static_cast<int>(param().window_h);
  83. int window_w = static_cast<int>(param().window_w);
  84. int pad_h = static_cast<int>(param().pad_h);
  85. int pad_w = static_cast<int>(param().pad_w);
  86. int stride_h = static_cast<int>(param().stride_h);
  87. int stride_w = static_cast<int>(param().stride_w);
  88. int dilate_h = static_cast<int>(param().dilate_h);
  89. int dilate_w = static_cast<int>(param().dilate_w);
  90. int equ_window_h = dilate_h * (window_h-1) + 1;
  91. int equ_window_w = dilate_w * (window_w-1) + 1;
  92. memset(sptr, 0, sizeof(T) * N*C*IH*IW);
  93. for (int n = 0; n < N; ++n)
  94. for (int c = 0; c < C; ++c)
  95. {
  96. int ih = -pad_h;
  97. for (; ih+equ_window_h <= IH+pad_h; ih += stride_h) {
  98. int iw = -pad_w;
  99. for (; iw+equ_window_w <= IW+pad_w; iw += stride_w) {
  100. for (int kh = 0; kh < window_h; ++kh)
  101. for (int kw = 0; kw < window_w; ++kw)
  102. {
  103. int ih2 = ih+dilate_h*kh, iw2 = iw+dilate_w*kw;
  104. if (ih2 >= 0 && ih2 < IH && iw2 >= 0 && iw2 < IW) {
  105. sptr[n*C*IH*IW + c*IH*IW + ih2*IW + iw2] +=
  106. dptr[idx*window_h*window_w + kh*window_w + kw];
  107. }
  108. }
  109. ++idx;
  110. }
  111. }
  112. }
  113. }
  114. void Images2NeibsBackwardImpl::exec(_megdnn_tensor_in diff,
  115. _megdnn_tensor_out grad,
  116. _megdnn_workspace workspace)
  117. {
  118. check_exec(diff.layout, grad.layout, workspace.size);
  119. #define cb(DType) \
  120. if (diff.layout.dtype == DType()) { \
  121. MEGDNN_DISPATCH_CPU_KERN_OPR( \
  122. exec_internal<typename DTypeTrait<DType>::ctype>(diff, grad); \
  123. ); \
  124. return; \
  125. }
  126. MEGDNN_FOREACH_COMPUTING_DTYPE(cb);
  127. #undef cb
  128. megdnn_assert_internal(0);
  129. }
  130. } // namespace naive
  131. } // namespace megdnn
  132. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台