You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

group_conv.cpp 8.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. /**
  2. * \file dnn/test/cuda/group_conv.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megdnn/oprs/nn.h"
  12. #include "test/common/benchmarker.h"
  13. #include "test/common/checker.h"
  14. #include "test/common/convolution.h"
  15. #include "test/cuda/fixture.h"
  16. #include "src/cuda/utils.h"
  17. namespace megdnn {
  18. namespace test {
  19. TEST_F(CUDA, GROUP_CONV_FORWARD) {
  20. bool is_int_available = cuda::is_compute_capability_required(6, 1);
  21. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t FH, size_t FW,
  22. size_t OC, size_t /* OH */, size_t /* OW */, size_t PH, size_t PW,
  23. size_t SH, size_t SW, size_t DH, size_t DW, size_t group) {
  24. {
  25. // float case
  26. Checker<Convolution> checker(handle_cuda());
  27. Convolution::Param param;
  28. param.sparse = Convolution::Param::Sparse::GROUP;
  29. param.pad_h = PH;
  30. param.pad_w = PW;
  31. param.stride_h = SH;
  32. param.stride_w = SW;
  33. param.dilate_h = DH;
  34. param.dilate_w = DW;
  35. auto ICg = IC / group;
  36. auto OCg = OC / group;
  37. checker.set_param(param).exec(
  38. {{N, IC, IH, IW}, {group, OCg, ICg, FH, FW}, {}});
  39. }
  40. if (is_int_available) {
  41. // int 8x8x32 case
  42. Checker<Convolution> checker(handle_cuda());
  43. Convolution::Param param;
  44. param.sparse = Convolution::Param::Sparse::GROUP;
  45. param.format = Convolution::Param::Format::NHWC;
  46. param.pad_h = PH;
  47. param.pad_w = PW;
  48. param.stride_h = SH;
  49. param.stride_w = SW;
  50. param.dilate_h = DH;
  51. param.dilate_w = DW;
  52. auto ICg = IC / group;
  53. auto OCg = OC / group;
  54. UniformIntRNG rng(-4, 4);
  55. checker.set_param(param)
  56. .set_dtype(0, dtype::Int8())
  57. .set_dtype(1, dtype::Int8())
  58. .set_dtype(2, dtype::Int32())
  59. .set_rng(0, &rng)
  60. .set_rng(1, &rng)
  61. .exec({{N, IH, IW, IC}, {group, OCg, FH, FW, ICg}, {}});
  62. }
  63. };
  64. // normal case
  65. run(2, 64, 7, 7, 3, 3, 32, 5, 5, 0, 0, 1, 1, 1, 1, 2);
  66. // padded case
  67. run(2, 32, 7, 7, 3, 3, 64, 7, 7, 1, 1, 1, 1, 1, 1, 4);
  68. // strided case
  69. run(2, 32, 7, 7, 3, 3, 64, 3, 3, 0, 0, 2, 2, 1, 1, 8);
  70. // dilated case
  71. run(2, 32, 7, 7, 3, 3, 64, 3, 3, 0, 0, 1, 1, 2, 2, 8);
  72. }
  73. TEST_F(CUDA, GROUP_CONV_FORWARD_1x1) {
  74. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t FH, size_t FW,
  75. size_t OC, size_t group) {
  76. Checker<Convolution> checker(handle_cuda());
  77. std::string conv1x1_name =
  78. ConvBiasForward::algo_name<ConvBias::MatmulParam>("INPLACE_MATMUL", {});
  79. checker.set_before_exec_callback(
  80. AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{
  81. "DEFAULT",
  82. {{ConvBiasForward::algo_name<ConvBiasForward::DirectParam>(
  83. "CUDA:GROUP_CONV", {})
  84. .c_str(),
  85. {{conv1x1_name.c_str(), {}}}}}}));
  86. Convolution::Param param;
  87. param.sparse = Convolution::Param::Sparse::GROUP;
  88. auto ICg = IC / group;
  89. auto OCg = OC / group;
  90. checker.set_param(param).exec({{N, IC, IH, IW}, {group, OCg, ICg, FH, FW}, {}});
  91. };
  92. size_t ic = 192;
  93. for (size_t g = 2; g <= 3; g += 1) {
  94. for (size_t ih = 8; ih <= 128; ih *= 4) {
  95. size_t iw = ih;
  96. run(2, ic, ih, iw, 1, 1, ic / g, g);
  97. run(2, ic, ih + 1, iw + 1, 1, 1, ic / g, g);
  98. }
  99. }
  100. }
  101. TEST_F(CUDA, GROUP_CONV_BACKWARD_DATA) {
  102. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t FH, size_t FW,
  103. size_t OC, size_t OH, size_t OW, size_t PH, size_t PW, size_t SH,
  104. size_t SW, size_t group) {
  105. Checker<ConvolutionBackwardData> checker(handle_cuda());
  106. ConvolutionBackwardData::Param param;
  107. param.sparse = Convolution::Param::Sparse::GROUP;
  108. param.pad_h = PH;
  109. param.pad_w = PW;
  110. param.stride_h = SH;
  111. param.stride_w = SW;
  112. auto ICg = IC / group;
  113. auto OCg = OC / group;
  114. checker.set_param(param).exec(
  115. {{group, OCg, ICg, FH, FW}, {N, OC, OH, OW}, {N, IC, IH, IW}});
  116. };
  117. // normal case
  118. run(2, 64, 7, 7, 3, 3, 32, 5, 5, 0, 0, 1, 1, 2);
  119. // padded case
  120. run(2, 32, 7, 7, 3, 3, 64, 7, 7, 1, 1, 1, 1, 4);
  121. // strided case
  122. run(2, 32, 7, 7, 3, 3, 64, 3, 3, 0, 0, 2, 2, 8);
  123. }
  124. TEST_F(CUDA, GROUP_CONV_BACKWARD_DATA_CUDNN) {
  125. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t FH, size_t FW,
  126. size_t OC, size_t OH, size_t OW, size_t PH, size_t PW, size_t SH,
  127. size_t SW, size_t group) {
  128. Checker<ConvolutionBackwardData> checker(handle_cuda());
  129. checker.set_before_exec_callback(
  130. AlgoChecker<ConvolutionBackwardData>(ExecutionPolicyAlgoName{
  131. "CUDA:GROUP_CONV_BACKWARD_DATA", {{"CUDNN", {}}}}));
  132. ConvolutionBackwardData::Param param;
  133. param.sparse = Convolution::Param::Sparse::GROUP;
  134. param.pad_h = PH;
  135. param.pad_w = PW;
  136. param.stride_h = SH;
  137. param.stride_w = SW;
  138. auto ICg = IC / group;
  139. auto OCg = OC / group;
  140. checker.set_param(param).exec(
  141. {{group, OCg, ICg, FH, FW}, {N, OC, OH, OW}, {N, IC, IH, IW}});
  142. };
  143. // normal case
  144. run(2, 64, 7, 7, 3, 3, 32, 5, 5, 0, 0, 1, 1, 2);
  145. // padded case
  146. run(2, 32, 7, 7, 3, 3, 64, 7, 7, 1, 1, 1, 1, 4);
  147. // strided case
  148. run(2, 32, 7, 7, 3, 3, 64, 3, 3, 0, 0, 2, 2, 8);
  149. }
  150. TEST_F(CUDA, GROUP_CONV_BACKWARD_FILTER) {
  151. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t FH, size_t FW,
  152. size_t OC, size_t OH, size_t OW, size_t PH, size_t PW, size_t SH,
  153. size_t SW, size_t group) {
  154. Checker<ConvolutionBackwardFilter> checker(handle_cuda());
  155. ConvolutionBackwardFilter::Param param;
  156. param.sparse = Convolution::Param::Sparse::GROUP;
  157. param.pad_h = PH;
  158. param.pad_w = PW;
  159. param.stride_h = SH;
  160. param.stride_w = SW;
  161. auto ICg = IC / group;
  162. auto OCg = OC / group;
  163. checker.set_param(param).exec(
  164. {{N, IC, IH, IW}, {N, OC, OH, OW}, {group, OCg, ICg, FH, FW}});
  165. };
  166. // normal case
  167. run(2, 64, 7, 7, 3, 3, 32, 5, 5, 0, 0, 1, 1, 2);
  168. // padded case
  169. run(2, 32, 7, 7, 3, 3, 64, 7, 7, 1, 1, 1, 1, 4);
  170. // strided case
  171. run(2, 32, 7, 7, 3, 3, 64, 3, 3, 0, 0, 2, 2, 8);
  172. }
  173. TEST_F(CUDA, GROUP_CONV_BACKWARD_FILTER_CUDNN) {
  174. auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t FH, size_t FW,
  175. size_t OC, size_t OH, size_t OW, size_t PH, size_t PW, size_t SH,
  176. size_t SW, size_t group) {
  177. Checker<ConvolutionBackwardFilter> checker(handle_cuda());
  178. checker.set_before_exec_callback(
  179. AlgoChecker<ConvolutionBackwardFilter>(ExecutionPolicyAlgoName{
  180. "CUDA:GROUP_CONV_BACKWARD_FILTER", {{"CUDNN", {}}}}));
  181. ConvolutionBackwardFilter::Param param;
  182. param.sparse = Convolution::Param::Sparse::GROUP;
  183. param.pad_h = PH;
  184. param.pad_w = PW;
  185. param.stride_h = SH;
  186. param.stride_w = SW;
  187. auto ICg = IC / group;
  188. auto OCg = OC / group;
  189. checker.set_param(param).exec(
  190. {{N, IC, IH, IW}, {N, OC, OH, OW}, {group, OCg, ICg, FH, FW}});
  191. };
  192. // normal case
  193. run(2, 64, 7, 7, 3, 3, 32, 5, 5, 0, 0, 1, 1, 2);
  194. // padded case
  195. run(2, 32, 7, 7, 3, 3, 64, 7, 7, 1, 1, 1, 1, 4);
  196. // strided case
  197. run(2, 32, 7, 7, 3, 3, 64, 3, 3, 0, 0, 2, 2, 8);
  198. }
  199. } // namespace test
  200. } // namespace megdnn
  201. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台