#include "test/cpu/fixture.h"

#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/convolution.h"

using namespace megdnn;
using namespace test;

namespace {
Convolution::Param gconv_param(Convolution::Param p) {
    p.sparse = Convolution::Param::Sparse::GROUP;
    return p;
}
}  // anonymous namespace
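// convolution::get_args() returns a long list of test cases; CONVOLUTION_0
// through CONVOLUTION_3 below split that list into consecutive ranges
// (0..100, 100..305, 305..505, 505..end) so that no single test runs too long.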
#define CONVOLUTION_ARG_DIV_SIZE 100

TEST_F(CPU, CONVOLUTION_0) {
    using namespace convolution;
    std::vector<TestArg> args = get_args();
    auto loop_size = args.size();
    ASSERT_GT(loop_size, CONVOLUTION_ARG_DIV_SIZE);
    Checker<Convolution> checker(handle());
    for (unsigned int i = 0; i < CONVOLUTION_ARG_DIV_SIZE; i++) {
        checker.set_param(args[i].param).execs({args[i].src, args[i].filter, {}});
    }
}
#define CONVOLUTION1_ARG_LOOP_END_TIME (CONVOLUTION_ARG_DIV_SIZE + 205)

TEST_F(CPU, CONVOLUTION_1) {
    using namespace convolution;
    std::vector<TestArg> args = get_args();
    auto loop_size = args.size();
    ASSERT_GT(loop_size, CONVOLUTION_ARG_DIV_SIZE);
    ASSERT_GT(loop_size, CONVOLUTION1_ARG_LOOP_END_TIME);
    Checker<Convolution> checker(handle());
    for (unsigned int i = CONVOLUTION_ARG_DIV_SIZE; i < CONVOLUTION1_ARG_LOOP_END_TIME;
         i++) {
        checker.set_param(args[i].param).execs({args[i].src, args[i].filter, {}});
    }
}
#define CONVOLUTION2_ARG_LOOP_END_TIME (CONVOLUTION1_ARG_LOOP_END_TIME + 200)

TEST_F(CPU, CONVOLUTION_2) {
    using namespace convolution;
    std::vector<TestArg> args = get_args();
    auto loop_size = args.size();
    ASSERT_GT(loop_size, CONVOLUTION2_ARG_LOOP_END_TIME);
    Checker<Convolution> checker(handle());
    for (unsigned int i = CONVOLUTION1_ARG_LOOP_END_TIME;
         i < CONVOLUTION2_ARG_LOOP_END_TIME; i++) {
        checker.set_param(args[i].param).execs({args[i].src, args[i].filter, {}});
    }
}
TEST_F(CPU, CONVOLUTION_3) {
    using namespace convolution;
    std::vector<TestArg> args = get_args();
    auto loop_size = args.size();
    ASSERT_GT(loop_size, CONVOLUTION2_ARG_LOOP_END_TIME);
    Checker<Convolution> checker(handle());
    for (unsigned int i = CONVOLUTION2_ARG_LOOP_END_TIME; i < loop_size; i++) {
        checker.set_param(args[i].param).execs({args[i].src, args[i].filter, {}});
    }
}

#undef CONVOLUTION_ARG_DIV_SIZE
#undef CONVOLUTION1_ARG_LOOP_END_TIME
#undef CONVOLUTION2_ARG_LOOP_END_TIME
#define CB_CONV_CONFIG_COMBINATIONS(KSIZE)                        \
    TEST_F(CPU, CONV_CONFIG_COMBINATIONS_KSIZE_1_KSIZE_##KSIZE) { \
        convolution::test_conv_config_combinations(               \
                KSIZE, handle(), true, false, false);             \
    }

// FIXME: testing only ksize=1 crashes on iOS, so each instantiation below
// temporarily combines ksize 1 with another kernel size (ksize_1##other_ksize).
CB_CONV_CONFIG_COMBINATIONS(2);
CB_CONV_CONFIG_COMBINATIONS(3);
CB_CONV_CONFIG_COMBINATIONS(5);

#undef CB_CONV_CONFIG_COMBINATIONS
#if MEGDNN_WITH_BENCHMARK
TEST_F(CPU, BENCHMARK_CONVOLUTION) {
    using TestArg = convolution::TestArg;
    using Param = param::Convolution;
    std::vector<TestArg> args;
    // case 1: detection-like (padding x stride x kernel_size)
    // clang-format off
    for (size_t has_pad = 0; has_pad < 2; ++has_pad)
    for (uint32_t stride = 1; stride <= 2; ++stride)
    for (std::pair<size_t, size_t> kersize :
            std::vector<std::pair<size_t, size_t>>{
                    {2, 2}, {3, 3}, {5, 5}, {7, 7}}) {
        uint32_t pad_h, pad_w;
        if (has_pad)
            pad_h = kersize.first / 2;
        else
            pad_h = 0;
        if (has_pad)
            pad_w = kersize.second / 2;
        else
            pad_w = 0;
        auto param = Param{Param::Mode::CROSS_CORRELATION, pad_h, pad_w,
                           stride, stride};
        {
            auto arg = TestArg{param,
                               {2, 3, 320, 240},
                               {4, 3, kersize.first, kersize.second}};
            args.push_back(arg);
        }
    }
    // clang-format on
    Checker<Convolution> checker(handle());
    checker.set_perf_check(true).set_perf_check_threshold(2.0);
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, {}});
    }
}
#endif
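// With Sparse::GROUP (set by gconv_param above) the filter tensor is 5-D:
// {group, oc_per_group, ic_per_group, fh, fw}. Channel-wise convolution is
// expressed as group convolution with group == number of input channels.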
TEST_F(CPU, CHANWISE_CONVOLUTION) {
    constexpr auto M = Convolution::Mode::CROSS_CORRELATION;
    Checker<Convolution> checker(handle());
    checker.set_param(gconv_param({M, 0, 0, 1, 1}))
            .execs({{1, 1, 2, 2}, {1, 1, 1, 2, 2}, {}})
            .execs({{1, 1, 5, 5}, {1, 1, 1, 2, 2}, {}})
            .execs({{2, 2, 5, 5}, {2, 3, 1, 2, 2}, {2, 6, 4, 4}});
    checker.set_param(gconv_param({M, 1, 1, 1, 1}))
            .execs({{2, 2, 5, 5}, {2, 1, 1, 2, 2}, {}});
    checker.set_param(gconv_param({M, 2, 3, 2, 1}))
            .execs({{32, 12, 20, 10}, {12, 2, 1, 4, 5}, {}});
    // padding larger than kern
    checker.set_param(gconv_param({M, 20, 30, 4, 5}))
            .execs({{32, 12, 20, 10}, {12, 2, 1, 4, 5}, {}});
}
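// Same channel-wise cases as above, but with Int8 src/filter and an Int16
// destination dtype, plus a sweep over strides, paddings, kernel and
// spatial sizes.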
TEST_F(CPU, CHANWISE_CONVOLUTION_INT8_INT8_INT16) {
    constexpr auto M = Convolution::Mode::CROSS_CORRELATION;
    Checker<Convolution> checker(handle());
    checker.set_dtype(0, dtype::Int8());
    checker.set_dtype(1, dtype::Int8());
    checker.set_dtype(2, dtype::Int16());
    checker.set_param(gconv_param({M, 0, 0, 1, 1, 1, 1}))
            .execs({{1, 1, 2, 2}, {1, 1, 1, 2, 2}, {}})
            .execs({{1, 1, 5, 5}, {1, 1, 1, 2, 2}, {}})
            .execs({{2, 2, 5, 5}, {2, 3, 1, 2, 2}, {2, 6, 4, 4}});
    checker.set_param(gconv_param({M, 1, 1, 1, 1, 1, 1}))
            .execs({{2, 2, 5, 5}, {2, 1, 1, 2, 2}, {}});
    checker.set_param(gconv_param({M, 2, 3, 2, 1, 1, 1}))
            .execs({{32, 12, 20, 10}, {12, 2, 1, 4, 5}, {}});
    // padding larger than kern
    checker.set_param(gconv_param({M, 20, 30, 4, 5, 1, 1}))
            .execs({{32, 12, 20, 10}, {12, 2, 1, 4, 5}, {}});
    // clang-format off
    for (uint32_t s : {1, 2})
    for (uint32_t p : {0, 1})
    for (size_t kh : {2, 3, 5})
    for (size_t kw : {kh, kh + 1})
    for (size_t ic : {5})
    for (size_t oc : {3})
    for (size_t h = 20; h <= 60; h += 7)
    for (size_t w : {h, h + 1}) {
        checker.set_param(gconv_param({M, p, p, s, s, 1, 1}))
                .execs({{2, ic, h, w}, {ic, oc, 1, kh, kw}, {}});
    }
    // clang-format on
}
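// General grouped convolution: the run() helper builds the 5-D filter shape
// {group, OC/group, IC/group, FH, FW} explicitly from the per-group channel
// counts.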
TEST_F(CPU, GROUP_CONV) {
    auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t FH, size_t FW,
                   size_t OC, size_t /* OH */, size_t /* OW */, size_t PH, size_t PW,
                   size_t SH, size_t SW, size_t group) {
        Checker<Convolution> checker(handle());
        Convolution::Param param;
        param.pad_h = PH;
        param.pad_w = PW;
        param.stride_h = SH;
        param.stride_w = SW;
        auto ICg = IC / group;
        auto OCg = OC / group;
        checker.set_param(gconv_param(param))
                .exec({{N, IC, IH, IW}, {group, OCg, ICg, FH, FW}, {}});
    };
    // normal case
    run(2, 64, 7, 7, 3, 3, 32, 5, 5, 0, 0, 1, 1, 1);
    // padded case
    run(2, 32, 7, 7, 3, 3, 64, 7, 7, 1, 1, 1, 1, 4);
    // strided case
    run(2, 32, 7, 7, 3, 3, 64, 3, 3, 0, 0, 2, 2, 8);
}
#if MEGDNN_WITH_BENCHMARK
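// Benchmark 7x7 stride-2 convolutions: the default CPU handle is timed
// against a reference handle (create_cpu_handle(2), used here as the naive
// baseline) over a range of channel counts and spatial sizes.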
TEST_F(CPU, BENCHMARK_7X7_CONVOLUTION) {
    using Param = param::Convolution;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        auto handle_naive = create_cpu_handle(2);
        Benchmarker<Convolution> benchmarker_naive(handle_naive.get());
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 10;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        auto tnaive = benchmarker_naive.set_display(false)
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        printf("src: %s filter: %s dst: %s naive=%.3fms float=%.3fms\n",
               shapes[0].to_string().c_str(), shapes[1].to_string().c_str(),
               shapes[2].to_string().c_str(), tnaive / RUN, tfloat / RUN);
    };
    Param param;
    param.stride_h = 2;
    param.stride_w = 2;
    param.pad_h = 3;
    param.pad_w = 3;
    // clang-format off
    for (size_t ic : {1, 3, 8, 16, 24}) {
    for (size_t oc : {8, 16}) {
    for (size_t h : {128, 224, 256, 512}) {
    for (size_t w : {128, 224, 256, 512}) {
        run({{1, ic, h, w}, {oc, ic, 7, 7}, {1, oc, h / 2, w / 2}}, param);
    } } } }
    // clang-format on
    // Used in FaceModel
    // run({{2, 3, 512, 512}, {8, 3, 7, 7}, {2, 8, 256, 256}}, param);
    // run({{2, 3, 128, 128}, {16, 3, 7, 7}, {2, 16, 64, 64}}, param);
    // run({{2, 3, 224, 224}, {32, 3, 7, 7}, {2, 32, 112, 112}}, param);
}
#endif
// vim: syntax=cpp.doxygen