You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

adaptive_pooling.cpp 3.7 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. #include "test/arm_common/fixture.h"
  2. #include "megdnn/tensor_iter.h"
  3. #include "src/common/utils.h"
  4. #include "test/common/adaptive_pooling.h"
  5. #include "test/common/benchmarker.h"
  6. #include "test/common/checker.h"
  7. namespace megdnn {
  8. namespace test {
  9. TEST_F(ARM_COMMON, ADAPTIVE_POOLING_FORWARD_NCHW44) {
  10. auto args = adaptive_pooling::get_args_nchw44();
  11. Checker<AdaptivePooling> checker(handle());
  12. checker.set_epsilon(1e-4);
  13. for (DType dtype : {(DType)dtype::Float32(), (DType)dtype::QuantizedS8(1.0)})
  14. for (auto&& arg : args) {
  15. auto param = arg.param;
  16. auto src = arg.ishape;
  17. auto dst = arg.oshape;
  18. checker.set_param(param).set_dtype(0, dtype).set_dtype(1, dtype).exec(
  19. TensorShapeArray{src, dst, {}});
  20. }
  21. }
  22. TEST_F(ARM_COMMON, ADAPTIVE_POOLING_FORWARD) {
  23. auto args = adaptive_pooling::get_args();
  24. Checker<AdaptivePooling> checker(handle());
  25. checker.set_epsilon(1e-4);
  26. for (DType dtype : {(DType)dtype::Float32(), (DType)dtype::QuantizedS8(1.0)})
  27. for (auto&& arg : args) {
  28. auto param = arg.param;
  29. auto src = arg.ishape;
  30. auto dst = arg.oshape;
  31. checker.set_param(param).set_dtype(0, dtype).set_dtype(1, dtype).exec(
  32. TensorShapeArray{src, dst, {}});
  33. }
  34. }
  35. #if MEGDNN_WITH_BENCHMARK
  36. namespace {
  37. void benchmark_globalpooling_nchw44_fp32(Handle* handle) {
  38. using Param = param::AdaptivePooling;
  39. auto run = [&](size_t n, size_t c, size_t h, size_t w, Param::Mode mode) {
  40. Param param;
  41. param.format = Param::Format::NCHW;
  42. param.mode = mode;
  43. TensorShape nchw_shape = {n, c, h, w};
  44. TensorShape nchw_dst_shape = {n, c, 1, 1};
  45. TensorShape nchw44_shape = {n, c / 4, h, w, 4};
  46. TensorShape nchw44_dst_shape = {n, c / 4, 1, 1, 4};
  47. TensorLayout dst_layout;
  48. float calc_amount = n * c * h * w;
  49. Benchmarker<AdaptivePooling> benchmarker_float_nchw(handle);
  50. Benchmarker<AdaptivePooling> benchmarker_float_nchw44(handle);
  51. Benchmarker<AdaptivePooling> benchmarker_int_nchw44(handle);
  52. size_t RUN = 500;
  53. auto t1 = benchmarker_float_nchw.set_display(false)
  54. .set_times(RUN)
  55. .set_param(param)
  56. .exec({nchw_shape, nchw_dst_shape});
  57. param.format = Param::Format::NCHW44;
  58. auto t2 = benchmarker_int_nchw44.set_display(false)
  59. .set_times(RUN)
  60. .set_param(param)
  61. .execl({{nchw44_shape, dtype::QuantizedS8(1.0)},
  62. {nchw44_dst_shape, dtype::QuantizedS8(1.0)}});
  63. auto t3 = benchmarker_float_nchw44.set_display(false)
  64. .set_times(RUN)
  65. .set_param(param)
  66. .exec({nchw44_shape, nchw44_dst_shape});
  67. printf("{%zu %zu %zu %zu} \n"
  68. "nchw_fp32={%.3f ms, %.3f Mflops}, "
  69. "nchw44_int={%.3f ms, %.3f Mflops}, "
  70. "nchw44_fp32={%.3f ms, %.3f Mflops, speed_up %f}\n\n",
  71. n, c, h, w, t1 / RUN, calc_amount / (t1 / RUN * 1000), t2 / RUN,
  72. calc_amount / (t2 / RUN * 1000), t3 / RUN,
  73. calc_amount / (t3 / RUN * 1000), t1 / t3);
  74. };
  75. run(1, 128, 25, 25, param::AdaptivePooling::Mode::AVERAGE);
  76. }
  77. } // namespace
  78. TEST_F(ARM_COMMON, BENCHMARK_GLOBAL_POOLING_NCHW44_FP32) {
  79. benchmark_globalpooling_nchw44_fp32(handle());
  80. }
  81. #endif
  82. } // namespace test
  83. } // namespace megdnn
  84. // vim: syntax=cpp.doxygen