#include "test/arm_common/fixture.h"

#include "megdnn/tensor_iter.h"
#include "src/common/utils.h"
#include "test/common/adaptive_pooling.h"
#include "test/common/benchmarker.h"
#include "test/common/checker.h"

namespace megdnn {
namespace test {

namespace {

/*!
 * \brief Shared body of the adaptive pooling forward correctness tests.
 *
 * For each dtype in {Float32, QuantizedS8(1.0)} and each entry of \p args,
 * configures the checker with the entry's param, applies the dtype to tensor
 * indices 0 and 1, and executes on {ishape, oshape, <empty shape>}.
 * The checker epsilon is set to 1e-4 once, before any case runs.
 *
 * \param handle handle the checker is constructed with
 * \param args iterable of test cases exposing `param`, `ishape` and `oshape`
 */
template <typename Args>
void check_adaptive_pooling_forward(Handle* handle, const Args& args) {
    // NOTE(review): Checker (and Benchmarker below) are used without explicit
    // template arguments; if they are class templates in test/common, the
    // operator type argument may have been lost -- confirm against the headers.
    Checker checker(handle);
    checker.set_epsilon(1e-4);
    for (DType dtype :
         {static_cast<DType>(dtype::Float32()),
          static_cast<DType>(dtype::QuantizedS8(1.0))}) {
        for (auto&& arg : args) {
            checker.set_param(arg.param)
                    .set_dtype(0, dtype)
                    .set_dtype(1, dtype)
                    .exec(TensorShapeArray{arg.ishape, arg.oshape, {}});
        }
    }
}

}  // namespace

//! Correctness check over the cases from adaptive_pooling::get_args_nchw44().
TEST_F(ARM_COMMON, ADAPTIVE_POOLING_FORWARD_NCHW44) {
    check_adaptive_pooling_forward(handle(), adaptive_pooling::get_args_nchw44());
}

//! Correctness check over the cases from adaptive_pooling::get_args().
TEST_F(ARM_COMMON, ADAPTIVE_POOLING_FORWARD) {
    check_adaptive_pooling_forward(handle(), adaptive_pooling::get_args());
}

#if MEGDNN_WITH_BENCHMARK
namespace {
/*!
 * \brief Benchmarks adaptive pooling down to a 1x1 spatial output and prints
 * a comparison of three configurations.
 *
 * The configurations, using the labels of the printed report, are:
 *   - nchw_fp32:   Format::NCHW on {n, c, h, w} -> {n, c, 1, 1}
 *   - nchw44_int:  Format::NCHW44 on {n, c/4, h, w, 4} -> {n, c/4, 1, 1, 4},
 *                  with both layouts explicitly QuantizedS8(1.0)
 *   - nchw44_fp32: Format::NCHW44 on the same NCHW44 shapes, no explicit dtype
 *
 * \param handle handle every benchmarker is constructed with
 */
void benchmark_globalpooling_nchw44_fp32(Handle* handle) {
    using Param = param::AdaptivePooling;
    auto run = [&](size_t n, size_t c, size_t h, size_t w, Param::Mode mode) {
        Param param;
        param.format = Param::Format::NCHW;
        param.mode = mode;

        TensorShape nchw_shape = {n, c, h, w};
        TensorShape nchw_dst_shape = {n, c, 1, 1};
        // c / 4 is integer division; callers are expected to pass a channel
        // count that is a multiple of 4.
        TensorShape nchw44_shape = {n, c / 4, h, w, 4};
        TensorShape nchw44_dst_shape = {n, c / 4, 1, 1, 4};

        // Work estimate: one operation per input element.
        float calc_amount = n * c * h * w;

        Benchmarker benchmarker_float_nchw(handle);
        Benchmarker benchmarker_float_nchw44(handle);
        Benchmarker benchmarker_int_nchw44(handle);
        const size_t RUN = 500;

        auto nchw_fp32_total = benchmarker_float_nchw.set_display(false)
                                       .set_times(RUN)
                                       .set_param(param)
                                       .exec({nchw_shape, nchw_dst_shape});

        // The two remaining measurements share the NCHW44 format.
        param.format = Param::Format::NCHW44;
        auto nchw44_int_total =
                benchmarker_int_nchw44.set_display(false)
                        .set_times(RUN)
                        .set_param(param)
                        .execl({{nchw44_shape, dtype::QuantizedS8(1.0)},
                                {nchw44_dst_shape, dtype::QuantizedS8(1.0)}});
        auto nchw44_fp32_total = benchmarker_float_nchw44.set_display(false)
                                         .set_times(RUN)
                                         .set_param(param)
                                         .exec({nchw44_shape, nchw44_dst_shape});

        // Per-run figures; the report labels them "ms", which presumes the
        // benchmarker returns the accumulated time of all RUN runs in
        // milliseconds -- TODO confirm against Benchmarker.
        const auto nchw_fp32_ms = nchw_fp32_total / RUN;
        const auto nchw44_int_ms = nchw44_int_total / RUN;
        const auto nchw44_fp32_ms = nchw44_fp32_total / RUN;
        // ms * 1000 gives microseconds, so the quotient is in millions of
        // operations per second (given the unit assumption above).
        const auto mflops = [calc_amount](auto ms_per_run) {
            return calc_amount / (ms_per_run * 1000);
        };

        printf("{%zu %zu %zu %zu} \n"
               "nchw_fp32={%.3f ms, %.3f Mflops}, "
               "nchw44_int={%.3f ms, %.3f Mflops}, "
               "nchw44_fp32={%.3f ms, %.3f Mflops, speed_up %f}\n\n",
               n, c, h, w, nchw_fp32_ms, mflops(nchw_fp32_ms), nchw44_int_ms,
               mflops(nchw44_int_ms), nchw44_fp32_ms, mflops(nchw44_fp32_ms),
               nchw_fp32_total / nchw44_fp32_total);
    };

    run(1, 128, 25, 25, param::AdaptivePooling::Mode::AVERAGE);
}
}  // namespace

//! Runs the adaptive pooling benchmark on the fixture's handle.
TEST_F(ARM_COMMON, BENCHMARK_GLOBAL_POOLING_NCHW44_FP32) {
    benchmark_globalpooling_nchw44_fp32(handle());
}
#endif
}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen