From 17371e79b9e2acfbf09b855b7f835c45dfd7ed8e Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Sat, 4 Sep 2021 13:44:56 +0800 Subject: [PATCH] fix(dnn/reduce): fix reduce_mean o16c32 is incorrect for large tensor GitOrigin-RevId: ebf03d814a893efca9dd9e09bb58001c22093fd4 --- dnn/src/common/reduce_helper.h | 4 ++-- dnn/test/cuda/reduce.cpp | 10 ++++++++++ dnn/test/fallback/reduce.cpp | 9 +++++++++ dnn/test/rocm/reduce.cpp | 10 ++++++++++ 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/dnn/src/common/reduce_helper.h b/dnn/src/common/reduce_helper.h index 08d4fc47..45da96e3 100644 --- a/dnn/src/common/reduce_helper.h +++ b/dnn/src/common/reduce_helper.h @@ -48,10 +48,10 @@ struct MeanOp { src_ctype* src; dst_ctype* dst; const size_t B; - + MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { return src[idx]; } MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { - dst[idx] = val / static_cast<dst_ctype>(B); + dst[idx] = val / static_cast<wtype>(B); } static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { return lhs + rhs; diff --git a/dnn/test/cuda/reduce.cpp b/dnn/test/cuda/reduce.cpp index 7815225a..0f0c2dc8 100644 --- a/dnn/test/cuda/reduce.cpp +++ b/dnn/test/cuda/reduce.cpp @@ -103,6 +103,16 @@ TEST_F(CUDA, REDUCE) { .set_param(param) .execs({{1, 4194304, 1}, {1, 1, 1}}); } + + { + // large reduce_mean for O16C32 + Reduce::Param param{Mode::MEAN, 1, + Reduce::Param::DataType::FLOAT_O16xC32}; + checker.set_dtype(0, dtype::Float16()) + .set_dtype(1, dtype::Float16()) + .set_param(param) + .execs({{1, 65536, 5}, {1, 1, 5}}); + } } // vim: syntax=cpp.doxygen diff --git a/dnn/test/fallback/reduce.cpp b/dnn/test/fallback/reduce.cpp index 303b58de..a0c65962 100644 --- a/dnn/test/fallback/reduce.cpp +++ b/dnn/test/fallback/reduce.cpp @@ -74,6 +74,15 @@ TEST_F(FALLBACK, REDUCE) { Config config(param, dtype, shape); configs.push_back(config); } + + { + // large reduce_mean for O16C32 + TensorShape shape{1, 65536, 5}; + Param param(Mode::MEAN, 1, 
DataType::FLOAT_O16xC32); + Config config(param, dtype::Float16(), shape); + configs.push_back(config); + } + for (auto&& config : configs) { auto&& dtype = config.dtype; auto&& param = config.param; diff --git a/dnn/test/rocm/reduce.cpp b/dnn/test/rocm/reduce.cpp index 4893aa15..0d22e106 100644 --- a/dnn/test/rocm/reduce.cpp +++ b/dnn/test/rocm/reduce.cpp @@ -103,6 +103,16 @@ TEST_F(ROCM, REDUCE) { .set_param(param) .execs({{1, 4194304, 1}, {1, 1, 1}}); } + + { + // large reduce_mean for O16C32 + Reduce::Param param{Mode::MEAN, 1, + Reduce::Param::DataType::FLOAT_O16xC32}; + checker.set_dtype(0, dtype::Float16()) + .set_dtype(1, dtype::Float16()) + .set_param(param) + .execs({{1, 65536, 5}, {1, 1, 5}}); + } #endif }