From 67f117882b4d75ffdc1c7102c2b98c60d2d0ba50 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Tue, 27 Jul 2021 15:52:59 +0800 Subject: [PATCH] perf(arm_common): add elemwise unary multithread support GitOrigin-RevId: 8eac123f67224e283b368c515bf0b8e7ef565158 --- dnn/src/arm_common/elemwise/unary/algo.cpp | 22 +++++++++++++++++----- dnn/test/arm_common/elemwise.cpp | 7 +++++++ .../cross_build_android_arm_inference.sh | 2 +- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/dnn/src/arm_common/elemwise/unary/algo.cpp b/dnn/src/arm_common/elemwise/unary/algo.cpp index 4b59b7e6..2c947eb0 100644 --- a/dnn/src/arm_common/elemwise/unary/algo.cpp +++ b/dnn/src/arm_common/elemwise/unary/algo.cpp @@ -71,12 +71,19 @@ void ElemwiseImpl::AlgoUnary::exec(const KernParam& kern_param) const { thin_function \ run = OpCallerUnary<_op<_type, _type>, \ BcastType::VEC>::run; \ - MEGDNN_DISPATCH_CPU_KERN( \ + auto kernel = [nr_elems, nr_elems_per_thread, src0, dst_tensor, \ + run](size_t task_id, size_t) { \ + size_t offset = std::min(task_id * nr_elems_per_thread, nr_elems); /* clamp: nr_elems_per_thread is a ceiling division, so trailing tasks may start past the end; without the clamp nr_elems - offset wraps around (size_t) and the task runs out of bounds */ \ + size_t nr_elems_thread = \ + std::min(nr_elems - offset, nr_elems_per_thread); \ + run(static_cast(src0.raw_ptr) + offset, \ + static_cast<_type*>(dst_tensor.raw_ptr) + offset, \ + src0.layout.dtype, dst_tensor.layout.dtype, \ + nr_elems_thread); \ + }; \ + MEGDNN_DISPATCH_MULTI_THREAD_CPU_KERN( \ static_cast(kern_param.handle), \ - run(static_cast(src0.raw_ptr), \ - static_cast<_type*>(dst_tensor.raw_ptr), \ - src0.layout.dtype, dst_tensor.layout.dtype, \ - nr_elems)); \ + nr_threads, kernel); \ } \ MIDOUT_END(); \ return @@ -86,7 +93,12 @@ void ElemwiseImpl::AlgoUnary::exec(const KernParam& kern_param) const { auto& src0 = elparam[0]; auto& dst_tensor = *(kern_param.m_dst); + size_t nr_threads = static_cast(kern_param.handle) + ->megcore_dispatcher() + ->nr_threads(); + size_t nr_elems = src0.layout.total_nr_elems(); + size_t nr_elems_per_thread = (nr_elems + nr_threads - 1) / nr_threads; #define 
DISPATCH_MODE_FLOAT(_case, _type, _type_midout_id) \ switch (kern_param.mode) { \ diff --git a/dnn/test/arm_common/elemwise.cpp b/dnn/test/arm_common/elemwise.cpp index efedb76f..6f31d389 100644 --- a/dnn/test/arm_common/elemwise.cpp +++ b/dnn/test/arm_common/elemwise.cpp @@ -26,6 +26,13 @@ TYPED_TEST(ARM_ELEMWISE, run) { elemwise::run_test(this->handle()); } +template +class ARM_ELEMWISE_MULTI_THREADS : public ARM_COMMON_MULTI_THREADS {}; +TYPED_TEST_CASE(ARM_ELEMWISE_MULTI_THREADS, elemwise::test_types); +TYPED_TEST(ARM_ELEMWISE_MULTI_THREADS, run) { + elemwise::run_test(this->handle()); +} + TEST_F(ARM_COMMON, ELEMWISE_FORWARD_TERNARY) { using Mode = ElemwiseForward::Param::Mode; Checker checker(handle()); diff --git a/scripts/cmake-build/cross_build_android_arm_inference.sh b/scripts/cmake-build/cross_build_android_arm_inference.sh index 1e360046..1f5236e9 100755 --- a/scripts/cmake-build/cross_build_android_arm_inference.sh +++ b/scripts/cmake-build/cross_build_android_arm_inference.sh @@ -2,7 +2,7 @@ set -e ARCHS=("arm64-v8a" "armeabi-v7a") -BUILD_TYPE=RelWithDebInfo +BUILD_TYPE=Release MGE_ARMV8_2_FEATURE_FP16=OFF MGE_DISABLE_FLOAT16=OFF ARCH=arm64-v8a