From 95a30eb6f6c934fcf050ba3c93a56c029983a02e Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Thu, 24 Mar 2022 15:14:09 +0800
Subject: [PATCH] perf(imperative): speed up stackmanager guard

GitOrigin-RevId: 12d23b6f7ea00f852d3a7d0641bb723ada50cb3a
---
Review note: the reworked OpDispatchEvent records read the op name through
`guard ? guard->name() : std::string{}` instead of `guard.value().name()`.
The guard is only emplaced when Profiler::is_profiling() is true at function
entry, and std::optional::value() throws std::bad_optional_access on an empty
optional, so the checked form avoids an exception if the argument is ever
evaluated while the guard is empty. Behaviour with an engaged guard is
unchanged.

 .../src/impl/interpreter/interpreter_impl.cpp      | 36 ++++++++++++++--------
 imperative/src/impl/interpreter/stack_manager.h    |  1 +
 .../src/include/megbrain/imperative/utils/stats.h  |  2 +-
 3 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/imperative/src/impl/interpreter/interpreter_impl.cpp b/imperative/src/impl/interpreter/interpreter_impl.cpp
index 9ee07413..e3d5d059 100644
--- a/imperative/src/impl/interpreter/interpreter_impl.cpp
+++ b/imperative/src/impl/interpreter/interpreter_impl.cpp
@@ -138,8 +138,11 @@ Interpreter& Interpreter::inst() {
 Handle ChannelImpl::put(const HostTensorND& value, bool no_cache) {
     MGB_LOCK_GUARD(m_spin);
     mgb_assert(check_available(), "Channel already closed");
-    auto& state = get_channel_state();
-    auto _ = StackManager::Guard{"Put", &state.stack_manager};
+    std::optional guard;
+    if (Profiler::is_profiling()) {
+        auto& state = get_channel_state();
+        guard.emplace("Put", &state.stack_manager);
+    }
     auto info = put_impl(value, no_cache);
     return reinterpret_cast(info);
 }
@@ -183,8 +186,11 @@ Handle ChannelImpl::put(const DeviceTensorND& data, const HostTensorND& hvalue)
 }
 TensorInfo* ChannelImpl::put_impl(
         const DeviceTensorND& data, const HostTensorND& hvalue) {
-    auto& state = get_channel_state();
-    auto _ = StackManager::Guard{"Put", &state.stack_manager};
+    std::optional guard;
+    if (Profiler::is_profiling()) {
+        auto& state = get_channel_state();
+        guard.emplace("Put", &state.stack_manager);
+    }
     auto info = alloc();
     MGB_RECORD_EVENT(TensorCommandEvent, info->id, TensorCommandKind::Put);
     constexpr int size_threshold = TensorShape::MAX_NDIM;
@@ -253,8 +259,10 @@ void ChannelImpl::dispatch_default_cpu(
         SmallVector* outputs) {
     auto& state = get_channel_state();
 
-    auto name = op->trait()->make_name(*op);
-    auto _ = StackManager::Guard(name, &state.stack_manager);
+    std::optional guard;
+    if (Profiler::is_profiling()) {
+        guard.emplace(op->trait()->make_name(*op), &state.stack_manager);
+    }
 
     auto [output_descs, validated] =
             OpDef::infer_output_attrs_fallible(*op, input_descs);
@@ -329,8 +337,9 @@ void ChannelImpl::dispatch_default_cpu(
         return op_info;
     };
     MGB_RECORD_EVENT(
-            OpDispatchEvent, op_id, name, op_info_getter, tinfo_to_tid(input_infos),
-            tinfo_to_tid(output_infos), state.stack_manager.dump());
+            OpDispatchEvent, op_id, guard ? guard->name() : std::string{},
+            op_info_getter, tinfo_to_tid(input_infos), tinfo_to_tid(output_infos),
+            state.stack_manager.dump());
 }
 
 void ChannelImpl::dispatch_kernel(
@@ -340,8 +349,10 @@ void ChannelImpl::dispatch_kernel(
     auto& state = get_channel_state();
     auto& options = state.options;
 
-    auto name = op->trait()->make_name(*op);
-    auto _ = StackManager::Guard{name, &state.stack_manager};
+    std::optional guard;
+    if (Profiler::is_profiling()) {
+        guard.emplace(op->trait()->make_name(*op), &state.stack_manager);
+    }
 
     auto [output_descs, validated] =
             OpDef::infer_output_attrs_fallible(*op, input_descs);
@@ -376,8 +387,9 @@ void ChannelImpl::dispatch_kernel(
         return op_info;
     };
     MGB_RECORD_EVENT(
-            OpDispatchEvent, cmd.id, name, op_info_getter, tinfo_to_tid(cmd.inputs),
-            tinfo_to_tid(cmd.outputs), state.stack_manager.dump());
+            OpDispatchEvent, cmd.id, guard ? guard->name() : std::string{},
+            op_info_getter, tinfo_to_tid(cmd.inputs), tinfo_to_tid(cmd.outputs),
+            state.stack_manager.dump());
     m_worker.add_task(
             {Profiler::next_id(), std::move(cmd),
              get_channel_state().stack_manager.dump()});
diff --git a/imperative/src/impl/interpreter/stack_manager.h b/imperative/src/impl/interpreter/stack_manager.h
index 75b1963c..d7ed81db 100644
--- a/imperative/src/impl/interpreter/stack_manager.h
+++ b/imperative/src/impl/interpreter/stack_manager.h
@@ -98,6 +98,7 @@ public:
                 m_manager->enter(name);
             }
         }
+        std::string name() const { return m_name; }
         ~Guard() { release(); }
         void release() {
             if (m_manager) {
diff --git a/imperative/src/include/megbrain/imperative/utils/stats.h b/imperative/src/include/megbrain/imperative/utils/stats.h
index 0e82a3f3..48d36171 100644
--- a/imperative/src/include/megbrain/imperative/utils/stats.h
+++ b/imperative/src/include/megbrain/imperative/utils/stats.h
@@ -186,7 +186,7 @@ inline stats::Timer::Timer(std::string name, bool default_enabled)
 }
 
 #if MGE_ENABLE_STATS
-#define MGE_TIMER_SCOPE(name) auto name = Stats::name.time_scope()
+#define MGE_TIMER_SCOPE(name) auto name = Stats::name.time_scope_recursive()
 #define MGE_TIMER_SCOPE_RELEASE(name) name.release()
 #define MGE_TIMER_SCOPE_ENABLE(name) auto name = Stats::name.enable_scope()
 #else