From 8fa9a8defae681fe7ed91df6b4d767a89e1b12b9 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Sat, 19 Mar 2022 22:05:07 +0800 Subject: [PATCH] fix(imperative): fix dot-op implement GitOrigin-RevId: b97290e1fc81af7a84a055018f1d04e65956dca4 --- imperative/src/impl/blob_manager_impl.cpp | 2 +- .../src/impl/interpreter/interpreter_impl.cpp | 14 ++-- imperative/src/impl/ops/dot.cpp | 88 ++++++++++++++++++++++ imperative/src/impl/ops/specializations.cpp | 75 ------------------ 4 files changed, 95 insertions(+), 84 deletions(-) create mode 100644 imperative/src/impl/ops/dot.cpp diff --git a/imperative/src/impl/blob_manager_impl.cpp b/imperative/src/impl/blob_manager_impl.cpp index ee43f63f..4d1d1b28 100644 --- a/imperative/src/impl/blob_manager_impl.cpp +++ b/imperative/src/impl/blob_manager_impl.cpp @@ -72,7 +72,7 @@ DeviceTensorND BlobManagerImpl::alloc_workspace_with_defrag( dev_tensor.reset(storage, layout); return dev_tensor; } - MGB_TRY { return alloc_workspace(cn, layout); } + MGB_TRY { dev_tensor = alloc_workspace(cn, layout); } MGB_CATCH(MemAllocError&, { mgb_log_warn("memory allocation failed for workspace; try defragmenting"); defrag(cn); diff --git a/imperative/src/impl/interpreter/interpreter_impl.cpp b/imperative/src/impl/interpreter/interpreter_impl.cpp index 68fbdde9..99f69a58 100644 --- a/imperative/src/impl/interpreter/interpreter_impl.cpp +++ b/imperative/src/impl/interpreter/interpreter_impl.cpp @@ -583,9 +583,7 @@ TensorInfo* ChannelImpl::alloc() { auto& state = get_channel_state(); auto info = [this] { MGB_LOCK_GUARD(m_pool_spin); - auto* ptr = m_pool.alloc_raw(); - new (ptr) TensorInfo(); - return (TensorInfo*)ptr; + return m_pool.alloc(); }(); info->id = Profiler::next_id(); if (Profiler::is_profiling()) { @@ -816,7 +814,8 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd, std::string reason) { for (auto&& [device, kernel_id] : kernels) { MGB_RECORD_EVENT(KernelLaunchEvent, apply_id, kernel_id, device); MGB_RECORD_EVENT_IF( - profiling_device, RecordDeviceEvent, Timer::record_device(device)); + (Profiler::get_option("profile_device", 0)), RecordDeviceEvent, + Timer::record_device(device)); } // Apply op SmallVector output_descs; @@ -830,7 +829,8 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd, std::string reason) { // After execute for (auto&& [device, kernel_id] : kernels) { MGB_RECORD_EVENT_IF( - profiling_device, RecordDeviceEvent, Timer::record_device(device)); + (Profiler::get_option("profile_device", 0)), RecordDeviceEvent, + Timer::record_device(device)); MGB_RECORD_EVENT(KernelLaunchFinishEvent, apply_id, kernel_id, device); } // End profiling operator @@ -847,9 +847,7 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd, std::string reason) { MGB_RECORD_EVENT(OpOutputEvent, output->id); produce_tensor(output, outputs[i]); MGB_RECORD_EVENT(OpOutputFinishEvent, output->id); - if (Profiler::is_profiling()) { - sample_on_device(output->desc.comp_node, false); - } + sample_on_device(output->desc.comp_node, false); } } diff --git a/imperative/src/impl/ops/dot.cpp b/imperative/src/impl/ops/dot.cpp new file mode 100644 index 00000000..58698a71 --- /dev/null +++ b/imperative/src/impl/ops/dot.cpp @@ -0,0 +1,88 @@ +#include "megbrain/imperative/opr_utility.h" +#include "megbrain/imperative/ops/autogen.h" +#include "megbrain/imperative/utils/stats.h" +#include "megbrain/opr/basic_arith.h" +#include "megbrain/opr/blas.h" +#include "megbrain/opr/utility.h" + +#include "../blob_manager_impl.h" +#include "../dnn_op_helper.h" +#include "../op_trait.h" + +namespace mgb { +namespace imperative { + +namespace { +namespace dot { + +auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { + auto&& op = def.cast_final_safe(); + mgb_assert(inputs.size() == 2); + OperatorNodeConfig config{op.make_name()}; + return opr::Dot::make(inputs[0], inputs[1], config); +} + +SmallVector apply_on_physical_tensor( + const OpDef& def, const SmallVector& inputs, + SmallVector& output_descs, const bool& validated) { + auto comp_node = inputs[0]->comp_node(); + using TensorND = megdnn::TensorND; + SmallVector inp_tensornds; + inp_tensornds.reserve(inputs.size()); + auto&& dnn_opr = opr::intl::create_megdnn_opr(comp_node); + for (unsigned i = 0; i < inputs.size(); ++i) { + auto dnn_ten = inputs[i]->dnn_tensor(); + inp_tensornds.push_back(dnn_ten); + } + TensorLayout oup_layout{inputs[0]->dtype()}; + auto inp1_tensor = inputs[0]->dnn_tensor(); + auto inp2_tensor = inputs[1]->dnn_tensor(); + dnn_opr->deduce_layout(inp1_tensor.layout, inp2_tensor.layout, oup_layout); + + if (inputs[0]->layout().is_empty() || inputs[1]->layout().is_empty()) { + auto fill_opr = opr::intl::create_megdnn_opr(comp_node); + DeviceTensorND out = + BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout); + fill_opr->param() = 0; + fill_opr->exec(out.as_megdnn(), {}); + return {Tensor::make(out)}; + } + + auto wk_size = dnn_opr->get_workspace_in_bytes( + inp_tensornds[0].layout, inp_tensornds[1].layout, output_descs[0].layout); + + DeviceTensorND out_devtensor = + BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout); + TensorLayout wk_layout{TensorShape{wk_size}, inputs[0]->dtype()}; + DeviceTensorND workspace = + BlobManager::inst()->alloc_workspace_with_defrag(comp_node, wk_layout); + megdnn::Workspace dnn_wk(workspace.raw_ptr(), wk_size); + + dnn_opr->exec( + inp_tensornds[0], inp_tensornds[1], out_devtensor.as_megdnn(), dnn_wk); + + return {Tensor::make(out_devtensor)}; +} + +std::tuple, bool> infer_output_attrs_fallible( + const OpDef& def, const SmallVector& inputs) { + mgb_assert( + inputs.size() == 2, "Dot expects 2 inputs; got %lu actually", + inputs.size()); + SmallVector dests(1); + dests[0].layout = TensorLayout(TensorShape{1}, inputs[0].layout.dtype); + dests[0].comp_node = inputs[0].comp_node; + bool validated = inputs[0].layout.ndim != 0 && inputs[1].layout.ndim != 0; + return {dests, validated}; +} + +OP_TRAIT_REG(Dot, Dot, mgb::opr::Dot) + .apply_on_var_node(apply_on_var_node) + .infer_output_attrs_fallible(infer_output_attrs_fallible) + .apply_on_physical_tensor(apply_on_physical_tensor) + .fallback(); + +} // namespace dot +} // anonymous namespace +} // namespace imperative +} // namespace mgb \ No newline at end of file diff --git a/imperative/src/impl/ops/specializations.cpp b/imperative/src/impl/ops/specializations.cpp index 995149ec..f1f66eb6 100644 --- a/imperative/src/impl/ops/specializations.cpp +++ b/imperative/src/impl/ops/specializations.cpp @@ -373,81 +373,6 @@ OP_TRAIT_REG(BatchedMatrixMul, BatchedMatrixMul) } // namespace namespace { -namespace dot { -auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { - auto&& op = def.cast_final_safe(); - mgb_assert(inputs.size() == 2); - OperatorNodeConfig config{op.make_name()}; - return opr::Dot::make(inputs[0], inputs[1], config); -} - -// std::shared_ptr make_from_op_node(cg::OperatorNodeBase* node_) { -// auto* node = &node_->cast_final_safe(); -// return Dot::make(node->param()); -// } - -SmallVector apply_on_physical_tensor( - const OpDef& def, const SmallVector& inputs, - SmallVector& output_descs, const bool& validated) { - auto a = inputs[0]->layout(); - auto comp_node = inputs[0]->comp_node(); - using TensorND = megdnn::TensorND; - SmallVector inp_tensornds; - inp_tensornds.reserve(inputs.size()); - auto dnn_opr = opr::intl::create_megdnn_opr(comp_node); - for (unsigned i = 0; i < inputs.size(); ++i) { - auto dnn_ten = inputs[i]->dnn_tensor(); - inp_tensornds.push_back(dnn_ten); - } - TensorLayout oup_layout{inputs[0]->dtype()}; - auto inp1_tensor = inputs[0]->dnn_tensor(); - auto inp2_tensor = inputs[1]->dnn_tensor(); - dnn_opr->deduce_layout(inp1_tensor.layout, inp2_tensor.layout, oup_layout); - - if (inputs[0]->layout().is_empty() || inputs[1]->layout().is_empty()) { - auto fill_opr = opr::intl::create_megdnn_opr(comp_node); - DeviceTensorND out = - BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout); - fill_opr->param() = 0; - fill_opr->exec(out.as_megdnn(), {}); - return {Tensor::make(out)}; - } - - auto wk_size = dnn_opr->get_workspace_in_bytes( - inp_tensornds[0].layout, inp_tensornds[1].layout, output_descs[0].layout); - - DeviceTensorND out_devtensor = - BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout); - TensorLayout wk_layout{TensorShape{wk_size}, inputs[0]->dtype()}; - DeviceTensorND workspace = - BlobManager::inst()->alloc_workspace_with_defrag(comp_node, wk_layout); - megdnn::Workspace dnn_wk(workspace.raw_ptr(), wk_size); - - dnn_opr->exec( - inp_tensornds[0], inp_tensornds[1], out_devtensor.as_megdnn(), dnn_wk); - - return {Tensor::make(out_devtensor)}; -} - -std::tuple, bool> infer_output_attrs_fallible( - const OpDef& def, const SmallVector& inputs) { - auto&& op_def = def.cast_final_safe(); - SmallVector dests(1); - dests[0].layout = TensorLayout(TensorShape{1}, inputs[0].layout.dtype); - dests[0].comp_node = inputs[0].comp_node; - return {dests, true}; -} - -OP_TRAIT_REG(Dot, Dot, opr::Dot) - .apply_on_var_node(apply_on_var_node) - .infer_output_attrs_fallible(infer_output_attrs_fallible) - .apply_on_physical_tensor(apply_on_physical_tensor) - .fallback(); - -} // namespace dot -} // namespace - -namespace { namespace argsort { auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { auto&& argsort = static_cast(def);