Browse Source

fix(imperative): fix dot-op implement

GitOrigin-RevId: b97290e1fc
tags/v1.9.0
Megvii Engine Team 3 years ago
parent
commit
8fa9a8defa
4 changed files with 95 additions and 84 deletions
  1. +1
    -1
      imperative/src/impl/blob_manager_impl.cpp
  2. +6
    -8
      imperative/src/impl/interpreter/interpreter_impl.cpp
  3. +88
    -0
      imperative/src/impl/ops/dot.cpp
  4. +0
    -75
      imperative/src/impl/ops/specializations.cpp

+ 1
- 1
imperative/src/impl/blob_manager_impl.cpp View File

@@ -72,7 +72,7 @@ DeviceTensorND BlobManagerImpl::alloc_workspace_with_defrag(
dev_tensor.reset(storage, layout); dev_tensor.reset(storage, layout);
return dev_tensor; return dev_tensor;
} }
MGB_TRY { return alloc_workspace(cn, layout); }
MGB_TRY { dev_tensor = alloc_workspace(cn, layout); }
MGB_CATCH(MemAllocError&, { MGB_CATCH(MemAllocError&, {
mgb_log_warn("memory allocation failed for workspace; try defragmenting"); mgb_log_warn("memory allocation failed for workspace; try defragmenting");
defrag(cn); defrag(cn);


+ 6
- 8
imperative/src/impl/interpreter/interpreter_impl.cpp View File

@@ -583,9 +583,7 @@ TensorInfo* ChannelImpl::alloc() {
auto& state = get_channel_state(); auto& state = get_channel_state();
auto info = [this] { auto info = [this] {
MGB_LOCK_GUARD(m_pool_spin); MGB_LOCK_GUARD(m_pool_spin);
auto* ptr = m_pool.alloc_raw();
new (ptr) TensorInfo();
return (TensorInfo*)ptr;
return m_pool.alloc();
}(); }();
info->id = Profiler::next_id(); info->id = Profiler::next_id();
if (Profiler::is_profiling()) { if (Profiler::is_profiling()) {
@@ -816,7 +814,8 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd, std::string reason) {
for (auto&& [device, kernel_id] : kernels) { for (auto&& [device, kernel_id] : kernels) {
MGB_RECORD_EVENT(KernelLaunchEvent, apply_id, kernel_id, device); MGB_RECORD_EVENT(KernelLaunchEvent, apply_id, kernel_id, device);
MGB_RECORD_EVENT_IF( MGB_RECORD_EVENT_IF(
profiling_device, RecordDeviceEvent, Timer::record_device(device));
(Profiler::get_option("profile_device", 0)), RecordDeviceEvent,
Timer::record_device(device));
} }
// Apply op // Apply op
SmallVector<LogicalTensorDesc> output_descs; SmallVector<LogicalTensorDesc> output_descs;
@@ -830,7 +829,8 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd, std::string reason) {
// After execute // After execute
for (auto&& [device, kernel_id] : kernels) { for (auto&& [device, kernel_id] : kernels) {
MGB_RECORD_EVENT_IF( MGB_RECORD_EVENT_IF(
profiling_device, RecordDeviceEvent, Timer::record_device(device));
(Profiler::get_option("profile_device", 0)), RecordDeviceEvent,
Timer::record_device(device));
MGB_RECORD_EVENT(KernelLaunchFinishEvent, apply_id, kernel_id, device); MGB_RECORD_EVENT(KernelLaunchFinishEvent, apply_id, kernel_id, device);
} }
// End profiling operator // End profiling operator
@@ -847,9 +847,7 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd, std::string reason) {
MGB_RECORD_EVENT(OpOutputEvent, output->id); MGB_RECORD_EVENT(OpOutputEvent, output->id);
produce_tensor(output, outputs[i]); produce_tensor(output, outputs[i]);
MGB_RECORD_EVENT(OpOutputFinishEvent, output->id); MGB_RECORD_EVENT(OpOutputFinishEvent, output->id);
if (Profiler::is_profiling()) {
sample_on_device(output->desc.comp_node, false);
}
sample_on_device(output->desc.comp_node, false);
} }
} }




+ 88
- 0
imperative/src/impl/ops/dot.cpp View File

@@ -0,0 +1,88 @@
#include "megbrain/imperative/opr_utility.h"
#include "megbrain/imperative/ops/autogen.h"
#include "megbrain/imperative/utils/stats.h"
#include "megbrain/opr/basic_arith.h"
#include "megbrain/opr/blas.h"
#include "megbrain/opr/utility.h"

#include "../blob_manager_impl.h"
#include "../dnn_op_helper.h"
#include "../op_trait.h"

namespace mgb {
namespace imperative {

namespace {
namespace dot {

// Graph-mode lowering for Dot: emits an opr::Dot node over the two input
// vars, configured with the name produced by the op definition.
auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
    const auto& dot_def = def.cast_final_safe<Dot>();
    mgb_assert(inputs.size() == 2);
    return opr::Dot::make(
            inputs[0], inputs[1], OperatorNodeConfig{dot_def.make_name()});
}

// Eager implementation of Dot on physical tensors, executed through megdnn.
//
// def          : op definition; not consulted here.
// inputs       : exactly two tensors. inputs[0] supplies the comp node and
//                the dtype of the result.
// output_descs : descriptors supplied by the caller. Deliberately not read:
//                the output layout is deduced locally (see below).
// validated    : caller-side flag; unused because nothing here depends on
//                output_descs.
//
// Returns a one-element vector holding the result tensor. If either input
// layout is empty, the result is filled with 0 instead of running the kernel.
SmallVector<TensorPtr> apply_on_physical_tensor(
        const OpDef& def, const SmallVector<TensorPtr>& inputs,
        SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) {
    // Fail with a clear message instead of indexing past the end below.
    mgb_assert(
            inputs.size() == 2, "Dot expects 2 inputs; got %zu actually",
            inputs.size());

    auto comp_node = inputs[0]->comp_node();
    auto dnn_opr = opr::intl::create_megdnn_opr<megdnn::Dot>(comp_node);

    // Convert each input to its megdnn view exactly once.
    auto lhs = inputs[0]->dnn_tensor();
    auto rhs = inputs[1]->dnn_tensor();

    TensorLayout oup_layout{inputs[0]->dtype()};
    dnn_opr->deduce_layout(lhs.layout, rhs.layout, oup_layout);

    if (inputs[0]->layout().is_empty() || inputs[1]->layout().is_empty()) {
        // Nothing to reduce over: produce a zero-filled output directly.
        auto fill_opr = opr::intl::create_megdnn_opr<megdnn::Fill>(comp_node);
        DeviceTensorND out =
                BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout);
        fill_opr->param() = 0;
        fill_opr->exec(out.as_megdnn(), {});
        return {Tensor::make(out)};
    }

    // Size the workspace for the layout the output is actually allocated
    // with, rather than output_descs[0].
    // NOTE(review): output_descs may not be populated when `validated` is
    // false -- confirm against the interpreter; oup_layout is always valid.
    const auto wk_size =
            dnn_opr->get_workspace_in_bytes(lhs.layout, rhs.layout, oup_layout);

    DeviceTensorND out_devtensor =
            BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout);

    // wk_size is a byte count, so the backing buffer is a flat byte array;
    // using the input dtype here would scale the allocation by its element
    // size.
    TensorLayout wk_layout{TensorShape{wk_size}, dtype::Byte()};
    DeviceTensorND workspace =
            BlobManager::inst()->alloc_workspace_with_defrag(comp_node, wk_layout);
    megdnn::Workspace dnn_wk(workspace.raw_ptr(), wk_size);

    dnn_opr->exec(lhs, rhs, out_devtensor.as_megdnn(), dnn_wk);

    return {Tensor::make(out_devtensor)};
}

// Infers the output descriptor of Dot from its input descriptors.
//
// def    : op definition; not consulted here.
// inputs : exactly two descriptors (asserted).
//
// Returns {descs, validated}. descs holds one descriptor of shape {1} that
// takes its dtype and comp node from inputs[0]. validated is true only when
// both input layouts have a non-zero ndim.
std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
        const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) {
    // %zu is the portable conversion for size_t; %lu is wrong on platforms
    // where unsigned long is narrower than size_t.
    mgb_assert(
            inputs.size() == 2, "Dot expects 2 inputs; got %zu actually",
            inputs.size());
    SmallVector<LogicalTensorDesc> dests(1);
    dests[0].layout = TensorLayout(TensorShape{1}, inputs[0].layout.dtype);
    dests[0].comp_node = inputs[0].comp_node;
    bool validated = inputs[0].layout.ndim != 0 && inputs[1].layout.ndim != 0;
    return {dests, validated};
}

// Registers the op trait for Dot, installing the graph-mode, shape-inference
// and eager implementations defined above in this namespace.
// NOTE(review): .fallback() presumably supplies defaults for the hooks not
// set here -- confirm in op_trait.h.
OP_TRAIT_REG(Dot, Dot, mgb::opr::Dot)
.apply_on_var_node(apply_on_var_node)
.infer_output_attrs_fallible(infer_output_attrs_fallible)
.apply_on_physical_tensor(apply_on_physical_tensor)
.fallback();

} // namespace dot
} // anonymous namespace
} // namespace imperative
} // namespace mgb

+ 0
- 75
imperative/src/impl/ops/specializations.cpp View File

@@ -373,81 +373,6 @@ OP_TRAIT_REG(BatchedMatrixMul, BatchedMatrixMul)
} // namespace } // namespace


namespace { namespace {
namespace dot {
// Graph-mode lowering for Dot: checks that there are exactly two input vars
// and builds an opr::Dot node named via the op definition's make_name().
auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
auto&& op = def.cast_final_safe<Dot>();
mgb_assert(inputs.size() == 2);
OperatorNodeConfig config{op.make_name()};
return opr::Dot::make(inputs[0], inputs[1], config);
}

// std::shared_ptr<OpDef> make_from_op_node(cg::OperatorNodeBase* node_) {
// auto* node = &node_->cast_final_safe<opr::Dot>();
// return Dot::make(node->param());
// }

// Eager implementation of Dot on physical tensors, executed through megdnn.
// Returns a one-element vector holding the result tensor.
SmallVector<TensorPtr> apply_on_physical_tensor(
const OpDef& def, const SmallVector<TensorPtr>& inputs,
SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) {
// `a` is not referenced again in this function.
auto a = inputs[0]->layout();
auto comp_node = inputs[0]->comp_node();
using TensorND = megdnn::TensorND;
SmallVector<TensorND> inp_tensornds;
inp_tensornds.reserve(inputs.size());
auto dnn_opr = opr::intl::create_megdnn_opr<megdnn::Dot>(comp_node);
// Collect the megdnn view of every input.
for (unsigned i = 0; i < inputs.size(); ++i) {
auto dnn_ten = inputs[i]->dnn_tensor();
inp_tensornds.push_back(dnn_ten);
}
// Deduce the output layout from the two input layouts.
TensorLayout oup_layout{inputs[0]->dtype()};
auto inp1_tensor = inputs[0]->dnn_tensor();
auto inp2_tensor = inputs[1]->dnn_tensor();
dnn_opr->deduce_layout(inp1_tensor.layout, inp2_tensor.layout, oup_layout);

// If either input is empty, skip the kernel and return an output filled with 0.
if (inputs[0]->layout().is_empty() || inputs[1]->layout().is_empty()) {
auto fill_opr = opr::intl::create_megdnn_opr<megdnn::Fill>(comp_node);
DeviceTensorND out =
BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout);
fill_opr->param() = 0;
fill_opr->exec(out.as_megdnn(), {});
return {Tensor::make(out)};
}

// NOTE(review): this reads output_descs[0] although oup_layout was already
// deduced above -- confirm output_descs is always populated by the caller.
auto wk_size = dnn_opr->get_workspace_in_bytes(
inp_tensornds[0].layout, inp_tensornds[1].layout, output_descs[0].layout);

DeviceTensorND out_devtensor =
BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout);
// NOTE(review): wk_size is a byte count, but this layout holds wk_size
// elements of the input dtype, so it likely allocates more than needed.
TensorLayout wk_layout{TensorShape{wk_size}, inputs[0]->dtype()};
DeviceTensorND workspace =
BlobManager::inst()->alloc_workspace_with_defrag(comp_node, wk_layout);
megdnn::Workspace dnn_wk(workspace.raw_ptr(), wk_size);

dnn_opr->exec(
inp_tensornds[0], inp_tensornds[1], out_devtensor.as_megdnn(), dnn_wk);

return {Tensor::make(out_devtensor)};
}

// Infers the output descriptor of Dot: a single descriptor of shape {1} that
// takes its dtype and comp node from inputs[0]. Always reports the result as
// validated (second tuple element is true), and does not check the number of
// inputs.
std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) {
// op_def is not used afterwards; presumably kept for the checked cast only.
auto&& op_def = def.cast_final_safe<Dot>();
SmallVector<LogicalTensorDesc> dests(1);
dests[0].layout = TensorLayout(TensorShape{1}, inputs[0].layout.dtype);
dests[0].comp_node = inputs[0].comp_node;
return {dests, true};
}

// Registers the op trait for Dot with the three implementations defined above
// in this namespace.
// NOTE(review): .fallback() presumably supplies defaults for the remaining
// hooks -- confirm in op_trait.h.
OP_TRAIT_REG(Dot, Dot, opr::Dot)
.apply_on_var_node(apply_on_var_node)
.infer_output_attrs_fallible(infer_output_attrs_fallible)
.apply_on_physical_tensor(apply_on_physical_tensor)
.fallback();

} // namespace dot
} // namespace

namespace {
namespace argsort { namespace argsort {
auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
auto&& argsort = static_cast<const Argsort&>(def); auto&& argsort = static_cast<const Argsort&>(def);


Loading…
Cancel
Save