GitOrigin-RevId: 1b438fc436
HuaHua404-patch-1
@@ -27,10 +27,16 @@ struct DnnOprCaller { | |||||
return mgb::opr::intl::create_megdnn_opr<Opr>(cn); | return mgb::opr::intl::create_megdnn_opr<Opr>(cn); | ||||
} | } | ||||
megdnn::Workspace create_workspace(TensorLayout layout) { | |||||
dev_tensor = Tensor::make(layout, cn)->dev_tensor(); | |||||
workspace = | |||||
megdnn::Workspace(dev_tensor.raw_ptr(), dev_tensor.storage().size()); | |||||
// Allocates (at most once) the megdnn workspace used by this DnnOprCaller and
// returns it.
//
// sz: requested workspace size in bytes; 0 means no workspace is needed.
// Returns: the `workspace` object. When sz == 0 nothing is allocated and
//          `workspace` is returned unchanged.
// Throws:  MegBrainError (via mgb_throw) if `workspace.raw_ptr` is already
//          non-null on entry, i.e. a workspace was created before.
//
// NOTE(review): `workspace`, `dev_tensor` and `cn` are declared outside this
// view -- presumably members of the enclosing DnnOprCaller; confirm.
Workspace create_workspace(size_t sz) { | |||||
// Refuse a second allocation on the same caller.
if (workspace.raw_ptr) { | |||||
// NOTE(review): "applicated" in the message presumably means "allocated"/"applied for".
mgb_throw(MegBrainError, "workspace should not be applicated many times"); | |||||
} | |||||
if (sz) { | |||||
// Describe the workspace as a 1-D tensor of `sz` Byte elements.
TensorLayout layout({sz}, dtype::Byte()); | |||||
// The backing tensor is stored in `dev_tensor` -- presumably so the buffer
// stays alive after this call returns; confirm against the struct definition.
dev_tensor = Tensor::make(layout, cn)->dev_tensor(); | |||||
// The workspace size is taken from the storage, not from `sz` directly.
workspace = megdnn::Workspace( | |||||
dev_tensor.raw_ptr(), dev_tensor.storage().size()); | |||||
} | |||||
return workspace; | return workspace; | ||||
} | } | ||||
@@ -135,21 +135,16 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
param.window_w = IW - (OW - 1) * param.stride_w; | param.window_w = IW - (OW - 1) * param.stride_w; | ||||
TensorND src = inputs[0]->dnn_tensor(); | TensorND src = inputs[0]->dnn_tensor(); | ||||
DeviceTensorND dst = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout); | |||||
auto dst = Tensor::make(dst_layout, cn); | |||||
size_t sz = setup_algo<megdnn::Pooling>( | size_t sz = setup_algo<megdnn::Pooling>( | ||||
{src_layout, dst_layout}, dnn_opr.op.get(), 0, false, false, cn, | {src_layout, dst_layout}, dnn_opr.op.get(), 0, false, false, cn, | ||||
::megdnn::param::ExecutionPolicy{}, false); | ::megdnn::param::ExecutionPolicy{}, false); | ||||
megdnn::Workspace dnn_wk; | |||||
if (sz) { | |||||
TensorLayout w_layout({sz}, dtype::Byte()); | |||||
dnn_wk = dnn_opr.create_workspace(w_layout); | |||||
} | |||||
dnn_opr.op->exec(src, dst.as_megdnn(), dnn_wk); | |||||
auto dnn_wk = dnn_opr.create_workspace(sz); | |||||
dnn_opr.op->exec(src, dst->dnn_tensor(), dnn_wk); | |||||
return {Tensor::make(dst)}; | |||||
return {dst}; | |||||
} | } | ||||
OP_TRAIT_REG(AdaptivePooling, AdaptivePooling) | OP_TRAIT_REG(AdaptivePooling, AdaptivePooling) | ||||
@@ -160,10 +160,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
bool empty_input = src_layout.is_empty(); | bool empty_input = src_layout.is_empty(); | ||||
size_t nr_inp = inputs.size(); | size_t nr_inp = inputs.size(); | ||||
DeviceTensorND reserve; | |||||
size_t sz = 0, rsz = 0; | size_t sz = 0, rsz = 0; | ||||
TensorLayout w_layout({sz}, dtype::Byte()); | |||||
TensorLayout r_layout({rsz}, dtype::Byte()); | TensorLayout r_layout({rsz}, dtype::Byte()); | ||||
if (!empty_input) { | if (!empty_input) { | ||||
@@ -172,79 +170,71 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
src_layout, src_layout, src_layout); | src_layout, src_layout, src_layout); | ||||
rsz = dnn_opr.op->get_reserve_in_bytes(src_layout); | rsz = dnn_opr.op->get_reserve_in_bytes(src_layout); | ||||
w_layout = TensorLayout({sz}, dtype::Byte()); | |||||
r_layout = TensorLayout({rsz}, dtype::Byte()); | r_layout = TensorLayout({rsz}, dtype::Byte()); | ||||
} | } | ||||
auto dnn_wk = dnn_opr.create_workspace(w_layout); | |||||
reserve = BlobManager::inst()->alloc_workspace_with_defrag(comp_node, r_layout); | |||||
auto dnn_wk = dnn_opr.create_workspace(sz); | |||||
auto reserve = Tensor::make(r_layout, comp_node); | |||||
// alloc memory | // alloc memory | ||||
DeviceTensorND y = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(comp_node, src_layout); | |||||
auto y = Tensor::make(src_layout, comp_node); | |||||
DeviceTensorND save_mean = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(comp_node, scale_layout); | |||||
DeviceTensorND save_variance = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(comp_node, scale_layout); | |||||
auto save_mean = Tensor::make(scale_layout, comp_node); | |||||
auto save_variance = Tensor::make(scale_layout, comp_node); | |||||
if (op_def.fwd_mode == ::megdnn::param::BN::FwdMode::INFERENCE) { | if (op_def.fwd_mode == ::megdnn::param::BN::FwdMode::INFERENCE) { | ||||
if (!empty_input) | if (!empty_input) | ||||
dnn_opr.op->exec( | dnn_opr.op->exec( | ||||
inp_tensornds[0], inp_tensornds[1], inp_tensornds[2], | inp_tensornds[0], inp_tensornds[1], inp_tensornds[2], | ||||
inp_tensornds[3], inp_tensornds[4], save_mean.as_megdnn(), | |||||
save_variance.as_megdnn(), reserve.as_megdnn(), y.as_megdnn(), | |||||
inp_tensornds[3], inp_tensornds[4], save_mean->dnn_tensor(), | |||||
save_variance->dnn_tensor(), reserve->dnn_tensor(), y->dnn_tensor(), | |||||
dnn_wk); | dnn_wk); | ||||
return {inputs[3], inputs[4], Tensor::make(reserve), Tensor::make(y)}; | |||||
return {inputs[3], inputs[4], reserve, y}; | |||||
} else { | } else { | ||||
DeviceTensorND mean, variance; | |||||
if (nr_inp == 5) { | if (nr_inp == 5) { | ||||
mean = BlobManager::inst()->alloc_workspace_with_defrag( | |||||
comp_node, scale_layout); | |||||
variance = BlobManager::inst()->alloc_workspace_with_defrag( | |||||
comp_node, scale_layout); | |||||
auto mean = Tensor::make(scale_layout, comp_node); | |||||
auto variance = Tensor::make(scale_layout, comp_node); | |||||
megdnn::RefPtr src_ptr1( | megdnn::RefPtr src_ptr1( | ||||
inp_tensornds[3].get_ref_ptr().get_ptr(), inputs[3]->offset()); | inp_tensornds[3].get_ref_ptr().get_ptr(), inputs[3]->offset()); | ||||
megdnn::RefPtr dst_ptr1( | megdnn::RefPtr dst_ptr1( | ||||
mean.storage().get_ref_ptr(), mean.storage().offset(), false); | |||||
mean->dev_tensor().storage().get_ref_ptr(), | |||||
mean->dev_tensor().storage().offset(), false); | |||||
comp_node.peer_copy_to_ref( | comp_node.peer_copy_to_ref( | ||||
comp_node, dst_ptr1, src_ptr1, scale_layout.span().high_byte); | comp_node, dst_ptr1, src_ptr1, scale_layout.span().high_byte); | ||||
megdnn::RefPtr src_ptr2( | megdnn::RefPtr src_ptr2( | ||||
inp_tensornds[4].get_ref_ptr().get_ptr(), inputs[4]->offset()); | inp_tensornds[4].get_ref_ptr().get_ptr(), inputs[4]->offset()); | ||||
megdnn::RefPtr dst_ptr2( | megdnn::RefPtr dst_ptr2( | ||||
variance.storage().get_ref_ptr(), variance.storage().offset(), | |||||
false); | |||||
variance->dev_tensor().storage().get_ref_ptr(), | |||||
variance->dev_tensor().storage().offset(), false); | |||||
comp_node.peer_copy_to_ref( | comp_node.peer_copy_to_ref( | ||||
comp_node, dst_ptr2, src_ptr2, scale_layout.span().high_byte); | comp_node, dst_ptr2, src_ptr2, scale_layout.span().high_byte); | ||||
if (!empty_input) | if (!empty_input) | ||||
dnn_opr.op->exec( | dnn_opr.op->exec( | ||||
inp_tensornds[0], inp_tensornds[1], inp_tensornds[2], | inp_tensornds[0], inp_tensornds[1], inp_tensornds[2], | ||||
mean.as_megdnn(), variance.as_megdnn(), save_mean.as_megdnn(), | |||||
save_variance.as_megdnn(), reserve.as_megdnn(), y.as_megdnn(), | |||||
dnn_wk); | |||||
mean->dnn_tensor(), variance->dnn_tensor(), | |||||
save_mean->dnn_tensor(), save_variance->dnn_tensor(), | |||||
reserve->dnn_tensor(), y->dnn_tensor(), dnn_wk); | |||||
return {Tensor::make(mean), Tensor::make(variance), | |||||
Tensor::make(save_mean), Tensor::make(save_variance), | |||||
Tensor::make(reserve), Tensor::make(y)}; | |||||
return {mean, variance, save_mean, save_variance, reserve, y}; | |||||
} | } | ||||
TensorLayout m_layout({0}, scale_layout.dtype); | TensorLayout m_layout({0}, scale_layout.dtype); | ||||
mean = BlobManager::inst()->alloc_workspace_with_defrag(comp_node, m_layout); | |||||
variance = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(comp_node, m_layout); | |||||
auto mean = Tensor::make(m_layout, comp_node); | |||||
auto variance = Tensor::make(m_layout, comp_node); | |||||
if (!empty_input) { | if (!empty_input) { | ||||
dnn_opr.op->exec( | dnn_opr.op->exec( | ||||
inp_tensornds[0], inp_tensornds[1], inp_tensornds[2], | inp_tensornds[0], inp_tensornds[1], inp_tensornds[2], | ||||
mean.as_megdnn(), variance.as_megdnn(), save_mean.as_megdnn(), | |||||
save_variance.as_megdnn(), reserve.as_megdnn(), y.as_megdnn(), | |||||
mean->dnn_tensor(), variance->dnn_tensor(), save_mean->dnn_tensor(), | |||||
save_variance->dnn_tensor(), reserve->dnn_tensor(), y->dnn_tensor(), | |||||
dnn_wk); | dnn_wk); | ||||
} | } | ||||
return {Tensor::make(save_mean), Tensor::make(save_variance), | |||||
Tensor::make(reserve), Tensor::make(y)}; | |||||
return {save_mean, save_variance, reserve, y}; | |||||
} | } | ||||
} | } | ||||
@@ -44,10 +44,9 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
DnnOprCaller<megdnn::CondTake> dnn_op(inp->comp_node()); | DnnOprCaller<megdnn::CondTake> dnn_op(inp->comp_node()); | ||||
dnn_op.op->param().val = 1; | dnn_op.op->param().val = 1; | ||||
TensorLayout m_layout( | |||||
{dnn_op.op->get_workspace_in_bytes(inp->layout())}, dtype::Byte()); | |||||
size_t sz = dnn_op.op->get_workspace_in_bytes(inp->layout()); | |||||
auto dnn_workspace = dnn_op.create_workspace(m_layout); | |||||
auto dnn_workspace = dnn_op.create_workspace(sz); | |||||
dnn_op.op->exec( | dnn_op.op->exec( | ||||
inp->dev_tensor().as_megdnn(), msk->dev_tensor().as_megdnn(), | inp->dev_tensor().as_megdnn(), msk->dev_tensor().as_megdnn(), | ||||
@@ -165,11 +165,10 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
TensorLayout empty_shp({0}, inputs[0]->dtype()); | TensorLayout empty_shp({0}, inputs[0]->dtype()); | ||||
empty_shp.ndim = 0; | empty_shp.ndim = 0; | ||||
DeviceTensorND empty_bias = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(cn, empty_shp); | |||||
auto empty_bias = Tensor::make(empty_shp, cn); | |||||
inp_tensornds[2] = empty_bias.as_megdnn(); | |||||
inp_tensornds[3] = empty_bias.as_megdnn(); | |||||
inp_tensornds[2] = empty_bias->dnn_tensor(); | |||||
inp_tensornds[3] = empty_bias->dnn_tensor(); | |||||
size_t sz = setup_algo<megdnn::ConvBiasForward>( | size_t sz = setup_algo<megdnn::ConvBiasForward>( | ||||
{inp_shapes[0], inp_shapes[1], empty_shp, empty_shp, oup_shapes[0]}, | {inp_shapes[0], inp_shapes[1], empty_shp, empty_shp, oup_shapes[0]}, | ||||
@@ -177,17 +176,15 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
&inp_tensornds); | &inp_tensornds); | ||||
// alloc memory | // alloc memory | ||||
DeviceTensorND out = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(cn, out_layout); | |||||
auto out = Tensor::make(out_layout, cn); | |||||
TensorLayout w_layout({sz}, dtype::Byte()); | |||||
auto dnn_wk = dnn_opr.create_workspace(w_layout); | |||||
auto dnn_wk = dnn_opr.create_workspace(sz); | |||||
// execute | // execute | ||||
dnn_opr.op->exec( | dnn_opr.op->exec( | ||||
inp_tensornds[0], inp_tensornds[1], empty_bias.as_megdnn(), | |||||
empty_bias.as_megdnn(), out.as_megdnn(), nullptr, dnn_wk); | |||||
return {Tensor::make(out)}; | |||||
inp_tensornds[0], inp_tensornds[1], inp_tensornds[2], inp_tensornds[3], | |||||
out->dnn_tensor(), nullptr, dnn_wk); | |||||
return {out}; | |||||
} | } | ||||
OP_TRAIT_REG(Convolution, Convolution, opr::Convolution) | OP_TRAIT_REG(Convolution, Convolution, opr::Convolution) | ||||
@@ -368,6 +365,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
def, inputs[1]->layout().ndim, inputs[0]->layout(), inputs[1]->layout(), | def, inputs[1]->layout().ndim, inputs[0]->layout(), inputs[1]->layout(), | ||||
cn); | cn); | ||||
auto out = Tensor::make(out_layout, cn); | |||||
using TensorND = megdnn::TensorND; | using TensorND = megdnn::TensorND; | ||||
SmallVector<TensorND> inp_tensornds(inputs.size()); | SmallVector<TensorND> inp_tensornds(inputs.size()); | ||||
TensorLayoutArray inp_shapes(inputs.size()), oup_shapes(output_descs.size()); | TensorLayoutArray inp_shapes(inputs.size()), oup_shapes(output_descs.size()); | ||||
@@ -383,16 +382,11 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
{inp_shapes[0], inp_shapes[1], oup_shapes[0]}, dnn_opr.op.get(), 0, false, | {inp_shapes[0], inp_shapes[1], oup_shapes[0]}, dnn_opr.op.get(), 0, false, | ||||
false, cn, convbwd.policy(), false, &inp_tensornds); | false, cn, convbwd.policy(), false, &inp_tensornds); | ||||
DeviceTensorND out = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(cn, out_layout); | |||||
auto wk = Blob::make(cn, sz); | |||||
auto ptr = wk->storage().get(); | |||||
megdnn::Workspace dnn_wk(ptr, sz); | |||||
auto dnn_wk = dnn_opr.create_workspace(sz); | |||||
// execute | // execute | ||||
dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk); | |||||
return {Tensor::make(out)}; | |||||
dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out->dnn_tensor(), dnn_wk); | |||||
return {out}; | |||||
} | } | ||||
OP_TRAIT_REG(ConvolutionBackwardData, ConvolutionBackwardData) | OP_TRAIT_REG(ConvolutionBackwardData, ConvolutionBackwardData) | ||||
@@ -549,18 +543,13 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
false, cn, conv.policy(), false, &inp_tensornds); | false, cn, conv.policy(), false, &inp_tensornds); | ||||
// alloc memory | // alloc memory | ||||
DeviceTensorND out = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(cn, out_layout); | |||||
auto out = Tensor::make(out_layout, cn); | |||||
megdnn::Workspace dnn_wk; | |||||
if (sz != 0) { | |||||
TensorLayout w_layout({sz}, dtype::Byte()); | |||||
dnn_wk = dnn_opr.create_workspace(w_layout); | |||||
} | |||||
auto dnn_wk = dnn_opr.create_workspace(sz); | |||||
// execute | // execute | ||||
dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk); | |||||
return {Tensor::make(out)}; | |||||
dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out->dnn_tensor(), dnn_wk); | |||||
return {out}; | |||||
} | } | ||||
OP_TRAIT_REG(Convolution3D, Convolution3D, opr::Convolution3D) | OP_TRAIT_REG(Convolution3D, Convolution3D, opr::Convolution3D) | ||||
@@ -615,8 +604,7 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
megdnn::Convolution3DBackwardData::deduce_layout_impl( | megdnn::Convolution3DBackwardData::deduce_layout_impl( | ||||
wlayout, dlayout, op_def.param(), oup_layout); | wlayout, dlayout, op_def.param(), oup_layout); | ||||
} | } | ||||
DeviceTensorND oup = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(cn, oup_layout); | |||||
auto oup = Tensor::make(oup_layout, cn); | |||||
SmallVector<megdnn::TensorND> inp_tensornds(inputs.size()); | SmallVector<megdnn::TensorND> inp_tensornds(inputs.size()); | ||||
inp_tensornds[0] = inputs[0]->dnn_tensor(); | inp_tensornds[0] = inputs[0]->dnn_tensor(); | ||||
@@ -624,14 +612,10 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
size_t wk_size = setup_algo<megdnn::Convolution3DBackwardData>( | size_t wk_size = setup_algo<megdnn::Convolution3DBackwardData>( | ||||
{wlayout, dlayout, oup_layout}, dnn_opr.get(), 0, false, false, cn, | {wlayout, dlayout, oup_layout}, dnn_opr.get(), 0, false, false, cn, | ||||
op_def.policy(), false, &inp_tensornds); | op_def.policy(), false, &inp_tensornds); | ||||
megdnn::Workspace dnn_wk; | |||||
if (wk_size != 0) { | |||||
TensorLayout w_layout({wk_size}, dtype::Byte()); | |||||
dnn_wk = caller.create_workspace(w_layout); | |||||
} | |||||
auto dnn_wk = caller.create_workspace(wk_size); | |||||
dnn_opr->exec(inp_tensornds[0], inp_tensornds[1], oup.as_megdnn(), dnn_wk); | |||||
return {Tensor::make(oup)}; | |||||
dnn_opr->exec(inp_tensornds[0], inp_tensornds[1], oup->dnn_tensor(), dnn_wk); | |||||
return {oup}; | |||||
} | } | ||||
auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { | auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { | ||||
@@ -121,10 +121,10 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
megdnn::Elemwise::deduce_shape(inp_shapes, layout); | megdnn::Elemwise::deduce_shape(inp_shapes, layout); | ||||
layout.init_contiguous_stride(); | layout.init_contiguous_stride(); | ||||
DeviceTensorND out = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(comp_node, layout); | |||||
auto out = Tensor::make(layout, comp_node); | |||||
if (is_empty) { | if (is_empty) { | ||||
return {Tensor::make(out)}; | |||||
return {out}; | |||||
} | } | ||||
DnnOprCaller<megdnn::Elemwise> dnn_opr(comp_node); | DnnOprCaller<megdnn::Elemwise> dnn_opr(comp_node); | ||||
@@ -133,12 +133,13 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
dnn_opr.op->param().mode == Mode::FUSE_MUL_ADD4 || | dnn_opr.op->param().mode == Mode::FUSE_MUL_ADD4 || | ||||
(inp_tensornds.size() && | (inp_tensornds.size() && | ||||
inp_tensornds[0].layout.dtype.category() == DTypeCategory::QUANTIZED)) { | inp_tensornds[0].layout.dtype.category() == DTypeCategory::QUANTIZED)) { | ||||
opr::Elemwise::perform_dnn(comp_node, out, inp_tensornds, dnn_opr.op); | |||||
opr::Elemwise::perform_dnn( | |||||
comp_node, out->dnn_tensor(), inp_tensornds, dnn_opr.op); | |||||
} else { | } else { | ||||
dnn_opr.op->exec(inp_tensornds, out.as_megdnn()); | |||||
dnn_opr.op->exec(inp_tensornds, out->dnn_tensor()); | |||||
} | } | ||||
return {Tensor::make(out)}; | |||||
return {out}; | |||||
} | } | ||||
MGB_DEFINE_OPR_CLASS( | MGB_DEFINE_OPR_CLASS( | ||||
@@ -85,10 +85,9 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
TensorPtr out = Tensor::make(tlayout, inp->comp_node()); | TensorPtr out = Tensor::make(tlayout, inp->comp_node()); | ||||
megdnn::TensorND in = inp->dnn_tensor(); | megdnn::TensorND in = inp->dnn_tensor(); | ||||
megdnn::TensorND ind = index->dnn_tensor(); | megdnn::TensorND ind = index->dnn_tensor(); | ||||
TensorLayout m_layout( | |||||
{dnn_op.op->get_workspace_in_bytes(layout, index_layout, tlayout)}, | |||||
dtype::Byte()); | |||||
auto dnn_workspace = dnn_op.create_workspace(m_layout); | |||||
size_t sz = dnn_op.op->get_workspace_in_bytes(layout, index_layout, tlayout); | |||||
auto dnn_workspace = dnn_op.create_workspace(sz); | |||||
dnn_op.op->exec(in, ind, out->dnn_tensor(), dnn_workspace); | dnn_op.op->exec(in, ind, out->dnn_tensor(), dnn_workspace); | ||||
return {out}; | return {out}; | ||||
} | } | ||||
@@ -152,10 +151,9 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
megdnn::TensorND in = inp->dnn_tensor(); | megdnn::TensorND in = inp->dnn_tensor(); | ||||
megdnn::TensorND ind = index->dnn_tensor(); | megdnn::TensorND ind = index->dnn_tensor(); | ||||
megdnn::TensorND su = sub->dnn_tensor(); | megdnn::TensorND su = sub->dnn_tensor(); | ||||
TensorLayout m_layout( | |||||
{dnn_op.op->get_workspace_in_bytes(layout, index_layout, tlayout)}, | |||||
dtype::Byte()); | |||||
auto dnn_workspace = dnn_op.create_workspace(m_layout); | |||||
size_t sz = dnn_op.op->get_workspace_in_bytes(layout, index_layout, tlayout); | |||||
auto dnn_workspace = dnn_op.create_workspace(sz); | |||||
dnn_op.op->exec(out->dnn_tensor(), ind, su, dnn_workspace); | dnn_op.op->exec(out->dnn_tensor(), ind, su, dnn_workspace); | ||||
return {out}; | return {out}; | ||||
} | } | ||||
@@ -45,29 +45,25 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
TensorLayout v_t_1_layout{v_t_1->layout()}; | TensorLayout v_t_1_layout{v_t_1->layout()}; | ||||
TensorLayout lamb_param_layout{lamb_param->layout()}; | TensorLayout lamb_param_layout{lamb_param->layout()}; | ||||
DeviceTensorND m_t = BlobManager::inst()->alloc_workspace_with_defrag( | |||||
m_t_1->comp_node(), m_t_1_layout); | |||||
auto m_t = Tensor::make(m_t_1_layout, m_t_1->comp_node()); | |||||
DeviceTensorND v_t = BlobManager::inst()->alloc_workspace_with_defrag( | |||||
v_t_1->comp_node(), v_t_1_layout); | |||||
auto v_t = Tensor::make(v_t_1_layout, v_t_1->comp_node()); | |||||
DeviceTensorND new_param = BlobManager::inst()->alloc_workspace_with_defrag( | |||||
lamb_param->comp_node(), lamb_param_layout); | |||||
auto new_param = Tensor::make(lamb_param_layout, lamb_param->comp_node()); | |||||
DnnOprCaller<megdnn::LAMBUpdate> caller{lamb_param->comp_node()}; | DnnOprCaller<megdnn::LAMBUpdate> caller{lamb_param->comp_node()}; | ||||
TensorLayout m_layout( | |||||
{caller.op->get_workspace_in_bytes( | |||||
m_t_1->layout(), v_t_1->layout(), lamb_param->layout(), | |||||
grad->layout(), m_t.layout(), v_t.layout(), new_param.layout())}, | |||||
dtype::Byte()); | |||||
size_t sz = caller.op->get_workspace_in_bytes( | |||||
m_t_1->layout(), v_t_1->layout(), lamb_param->layout(), grad->layout(), | |||||
m_t->layout(), v_t->layout(), new_param->layout()); | |||||
auto dnn_workspace = caller.create_workspace(m_layout); | |||||
auto dnn_workspace = caller.create_workspace(sz); | |||||
caller.op->param() = op.param(); | caller.op->param() = op.param(); | ||||
caller.op->exec( | caller.op->exec( | ||||
m_t_1->dev_tensor().as_megdnn(), v_t_1->dev_tensor().as_megdnn(), | m_t_1->dev_tensor().as_megdnn(), v_t_1->dev_tensor().as_megdnn(), | ||||
lamb_param->dev_tensor().as_megdnn(), grad->dev_tensor().as_megdnn(), | lamb_param->dev_tensor().as_megdnn(), grad->dev_tensor().as_megdnn(), | ||||
m_t.as_megdnn(), v_t.as_megdnn(), new_param.as_megdnn(), dnn_workspace); | |||||
return {Tensor::make(m_t), Tensor::make(v_t), Tensor::make(new_param)}; | |||||
m_t->dnn_tensor(), v_t->dnn_tensor(), new_param->dnn_tensor(), | |||||
dnn_workspace); | |||||
return {m_t, v_t, new_param}; | |||||
} | } | ||||
OP_TRAIT_REG(LAMBUpdate, LAMBUpdate) | OP_TRAIT_REG(LAMBUpdate, LAMBUpdate) | ||||
@@ -77,32 +77,25 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
megdnn::LayerNorm::deduce_layout_fwd_impl( | megdnn::LayerNorm::deduce_layout_fwd_impl( | ||||
inputs[0]->dnn_tensor().layout, p, oup_layout, mean_layout, rstd_layout); | inputs[0]->dnn_tensor().layout, p, oup_layout, mean_layout, rstd_layout); | ||||
DeviceTensorND out_devtensor = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(cn, oup_layout); | |||||
DeviceTensorND mean_devtensor = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(cn, mean_layout); | |||||
DeviceTensorND rstd_devtensor = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(cn, rstd_layout); | |||||
megdnn::Workspace dnn_wk; | |||||
auto out = Tensor::make(oup_layout, cn); | |||||
auto mean = Tensor::make(mean_layout, cn); | |||||
auto rstd = Tensor::make(rstd_layout, cn); | |||||
auto wk_size = caller.op->get_workspace_in_bytes( | auto wk_size = caller.op->get_workspace_in_bytes( | ||||
inputs[0]->dnn_tensor().layout, | inputs[0]->dnn_tensor().layout, | ||||
p.affine ? inputs[1]->dnn_tensor().layout : TensorLayout(), | p.affine ? inputs[1]->dnn_tensor().layout : TensorLayout(), | ||||
p.affine ? inputs[2]->dnn_tensor().layout : TensorLayout(), oup_layout, | p.affine ? inputs[2]->dnn_tensor().layout : TensorLayout(), oup_layout, | ||||
mean_layout, rstd_layout); | mean_layout, rstd_layout); | ||||
if (wk_size != 0) { | |||||
TensorLayout w_layout({wk_size}, dtype::Byte()); | |||||
dnn_wk = caller.create_workspace(w_layout); | |||||
} | |||||
auto dnn_wk = caller.create_workspace(wk_size); | |||||
dnn_opr->exec( | |||||
caller.op->exec( | |||||
inputs[0]->dnn_tensor(), | inputs[0]->dnn_tensor(), | ||||
p.affine ? inputs[1]->dnn_tensor() : megdnn::TensorND(), | p.affine ? inputs[1]->dnn_tensor() : megdnn::TensorND(), | ||||
p.affine ? inputs[2]->dnn_tensor() : megdnn::TensorND(), | |||||
out_devtensor.as_megdnn(), mean_devtensor.as_megdnn(), | |||||
rstd_devtensor.as_megdnn(), dnn_wk); | |||||
return {Tensor::make(out_devtensor), Tensor::make(mean_devtensor), | |||||
Tensor::make(rstd_devtensor)}; | |||||
p.affine ? inputs[2]->dnn_tensor() : megdnn::TensorND(), out->dnn_tensor(), | |||||
mean->dnn_tensor(), rstd->dnn_tensor(), dnn_wk); | |||||
return {out, mean, rstd}; | |||||
} | } | ||||
OP_TRAIT_REG(LayerNorm, LayerNorm) | OP_TRAIT_REG(LayerNorm, LayerNorm) | ||||
@@ -185,12 +185,12 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
} | } | ||||
if (dim1 == 0 || dim2 == 0 || layout1[layout1.ndim - 1] == 0) { | if (dim1 == 0 || dim2 == 0 || layout1[layout1.ndim - 1] == 0) { | ||||
DeviceTensorND out = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(cn, real_dst_layout); | |||||
if (!out.empty()) { | |||||
dev_tensor_memset(out, 0); | |||||
auto out = Tensor::make(real_dst_layout, cn); | |||||
if (!out->empty()) { | |||||
dev_tensor_memset(out->dev_tensor(), 0); | |||||
} | } | ||||
return {Tensor::make(out)}; | |||||
return {out}; | |||||
} | } | ||||
TensorLayout layout_a = layout1, layout_b = layout2; | TensorLayout layout_a = layout1, layout_b = layout2; | ||||
@@ -232,13 +232,11 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
size_t sz = setup_algo<megdnn::MatrixMul>( | size_t sz = setup_algo<megdnn::MatrixMul>( | ||||
{layout_a, layout_b, dst_layout}, dnn_opr.op.get(), 0, false, false, cn, | {layout_a, layout_b, dst_layout}, dnn_opr.op.get(), 0, false, false, cn, | ||||
matmul.policy(), false, &inp_tensornds); | matmul.policy(), false, &inp_tensornds); | ||||
DeviceTensorND out = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout); | |||||
TensorLayout w_layout({sz}, dtype::Byte()); | |||||
auto dnn_wk = dnn_opr.create_workspace(w_layout); | |||||
auto out = Tensor::make(dst_layout, cn); | |||||
auto dnn_wk = dnn_opr.create_workspace(sz); | |||||
dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk); | |||||
return {Tensor::make(out.sub(SubTensorSpec::make_from_layout(real_dst_layout)))}; | |||||
dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out->dnn_tensor(), dnn_wk); | |||||
return {out->sub(0, real_dst_layout)}; | |||||
} | } | ||||
SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | ||||
@@ -461,12 +459,12 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
dst_layout.init_contiguous_stride(); | dst_layout.init_contiguous_stride(); | ||||
if (dim1 == 0 || dim2 == 0 || layout1[layout1.ndim - 1] == 0) { | if (dim1 == 0 || dim2 == 0 || layout1[layout1.ndim - 1] == 0) { | ||||
DeviceTensorND out = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout); | |||||
if (!out.empty()) { | |||||
dev_tensor_memset(out, 0); | |||||
auto out = Tensor::make(dst_layout, cn); | |||||
if (!out->empty()) { | |||||
dev_tensor_memset(out->dev_tensor(), 0); | |||||
} | } | ||||
return {Tensor::make(out)}; | |||||
return {out}; | |||||
} | } | ||||
SmallVector<megdnn::TensorND> inp_tensornds(2u); | SmallVector<megdnn::TensorND> inp_tensornds(2u); | ||||
@@ -479,19 +477,17 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
{layout1, layout2, dst_layout}, dnn_opr.op.get(), 0, false, false, cn, | {layout1, layout2, dst_layout}, dnn_opr.op.get(), 0, false, false, cn, | ||||
matmul.policy(), false, &inp_tensornds); | matmul.policy(), false, &inp_tensornds); | ||||
DeviceTensorND out = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout); | |||||
auto out = Tensor::make(dst_layout, cn); | |||||
TensorLayout w_layout({sz}, dtype::Byte()); | |||||
auto dnn_wk = dnn_opr.create_workspace(w_layout); | |||||
dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk); | |||||
auto dnn_wk = dnn_opr.create_workspace(sz); | |||||
dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out->dnn_tensor(), dnn_wk); | |||||
shp1[shp1.ndim - 2] = dst_layout[dst_layout.ndim - 2]; | shp1[shp1.ndim - 2] = dst_layout[dst_layout.ndim - 2]; | ||||
shp1[shp1.ndim - 1] = dst_layout[dst_layout.ndim - 1]; | shp1[shp1.ndim - 1] = dst_layout[dst_layout.ndim - 1]; | ||||
if (maxdim > 3) { | if (maxdim > 3) { | ||||
dst_layout = dst_layout.reshape(shp1); | dst_layout = dst_layout.reshape(shp1); | ||||
} | } | ||||
return {Tensor::make(out.sub(SubTensorSpec::make_from_layout(dst_layout)))}; | |||||
return {out->sub(0, dst_layout)}; | |||||
} | } | ||||
SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | ||||
@@ -540,27 +536,23 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
dnn_opr.op->deduce_layout(inp1_tensor.layout, inp2_tensor.layout, oup_layout); | dnn_opr.op->deduce_layout(inp1_tensor.layout, inp2_tensor.layout, oup_layout); | ||||
if (inputs[0]->layout().is_empty() || inputs[1]->layout().is_empty()) { | if (inputs[0]->layout().is_empty() || inputs[1]->layout().is_empty()) { | ||||
DeviceTensorND out = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout); | |||||
if (!out.empty()) { | |||||
dev_tensor_memset(out, 0); | |||||
auto out = Tensor::make(oup_layout, comp_node); | |||||
if (!out->empty()) { | |||||
dev_tensor_memset(out->dev_tensor(), 0); | |||||
} | } | ||||
return {Tensor::make(out)}; | |||||
return {out}; | |||||
} | } | ||||
auto sz = dnn_opr.op->get_workspace_in_bytes( | auto sz = dnn_opr.op->get_workspace_in_bytes( | ||||
inp_tensornds[0].layout, inp_tensornds[1].layout, output_descs[0].layout); | inp_tensornds[0].layout, inp_tensornds[1].layout, output_descs[0].layout); | ||||
DeviceTensorND out_devtensor = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout); | |||||
auto out = Tensor::make(oup_layout, comp_node); | |||||
TensorLayout w_layout({sz}, dtype::Byte()); | |||||
auto dnn_wk = dnn_opr.create_workspace(w_layout); | |||||
auto dnn_wk = dnn_opr.create_workspace(sz); | |||||
dnn_opr.op->exec( | |||||
inp_tensornds[0], inp_tensornds[1], out_devtensor.as_megdnn(), dnn_wk); | |||||
dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out->dnn_tensor(), dnn_wk); | |||||
return {Tensor::make(out_devtensor)}; | |||||
return {out}; | |||||
} | } | ||||
std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | ||||
@@ -36,9 +36,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
megdnn::CheckNonFinite::Param param({op.scale}); | megdnn::CheckNonFinite::Param param({op.scale}); | ||||
dnn_opr.op->param() = param; | dnn_opr.op->param() = param; | ||||
size_t sz = dnn_opr.op->get_workspace_in_bytes(srcs, dest->layout()); | size_t sz = dnn_opr.op->get_workspace_in_bytes(srcs, dest->layout()); | ||||
TensorLayout w_layout({sz}, dtype::Byte()); | |||||
auto dnn_wk = dnn_opr.create_workspace(w_layout); | |||||
dnn_opr.op->exec(srcs, dest->dev_tensor().as_megdnn(), dnn_wk); | |||||
auto dnn_wk = dnn_opr.create_workspace(sz); | |||||
dnn_opr.op->exec(srcs, dest->dnn_tensor(), dnn_wk); | |||||
return outputs; | return outputs; | ||||
} | } | ||||
@@ -66,17 +66,12 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
{inp_tensornds[0].layout, oup_layout}, dnn_opr.get(), 0, false, false, cn, | {inp_tensornds[0].layout, oup_layout}, dnn_opr.get(), 0, false, false, cn, | ||||
op_def.policy(), false, &inp_tensornds); | op_def.policy(), false, &inp_tensornds); | ||||
DeviceTensorND out_devtensor = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(cn, oup_layout); | |||||
auto out = Tensor::make(oup_layout, cn); | |||||
megdnn::Workspace dnn_wk; | |||||
if (wk_size) { | |||||
TensorLayout w_layout({wk_size}, dtype::Byte()); | |||||
dnn_wk = caller.create_workspace(w_layout); | |||||
} | |||||
auto dnn_wk = caller.create_workspace(wk_size); | |||||
dnn_opr->exec(inp_tensornds[0], out_devtensor.as_megdnn(), dnn_wk); | |||||
return {Tensor::make(out_devtensor)}; | |||||
caller.op->exec(inp_tensornds[0], out->dnn_tensor(), dnn_wk); | |||||
return {out}; | |||||
} | } | ||||
OP_TRAIT_REG(Pooling, Pooling) | OP_TRAIT_REG(Pooling, Pooling) | ||||
@@ -117,20 +117,20 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
layout.remove_axis_inplace(axis); | layout.remove_axis_inplace(axis); | ||||
layout.init_contiguous_stride(); | layout.init_contiguous_stride(); | ||||
} | } | ||||
DeviceTensorND out = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(comp_node, layout); | |||||
auto out = Tensor::make(layout, comp_node); | |||||
std::string err_msg; | std::string err_msg; | ||||
switch (mode) { | switch (mode) { | ||||
case Reduce::Mode::SUM: | case Reduce::Mode::SUM: | ||||
if (!out.empty()) { | |||||
dev_tensor_memset(out, 0); | |||||
if (!out->empty()) { | |||||
dev_tensor_memset(out->dev_tensor(), 0); | |||||
} | } | ||||
break; | break; | ||||
case Reduce::Mode::PRODUCT: | case Reduce::Mode::PRODUCT: | ||||
if (!out.empty()) { | |||||
if (!out->empty()) { | |||||
DnnOprCaller<megdnn::Fill> fill_op(comp_node); | DnnOprCaller<megdnn::Fill> fill_op(comp_node); | ||||
fill_op.op->param() = 1; | fill_op.op->param() = 1; | ||||
fill_op.op->exec(out.as_megdnn(), {}); | |||||
fill_op.op->exec(out->dnn_tensor(), {}); | |||||
} | } | ||||
break; | break; | ||||
case Reduce::Mode::MEAN: | case Reduce::Mode::MEAN: | ||||
@@ -153,34 +153,29 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
MegBrainError, "empty input is not allowed for reduce mode: %s", | MegBrainError, "empty input is not allowed for reduce mode: %s", | ||||
err_msg.c_str()); | err_msg.c_str()); | ||||
} | } | ||||
return {Tensor::make(out)}; | |||||
return {out}; | |||||
} | } | ||||
auto dnn_ten = inputs[0]->dnn_tensor(); | auto dnn_ten = inputs[0]->dnn_tensor(); | ||||
dnn_ten.layout = src; | dnn_ten.layout = src; | ||||
inp_tensornds.push_back(dnn_ten); | inp_tensornds.push_back(dnn_ten); | ||||
megdnn::Workspace dnn_wk; | |||||
auto wk_size = dnn_op.op->get_workspace_in_bytes(src, layout); | auto wk_size = dnn_op.op->get_workspace_in_bytes(src, layout); | ||||
if (wk_size) { | |||||
TensorLayout w_layout({wk_size}, dtype::Byte()); | |||||
dnn_wk = dnn_op.create_workspace(w_layout); | |||||
} | |||||
DeviceTensorND out = | |||||
BlobManager::inst()->alloc_workspace_with_defrag(comp_node, layout); | |||||
dnn_op.op->exec(inp_tensornds[0], out.as_megdnn(), dnn_wk); | |||||
auto dnn_wk = dnn_op.create_workspace(wk_size); | |||||
TensorLayout ori_layout = layout; | |||||
if (!keepdim && src.ndim > 1) { | if (!keepdim && src.ndim > 1) { | ||||
auto out_layout = out.layout(); | |||||
out_layout.remove_axis_inplace(axis); | |||||
out_layout.init_contiguous_stride(); | |||||
out.resize(out_layout); | |||||
layout.remove_axis_inplace(axis); | |||||
layout.init_contiguous_stride(); | |||||
} | } | ||||
return {Tensor::make(out)}; | |||||
auto out = Tensor::make(layout, comp_node); | |||||
auto dnn_out = out->dnn_tensor(); | |||||
dnn_out.layout = ori_layout; | |||||
dnn_op.op->exec(inp_tensornds[0], dnn_out, dnn_wk); | |||||
return {out}; | |||||
} | } | ||||
std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | ||||
@@ -252,9 +252,8 @@ SmallVector<TensorPtr> param_pack_concat_apply_on_physical_tensor( | |||||
HostTensorStorage srcs_storage; | HostTensorStorage srcs_storage; | ||||
srcs_storage.reset(comp_node, srcs_size, srcs_ptr); | srcs_storage.reset(comp_node, srcs_size, srcs_ptr); | ||||
caller.op->exec( | caller.op->exec( | ||||
{srcs_raw_ptr, srcs_layout}, inputs.back()->dev_tensor().as_megdnn(), | |||||
output->dev_tensor().as_megdnn(), | |||||
caller.create_workspace({{ws_size}, dtype::Byte()})); | |||||
{srcs_raw_ptr, srcs_layout}, inputs.back()->dnn_tensor(), | |||||
output->dnn_tensor(), caller.create_workspace(ws_size)); | |||||
async_release(HostTensorND{comp_node, srcs_layout}.storage(srcs_storage)); | async_release(HostTensorND{comp_node, srcs_layout}.storage(srcs_storage)); | ||||
return {output}; | return {output}; | ||||
} | } | ||||
@@ -89,8 +89,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
size_t sz = dnn_opr.op->get_workspace_in_bytes( | size_t sz = dnn_opr.op->get_workspace_in_bytes( | ||||
inputs[0]->layout(), inputs[1]->layout(), out_layout, ind_layout); | inputs[0]->layout(), inputs[1]->layout(), out_layout, ind_layout); | ||||
TensorLayout w_layout({sz}, dtype::Byte()); | |||||
auto dnn_wk = dnn_opr.create_workspace(w_layout); | |||||
auto dnn_wk = dnn_opr.create_workspace(sz); | |||||
dnn_opr.op->exec( | dnn_opr.op->exec( | ||||
inputs[0]->dnn_tensor(), inputs[1]->dnn_tensor(), out.as_megdnn(), | inputs[0]->dnn_tensor(), inputs[1]->dnn_tensor(), out.as_megdnn(), | ||||
@@ -566,9 +566,13 @@ DeviceTensorND Tensor::dev_tensor(bool contiguous) { | |||||
return ret; | return ret; | ||||
} | } | ||||
bool Tensor::empty() { | |||||
return !m_blob->size(); | |||||
} | |||||
megdnn::TensorND Tensor::dnn_tensor() { | megdnn::TensorND Tensor::dnn_tensor() { | ||||
mgb_assert(m_blob, "uninitialized tensor."); | mgb_assert(m_blob, "uninitialized tensor."); | ||||
return {m_layout, {m_blob->storage().get(), m_offset}}; | |||||
return DnnTensorND{m_layout, m_blob->storage(), m_offset}; | |||||
} | } | ||||
void Tensor::fetch_value() { | void Tensor::fetch_value() { | ||||
@@ -10,6 +10,7 @@ | |||||
#include "megbrain/imperative/resource_manager.h" | #include "megbrain/imperative/resource_manager.h" | ||||
#include "megbrain/tensor.h" | #include "megbrain/tensor.h" | ||||
#include "megbrain/utils/metahelper.h" | #include "megbrain/utils/metahelper.h" | ||||
#include "megdnn/basic_types.h" | |||||
namespace mgb { | namespace mgb { | ||||
namespace imperative { | namespace imperative { | ||||
@@ -87,6 +88,22 @@ using EventPtr = std::unique_ptr<CompNode::Event, EventDeleter>; | |||||
class Tensor; | class Tensor; | ||||
using TensorPtr = std::shared_ptr<Tensor>; | using TensorPtr = std::shared_ptr<Tensor>; | ||||
/* | |||||
DnnTensorND keeps a shared reference to the workspace storage | |||||
allocated by the BlobManager to prevent its invalidation | |||||
*/ | |||||
struct DnnTensorND : megdnn::TensorND { | |||||
private: | |||||
std::shared_ptr<dt_byte> m_reference; | |||||
public: | |||||
DnnTensorND(TensorLayout& layout_, std::shared_ptr<dt_byte> ref_ptr, size_t offset) | |||||
: megdnn::TensorND(layout_, {ref_ptr.get(), offset}) { | |||||
m_reference = ref_ptr; | |||||
} | |||||
}; | |||||
class Tensor : public NonCopyableObj { | class Tensor : public NonCopyableObj { | ||||
public: | public: | ||||
Tensor() = default; | Tensor() = default; | ||||
@@ -131,6 +148,8 @@ public: | |||||
void to_contiguous_inplace(); | void to_contiguous_inplace(); | ||||
bool empty(); | |||||
DeviceTensorND dev_tensor(bool contiguous = true); | DeviceTensorND dev_tensor(bool contiguous = true); | ||||
void assign_from_dev_tensor(DeviceTensorND); | void assign_from_dev_tensor(DeviceTensorND); | ||||
@@ -258,9 +258,9 @@ void Elemwise::perform( | |||||
} | } | ||||
void Elemwise::perform_dnn( | void Elemwise::perform_dnn( | ||||
CompNode cn, DeviceTensorND& dest, megdnn::TensorNDArray& inputs, | |||||
CompNode cn, const megdnn::TensorND& dest, megdnn::TensorNDArray& inputs, | |||||
intl::UniqPtrWithCN<megdnn::Elemwise>& opr) { | intl::UniqPtrWithCN<megdnn::Elemwise>& opr) { | ||||
call_megdnn_opr_exec(cn, inputs, dest.as_megdnn(), opr.get(), nullptr); | |||||
call_megdnn_opr_exec(cn, inputs, dest, opr.get(), nullptr); | |||||
} | } | ||||
TensorLayoutArray Elemwise::collective_collapse(const TensorLayoutArray& layouts) { | TensorLayoutArray Elemwise::collective_collapse(const TensorLayoutArray& layouts) { | ||||
@@ -78,7 +78,7 @@ public: | |||||
intl::UniqPtrWithCN<megdnn::Elemwise>& opr); | intl::UniqPtrWithCN<megdnn::Elemwise>& opr); | ||||
MGE_WIN_DECLSPEC_FUC static void perform_dnn( | MGE_WIN_DECLSPEC_FUC static void perform_dnn( | ||||
CompNode cn, DeviceTensorND& dest, megdnn::TensorNDArray& inputs, | |||||
CompNode cn, const megdnn::TensorND& dest, megdnn::TensorNDArray& inputs, | |||||
intl::UniqPtrWithCN<megdnn::Elemwise>& opr); | intl::UniqPtrWithCN<megdnn::Elemwise>& opr); | ||||
using TensorLayoutPtrArray = SmallVector<TensorLayout*>; | using TensorLayoutPtrArray = SmallVector<TensorLayout*>; | ||||