GitOrigin-RevId: fbe0516ffc
@@ -743,20 +743,28 @@ const GraphOptimizer& GraphOptimizer::add_passes_for_optimize_options(
    });
    cb(nhwcd4, {
        add_pass<FuseConvBiasNonlinPass>();
        add_pass(PaddingChannelPass::make(
                cg::GraphCommonOptimizeOptions::LayoutTransform::NHWCD4, true));
        add_pass(ConvertFormatPass::make_nhwcd4_converter());
    });
    cb(nchw88, {
        add_pass<FuseConvBiasNonlinPass>();
        add_pass(PaddingChannelPass::make(
                cg::GraphCommonOptimizeOptions::LayoutTransform::NCHW88, true));
        add_pass(EnableNchwxxPass::make_nchwxx_converter(8));
        add_pass<ShuffleShuffleRemovePass>();
    });
    cb(nchw44, {
        add_pass<FuseConvBiasNonlinPass>();
        add_pass(PaddingChannelPass::make(
                cg::GraphCommonOptimizeOptions::LayoutTransform::NCHW44, true));
        add_pass(EnableNchwxxPass::make_nchwxx_converter(4));
        add_pass<ShuffleShuffleRemovePass>();
    });
    cb(nchw44_dot, {
        add_pass<FuseConvBiasNonlinPass>();
        add_pass(PaddingChannelPass::make(
                cg::GraphCommonOptimizeOptions::LayoutTransform::NCHW44_DOT, true));
        add_pass(EnableNchw44DotPass::make_nchw44_dot_converter());
        add_pass<ShuffleShuffleRemovePass>();
    });
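[Note] With this hunk every mobile layout option schedules channel padding ahead
of the format converter. A minimal sketch of how this is triggered from user
code, mirroring the tests at the end of this patch:

    // enabling nchw44 now implicitly runs PaddingChannelPass
    // (only_padding_weights = true) before EnableNchwxxPass
    auto options = gopt::OptimizeForInferenceOptions{};
    options.enable_fuse_conv_bias_nonlinearity();
    options.enable_nchw44();
    SymbolVar y_opt;
    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);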
@@ -784,7 +792,7 @@ const GraphOptimizer& GraphOptimizer::add_passes_for_optimize_options(
    cb(nchw64, {
        add_pass<FuseConvBiasNonlinPass>();
        add_pass(PaddingChannelPass::make(
                cg::GraphCommonOptimizeOptions::LayoutTransform::NCHW64));
                cg::GraphCommonOptimizeOptions::LayoutTransform::NCHW64, false));
        add_pass<FuseConvBiasZPass>();
        add_pass(EnableNCHW64Pass::make_nchw64_converter());
        add_pass<ShuffleShuffleRemovePass>();
@@ -1,5 +1,6 @@
#include "megbrain/gopt/inference.h"
#include "megbrain/opr/basic_arith.h"
#include "megbrain/opr/dnn/adaptive_pooling.h"
#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/opr/dnn/pooling.h"
#include "megbrain/opr/imgproc.h"
@@ -34,8 +35,8 @@ using ReformatKey = ReformatManager::ReformatKey;
/* ==================== PaddingChannelPass ================= */
namespace {
size_t padding_int4(size_t in_channel, bool flag) {
    static_cast<void>(flag);
size_t padding_int4(size_t in_channel, bool) {
    if (in_channel <= 32) {
        return (8 - (in_channel % 8)) % 8;
    } else {
@@ -43,6 +44,8 @@ size_t padding_int4(size_t in_channel, bool flag) {
    }
}
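[Note] All the padding helpers compute the distance to the next multiple of the
alignment with the same closed form; a quick worked example:

    // pad = (align - (c % align)) % align
    // c = 3,  align = 4 -> pad = 1 (3 -> 4)
    // c = 8,  align = 4 -> pad = 0 (already aligned)
    // padding_int4: c = 30 (<= 32) -> align = 8 -> pad = 2 (30 -> 32)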
//! the flag lets the caller distinguish special cases; e.g. under nchw64 it
//! distinguishes conv-bias from convolution backward data
size_t padding_int8(size_t in_channel, bool flag) {
    if (flag) {
        if (in_channel <= 16) {
@@ -58,24 +61,41 @@ size_t padding_4(size_t in_channel, bool) {
    return (4 - (in_channel % 4)) % 4;
};
size_t padding_8(size_t in_channel, bool) {
    return (8 - (in_channel % 8)) % 8;
};
}  // namespace
std::unique_ptr<PaddingChannelPass> PaddingChannelPass::make(
        cg::GraphCommonOptimizeOptions::LayoutTransform layout_transform) {
        cg::GraphCommonOptimizeOptions::LayoutTransform layout_transform,
        bool only_padding_weights) {
    MIDOUT_B("PaddingChannelPass::make")
    using LayoutTrans = cg::GraphCommonOptimizeOptions::LayoutTransform;
    auto ret = std::make_unique<PaddingChannelPass>();
    auto ret = std::unique_ptr<PaddingChannelPass>(
            new PaddingChannelPass(only_padding_weights));
    auto& alignment_map = ret->m_alignment_map;
    if (layout_transform == LayoutTrans::NCHW64) {
        alignment_map[DTypeEnum::QuantizedS4] = padding_int4;
        alignment_map[DTypeEnum::Quantized4Asymm] = padding_int4;
        alignment_map[DTypeEnum::QuantizedS8] = padding_int8;
    } else if (
            layout_transform == LayoutTrans::NHWCD4 ||
            layout_transform == LayoutTrans::NCHW44 ||
            layout_transform == LayoutTrans::NCHW44_DOT) {
        alignment_map[DTypeEnum::QuantizedS8] = padding_4;
        alignment_map[DTypeEnum::Quantized8Asymm] = padding_4;
        alignment_map[DTypeEnum::Float32] = padding_4;
#if !MEGDNN_DISABLE_FLOAT16
        alignment_map[DTypeEnum::Float16] = padding_4;
#endif
    } else if (layout_transform == LayoutTrans::NCHW88) {
        alignment_map[DTypeEnum::QuantizedS8] = padding_8;
        alignment_map[DTypeEnum::Quantized8Asymm] = padding_8;
        alignment_map[DTypeEnum::Float32] = padding_8;
#if !MEGDNN_DISABLE_FLOAT16
        alignment_map[DTypeEnum::Float16] = padding_8;
#endif
    }
    ret->fill_opr_convert_fun(layout_transform);
    return ret;
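[Note] After make() returns, the alignment map fully determines how much each
dtype is padded under the chosen layout; the values follow directly from the
helpers above:

    // NCHW44: Float32 / QuantizedS8 -> padding_4, e.g. C = 6 -> pad 2 (6 -> 8)
    // NCHW88: Float32 / QuantizedS8 -> padding_8, e.g. C = 6 -> pad 2 (6 -> 8)
    // NCHW64: QuantizedS4 -> padding_int4, e.g. C = 30 -> pad 2 (30 -> 32)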
@@ -138,6 +158,10 @@ VarNode* PaddingChannelPass::extract_subtensor(
    mgb_assert(inp->shape()[2] == orig_shape[2]);
    mgb_assert(inp->shape()[3] == orig_shape[3]);
    size_t orig_channels = orig_shape[1];
    //! if the channel is not padded, do nothing
    if (orig_channels == inp->shape()[1]) {
        return inp;
    }
    auto x = SymbolVar(inp);
    auto cv = [&x](int v) { return x.make_scalar(v); };
    using AIdx = opr::Subtensor::AxisIndexer;
@@ -150,8 +174,25 @@ VarNode* PaddingChannelPass::extract_subtensor(
};
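[Note] extract_subtensor now returns the input unchanged when its producer did
not actually pad; when it does slice, the effect is simply keeping the first
orig_channels channels. An illustrative sketch with the AxisIndexer API used
above (not the verbatim implementation):

    // x[:, :orig_channels] -- drop the zero padding on the channel axis
    auto sliced = opr::Subtensor::make(
            x, {AIdx::make_interval(1, {}, cv(orig_channels), {})});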
VarNode* PaddingChannelPass::pad_in_channels(VarNode* inp, size_t pad_channels) {
    mgb_assert(inp->shape().ndim == 4);
    TensorShape shape{inp->shape()[0], pad_channels, inp->shape()[2], inp->shape()[3]};
    TensorShape shape;
    size_t axis = 0;
    if (inp->shape().ndim == 4) {
        shape = TensorShape{
                inp->shape()[0], pad_channels, inp->shape()[2], inp->shape()[3]};
        axis = 1;
    } else {
        mgb_assert(inp->shape().ndim == 5);
        //! channel-wise convolution
        if (inp->shape()[1] == 1 && inp->shape()[2] == 1) {
            shape = TensorShape{
                    pad_channels, inp->shape()[1], inp->shape()[2], inp->shape()[3],
                    inp->shape()[4]};
            axis = 0;
        } else {
            //! group convolution
            mgb_assert(0, "group convolution can't pad channels\n");
        }
    }
    std::shared_ptr<HostTensorND> host_val =
            std::make_shared<HostTensorND>(inp->comp_node(), inp->dtype());
    host_val->resize(shape);
@@ -159,13 +200,30 @@ VarNode* PaddingChannelPass::pad_in_channels(VarNode* inp, size_t pad_channels)
    size_t size_bytes = TensorLayout{shape, inp->dtype()}.span().dist_byte();
    std::memset(ptr, 0, size_bytes);
    auto padding = opr::ImmutableTensor::make(*inp->owner_graph(), *host_val);
    auto out = opr::Concat::make({inp, padding}, 1);
    auto out = opr::Concat::make({inp, padding}, axis);
    return out.node();
};
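[Note] Shape effect of pad_in_channels after this change, for reference:

    // dense (4-D):        (N, C, H, W)      -> concat on axis 1 -> (N, C+pad, H, W)
    // channel-wise (5-D): (G, 1, 1, KH, KW) -> concat on axis 0 -> (G+pad, 1, 1, KH, KW)
    // any other 5-D shape (true group convolution) hits the assert instead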
VarNode* PaddingChannelPass::pad_out_channels(VarNode* inp, size_t pad_channels) {
    mgb_assert(inp->shape().ndim == 4);
    TensorShape shape{pad_channels, inp->shape()[1], inp->shape()[2], inp->shape()[3]};
    TensorShape shape;
    size_t axis = 0;
    if (inp->shape().ndim == 4) {
        shape = TensorShape{
                pad_channels, inp->shape()[1], inp->shape()[2], inp->shape()[3]};
        axis = 0;
    } else {
        mgb_assert(inp->shape().ndim == 5);
        //! channel-wise convolution
        if (inp->shape()[1] == 1 && inp->shape()[2] == 1) {
            shape = TensorShape{
                    pad_channels, inp->shape()[1], inp->shape()[2], inp->shape()[3],
                    inp->shape()[4]};
            axis = 0;
        } else {
            //! group convolution
            mgb_assert(0, "group convolution can't pad channels\n");
        }
    }
    std::shared_ptr<HostTensorND> host_val =
            std::make_shared<HostTensorND>(inp->comp_node(), inp->dtype());
    host_val->resize(shape);
@@ -173,15 +231,15 @@ VarNode* PaddingChannelPass::pad_out_channels(VarNode* inp, size_t pad_channels)
    size_t size_bytes = TensorLayout{shape, inp->dtype()}.span().dist_byte();
    std::memset(ptr, 0, size_bytes);
    auto padding = opr::ImmutableTensor::make(*inp->owner_graph(), *host_val);
    auto out = opr::Concat::make({inp, padding}, 0);
    auto out = opr::Concat::make({inp, padding}, axis);
    return out.node();
};
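[Note] The dense policy below pads the operands of a conv(-bias) together; the
intended shape bookkeeping (bias now optional, hence new_inp.size() >= 2):

    // data    (N, IC, H, W)    -> pad IC (skipped when m_only_padding_weights)
    // weights (OC, IC, KH, KW) -> pad IC, then pad OC
    // bias    (1, OC, 1, 1)    -> pad OC (only when a third input is present)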
// padding policy for conv bias with data type qint8
OperatorNodeBase* PaddingChannelPass::padding_policy(
// padding policy for dense convolution
OperatorNodeBase* PaddingChannelPass::padding_conv_policy(
        OperatorNodeBase* opr, const VarNodeArray& new_inp) {
    mgb_assert(opr->input().size() == new_inp.size());
    mgb_assert(new_inp.size() == 3);
    mgb_assert(new_inp.size() >= 2);
    //! new and old weights have the same shape
    mgb_assert(opr->input(1)->shape().eq_shape(new_inp[1]->shape()));
    auto inps = new_inp;
@@ -198,7 +256,8 @@ OperatorNodeBase* PaddingChannelPass::padding_policy(
    if (m_padding_oprs.count(opr->input(0)->owner_opr())) {
        //! the producer of the input var was padded, but the input and output
        //! dtypes of that opr may differ, so the alignments may not match
        size_t pad_channels_0 = it->second(new_in_channels, true);
        size_t pad_channels_0 =
                m_only_padding_weights ? 0 : it->second(new_in_channels, true);
        size_t pad_channels_1 = it->second(in_channels, true);
        if (pad_channels_0) {
            inps[0] = pad_in_channels(new_inp[0], pad_channels_0);
@@ -211,7 +270,7 @@ OperatorNodeBase* PaddingChannelPass::padding_policy(
    } else {
        mgb_assert(new_in_channels == in_channels);
        size_t pad_channels = it->second(in_channels, true);
        if (pad_channels > 0) {
        if (pad_channels > 0 && !m_only_padding_weights) {
            inps[0] = pad_in_channels(new_inp[0], pad_channels);
            inps[1] = pad_in_channels(new_inp[1], pad_channels);
        }
@@ -220,31 +279,63 @@ OperatorNodeBase* PaddingChannelPass::padding_policy(
    size_t pad_channels = it->second(out_channels, true);
    if (pad_channels > 0) {
        inps[1] = pad_out_channels(inps[1], pad_channels);
        inps[2] = pad_in_channels(inps[2], pad_channels);
        if (inps.size() >= 3) {
            inps[2] = pad_in_channels(inps[2], pad_channels);
        }
        m_padding_oprs.insert(opr);
    }
    return serialization::copy_opr_shallow(*opr, inps, opr->config());
};
//! padding policy for channel-wise convolution
OperatorNodeBase* PaddingChannelPass::padding_channel_wise_conv_policy(
        OperatorNodeBase* opr, const VarNodeArray& new_inp) {
    mgb_assert(opr->input().size() == new_inp.size());
    mgb_assert(opr->input()[1]->shape().ndim == 5);
    mgb_assert(new_inp.size() >= 2);
    //! new and old weights have the same shape
    mgb_assert(opr->input(1)->shape().eq_shape(new_inp[1]->shape()));
    auto inps = new_inp;
    size_t group = opr->input(1)->shape()[0];
    size_t new_in_channels = new_inp[0]->shape()[1];
    auto it = m_alignment_map.find(opr->input(0)->dtype().enumv());
    if (it != m_alignment_map.end()) {
        mgb_assert(it->second);
    } else {
        return serialization::copy_opr_shallow(*opr, inps, opr->config());
    }
    // pad input channels
    if (m_padding_oprs.count(opr->input(0)->owner_opr())) {
        size_t pad_channels_1 = new_in_channels - group;
        if (pad_channels_1) {
            inps[1] = pad_in_channels(new_inp[1], pad_channels_1);
            m_padding_oprs.insert(opr);
        }
    }
    return serialization::copy_opr_shallow(*opr, inps, opr->config());
};
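[Note] Why channel-wise convolutions can be padded while true group convolutions
cannot, in terms of the 5-D weight layout used with Sparse::GROUP:

    // channel-wise: (G, 1, 1, KH, KW)     -- one in/out channel per group, so
    //                                        padding G just appends zero groups
    // group conv:   (G, OCg, ICg, KH, KW) -- padding G would reshuffle which input
    //                                        channels feed which group, so the pass
    //                                        slices the padding off instead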
void PaddingChannelPass::fill_opr_convert_fun(LayoutTrans layout_trans) {
    add_convbias_replace_func(layout_trans);
    add_conv_replace_func(layout_trans);
    add_conv_backward_data_replace_func(layout_trans);
    add_format_aware_opr_replace_func(layout_trans);
    add_elemwise_like_opr_replace_func(layout_trans);
    add_condition_padding_oprs_replace_func(layout_trans);
    add_nonpadding_oprs_replace_func(layout_trans);
}
void PaddingChannelPass::add_convbias_replace_func(LayoutTrans layout_trans) {
void PaddingChannelPass::add_conv_replace_func(LayoutTrans layout_trans) {
    if (layout_trans == LayoutTrans::NCHW64) {
        m_opr_replace_funcs[opr::ConvBiasForward::typeinfo()] =
                [this](OperatorNodeBase* opr, const VarNodeArray& new_inp) {
                    if (opr->input(0)->dtype().enumv() == DTypeEnum::QuantizedS8) {
                        return padding_policy(opr, new_inp);
                    } else if (
                            opr->input(0)->dtype().enumv() == DTypeEnum::QuantizedS4 ||
                            opr->input(0)->dtype().enumv() ==
                                    DTypeEnum::Quantized4Asymm) {
                        return padding_policy(opr, new_inp);
                    mgb_assert(
                            opr->input()[1]->shape().ndim == 4,
                            "nchw64 format only supports channel padding for dense "
                            "convolution\n");
                    if (opr->input(0)->dtype().enumv() == DTypeEnum::QuantizedS8 ||
                        opr->input(0)->dtype().enumv() == DTypeEnum::QuantizedS4 ||
                        opr->input(0)->dtype().enumv() == DTypeEnum::Quantized4Asymm) {
                        return padding_conv_policy(opr, new_inp);
                    } else {
                        mgb_assert(
                                m_padding_oprs.count(opr->input(0)->owner_opr()) == 0,
@@ -257,11 +348,36 @@ void PaddingChannelPass::add_convbias_replace_func(LayoutTrans layout_trans) {
                                *opr, new_inp, opr->config());
                    }
                };
    } else if (layout_trans == LayoutTrans::NCHW44) {
        m_opr_replace_funcs[opr::ConvBiasForward::typeinfo()] =
                [this](OperatorNodeBase* opr, const VarNodeArray& new_inp) {
                    return padding_policy(opr, new_inp);
                };
    } else if (
            layout_trans == LayoutTrans::NCHW44 ||
            layout_trans == LayoutTrans::NCHW44_DOT ||
            layout_trans == LayoutTrans::NCHW88) {
        auto padding_conv = [this](OperatorNodeBase* opr, const VarNodeArray& new_inp) {
            if (opr->input()[1]->shape().ndim == 4) {
                return padding_conv_policy(opr, new_inp);
            } else {
                mgb_assert(opr->input()[1]->shape().ndim == 5);
                if (opr->input()[1]->shape()[1] == 1 &&
                    opr->input()[1]->shape()[2] == 1) {
                    return padding_channel_wise_conv_policy(opr, new_inp);
                } else {
                    //! group convolution can't pad channels
                    mgb_assert(opr->input().size() == new_inp.size());
                    auto inps = new_inp;
                    for (size_t i = 0; i < new_inp.size(); ++i) {
                        auto cur_inp = opr->input(i);
                        bool padding_cur_inp =
                                m_padding_oprs.count(cur_inp->owner_opr()) > 0;
                        if (padding_cur_inp) {
                            inps[i] = extract_subtensor(inps[i], cur_inp->shape());
                        }
                    }
                    return serialization::copy_opr_shallow(*opr, inps, opr->config());
                }
            }
        };
        m_opr_replace_funcs[opr::ConvBiasForward::typeinfo()] = padding_conv;
        m_opr_replace_funcs[opr::Convolution::typeinfo()] = padding_conv;
    }
}
@@ -298,7 +414,9 @@ void PaddingChannelPass::add_conv_backward_data_replace_func(LayoutTrans layout_
                        size_t pad_channels = new_out_channels - out_channels;
                        inps[0] = pad_out_channels(new_inp[0], pad_channels);
                    } else {
                        size_t pad_channels = it->second(out_channels, false);
                        size_t pad_channels = m_only_padding_weights
                                                    ? 0
                                                    : it->second(out_channels, false);
                        if (pad_channels > 0) {
                            inps[0] = pad_out_channels(new_inp[0], pad_channels);
                            inps[1] = pad_in_channels(new_inp[1], pad_channels);
@@ -313,24 +431,43 @@ void PaddingChannelPass::add_conv_backward_data_replace_func(LayoutTrans layout_
                    }
                    return serialization::copy_opr_shallow(*opr, inps, opr->config());
                };
    } else {
        m_opr_replace_funcs[opr::ConvolutionBackwardData::typeinfo()] =
                [this](OperatorNodeBase* opr, const VarNodeArray& new_inp) {
                    mgb_assert(opr->input(0)->shape().eq_shape(new_inp[0]->shape()));
                    auto inps = new_inp;
                    size_t out_channels = opr->input(0)->shape()[0];
                    size_t new_out_channels = new_inp[1]->shape()[1];
                    // pad output channels
                    if (m_padding_oprs.count(opr->input(1)->owner_opr())) {
                        size_t pad_channels = new_out_channels - out_channels;
                        inps[0] = pad_out_channels(new_inp[0], pad_channels);
                    }
                    out_channels = inps[0]->shape()[0];
                    return serialization::copy_opr_shallow(*opr, inps, opr->config());
                };
    }
}
void PaddingChannelPass::add_format_aware_opr_replace_func(LayoutTrans) {
    auto replace_format_aware_opr = [this](OperatorNodeBase* opr,
                                           const VarNodeArray& new_inp) {
        if (opr->input(0)->dtype().enumv() != DTypeEnum::QuantizedS8 &&
            opr->input(0)->dtype().enumv() != DTypeEnum::QuantizedS4 &&
            opr->input(0)->dtype().enumv() != DTypeEnum::Quantized4Asymm) {
            mgb_assert(
                    m_padding_oprs.count(opr->input(0)->owner_opr()) == 0,
                    "operator(type:%s,name:%s) with data type(%s) cannot have its "
                    "channels padded. extra info:"
                    "consumer(%s), producer(%s)",
                    opr->dyn_typeinfo()->name, opr->cname(),
                    opr->input(0)->dtype().name(), opr->cname(),
                    opr->input(0)->owner_opr()->cname());
            return serialization::copy_opr_shallow(*opr, new_inp, opr->config());
void PaddingChannelPass::add_format_aware_opr_replace_func(LayoutTrans layout_trans) {
    auto replace_format_aware_opr = [this, layout_trans](
                                            OperatorNodeBase* opr,
                                            const VarNodeArray& new_inp) {
        if (layout_trans == LayoutTrans::NCHW64) {
            if (opr->input(0)->dtype().enumv() != DTypeEnum::QuantizedS8 &&
                opr->input(0)->dtype().enumv() != DTypeEnum::QuantizedS4 &&
                opr->input(0)->dtype().enumv() != DTypeEnum::Quantized4Asymm) {
                mgb_assert(
                        m_padding_oprs.count(opr->input(0)->owner_opr()) == 0,
                        "operator(type:%s,name:%s) with data type(%s) cannot have "
                        "its channels padded. extra info:"
                        "consumer(%s), producer(%s)",
                        opr->dyn_typeinfo()->name, opr->cname(),
                        opr->input(0)->dtype().name(), opr->cname(),
                        opr->input(0)->owner_opr()->cname());
                return serialization::copy_opr_shallow(*opr, new_inp, opr->config());
            }
        }
        mgb_assert(opr->input().size() == new_inp.size());
        if (m_padding_oprs.count(opr->input(0)->owner_opr())) {
@@ -341,6 +478,9 @@ void PaddingChannelPass::add_format_aware_opr_replace_func(LayoutTrans) {
    m_opr_replace_funcs[opr::PoolingForward::typeinfo()] = replace_format_aware_opr;
    m_opr_replace_funcs[opr::WarpPerspectiveForward::typeinfo()] =
            replace_format_aware_opr;
    m_opr_replace_funcs[opr::WarpAffine::typeinfo()] = replace_format_aware_opr;
    m_opr_replace_funcs[opr::AdaptivePooling::typeinfo()] = replace_format_aware_opr;
    m_opr_replace_funcs[opr::ResizeForward::typeinfo()] = replace_format_aware_opr;
}
void PaddingChannelPass::add_elemwise_like_opr_replace_func(LayoutTrans) {
@@ -353,6 +493,10 @@ void PaddingChannelPass::add_elemwise_like_opr_replace_func(LayoutTrans) {
        size_t channels_after_padding = 0;
        size_t i = 0;
        for (auto&& cur_inp : opr->input()) {
            if (cur_inp->shape().is_scalar()) {
                ++i;
                continue;
            }
            bool padding_cur_inp = m_padding_oprs.count(cur_inp->owner_opr()) > 0;
            if (padding_cur_inp) {
                if (!have_padding_inp)
@@ -363,8 +507,9 @@ void PaddingChannelPass::add_elemwise_like_opr_replace_func(LayoutTrans) {
                    same_padding = channels_after_padding == new_inp[i]->shape()[1];
                }
            }
            if (padding_all_inps && (!padding_cur_inp || !same_padding))
            if (padding_all_inps && (!padding_cur_inp || !same_padding)) {
                padding_all_inps = false;
            }
            ++i;
        }
        if (have_padding_inp && !padding_all_inps) {
@@ -378,7 +523,7 @@ void PaddingChannelPass::add_elemwise_like_opr_replace_func(LayoutTrans) {
            }
            return serialization::copy_opr_shallow(*opr, inps, opr->config());
        }
        if (padding_all_inps) {
        if (padding_all_inps && have_padding_inp) {
            m_padding_oprs.insert(opr);
        }
        return serialization::copy_opr_shallow(*opr, new_inp, opr->config());
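[Note] The new scalar check matters for broadcasts like the one exercised by
ChannelPaddingMisc below:

    auto elem0 = conv2 + 1;  // conv2 padded to C = 8; "+ 1" is a scalar input
    // the scalar carries no channel dim, so it is skipped and elem0 is still
    // tracked as padded (C = 8) instead of disabling padding for the elemwise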
@@ -386,6 +531,53 @@ void PaddingChannelPass::add_elemwise_like_opr_replace_func(LayoutTrans) {
    m_opr_replace_funcs[opr::ElemwiseMultiType::typeinfo()] = replace_elemwise_like_opr;
    m_opr_replace_funcs[opr::Elemwise::typeinfo()] = replace_elemwise_like_opr;
    m_opr_replace_funcs[opr::TypeCvt::typeinfo()] = replace_elemwise_like_opr;
    m_opr_replace_funcs[opr::PowC::typeinfo()] = replace_elemwise_like_opr;
}
void PaddingChannelPass::add_condition_padding_oprs_replace_func(LayoutTrans) {
    auto replace_condition_oprs = [this](OperatorNodeBase* opr,
                                         const VarNodeArray& new_inp) {
        mgb_assert(opr->input().size() == new_inp.size());
        bool can_forward_padding = true;
        if (auto reduce = opr->try_cast_final<opr::Reduce>()) {
            auto axis = reduce->param().axis;
            if (axis < 0) {
                axis += reduce->input(0)->layout().ndim;
            }
            //! don't forward padding when reducing along the channel dim
            if (reduce->input().size() > 1) {
                can_forward_padding = false;
            } else {
                can_forward_padding = reduce->param().axis != 1;
            }
        } else if (auto subtensor = opr->try_cast_final<opr::Subtensor>()) {
            auto indexs = subtensor->index_desc();
            size_t input_dim = subtensor->input(0)->shape().ndim;
            for (size_t id = 0; id < indexs.size(); id++) {
                if (indexs[id].axis.get(input_dim) == 1) {
                    //! when the subtensor operates on the channel dim, padding can
                    //! be forwarded without adding a subtensor only in idx mode or
                    //! when the end is explicit
                    can_forward_padding &=
                            indexs[id].idx.node() || indexs[id].end.node();
                }
            }
        }
        auto inps = new_inp;
        for (size_t i = 0; i < new_inp.size(); ++i) {
            auto cur_inp = opr->input(i);
            bool padding_cur_inp = m_padding_oprs.count(cur_inp->owner_opr()) > 0;
            if (padding_cur_inp) {
                if (can_forward_padding) {
                    m_padding_oprs.insert(opr);
                } else {
                    inps[i] = extract_subtensor(inps[i], cur_inp->shape());
                }
            }
        }
        return serialization::copy_opr_shallow(*opr, inps, opr->config());
    };
    m_opr_replace_funcs[opr::Reduce::typeinfo()] = replace_condition_oprs;
    m_opr_replace_funcs[opr::Subtensor::typeinfo()] = replace_condition_oprs;
}
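[Note] The conditional rules implemented above, as exercised by the
ChannelPaddingSubtensor and ChannelPaddingReduce tests below:

    // Reduce along axis 1 (channel)                  -> slice the padding off first
    // Reduce along any other axis                    -> padding forwarded as-is
    // Subtensor on channel, idx mode or explicit end -> padding forwarded as-is
    // Subtensor on channel with an open end (x[5:])  -> slice the padding off first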
void PaddingChannelPass::add_nonpadding_oprs_replace_func(LayoutTrans) {
@@ -405,8 +597,11 @@ void PaddingChannelPass::add_nonpadding_oprs_replace_func(LayoutTrans) {
    m_opr_replace_funcs[opr::Reshape::typeinfo()] = replace_nonpadding_oprs;
    m_opr_replace_funcs[opr::GetVarShape::typeinfo()] = replace_nonpadding_oprs;
    m_opr_replace_funcs[opr::Concat::typeinfo()] = replace_nonpadding_oprs;
    m_opr_replace_funcs[opr::Reduce::typeinfo()] = replace_nonpadding_oprs;
    m_opr_replace_funcs[opr::Subtensor::typeinfo()] = replace_nonpadding_oprs;
    m_opr_replace_funcs[opr::Dimshuffle::typeinfo()] = replace_nonpadding_oprs;
    m_opr_replace_funcs[opr::Argmax::typeinfo()] = replace_nonpadding_oprs;
    m_opr_replace_funcs[opr::Argmin::typeinfo()] = replace_nonpadding_oprs;
    m_opr_replace_funcs[opr::IncrSubtensor::typeinfo()] = replace_nonpadding_oprs;
    m_opr_replace_funcs[opr::AssertEqual::typeinfo()] = replace_nonpadding_oprs;
}
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
@@ -508,39 +508,48 @@ public:
 * assume input network is built in NCHW tensor format
 */
class PaddingChannelPass final : public Pass {
public:
private:
    using ChannelAlignmentMap =
            ThinHashMap<DTypeEnum, std::function<size_t(size_t, bool)>>;
    using LayoutTrans = cg::GraphCommonOptimizeOptions::LayoutTransform;
    using ReplaceFuncs = ThinHashMap<
            Typeinfo*,
            thin_function<OperatorNodeBase*(OperatorNodeBase*, const VarNodeArray&)>>;
public:
    const char* name() const override;
    void apply(OptState& opt) const override;
    void fill_opr_convert_fun(LayoutTrans layout_trans);
    using ReplaceFuncs = ThinHashMap<
            Typeinfo*,
            thin_function<OperatorNodeBase*(OperatorNodeBase*, const VarNodeArray&)>>;
    //! make channel padding opt pass with the given tensor format
    static std::unique_ptr<PaddingChannelPass> make(LayoutTrans layout_transform);
    static std::unique_ptr<PaddingChannelPass> make(
            LayoutTrans layout_transform, bool only_padding_weights = false);
private:
    PaddingChannelPass(bool only_padding_weights = false)
            : m_only_padding_weights(only_padding_weights) {}
    VarNode* extract_subtensor(VarNode* inp, const TensorShape& orig_shape) const;
    VarNode* pad_in_channels(VarNode* inp, size_t pad_channels);
    VarNode* pad_out_channels(VarNode* inp, size_t pad_channels);
    OperatorNodeBase* padding_policy(
    OperatorNodeBase* padding_conv_policy(
            OperatorNodeBase* opr, const VarNodeArray& new_inp);
    OperatorNodeBase* padding_channel_wise_conv_policy(
            OperatorNodeBase* opr, const VarNodeArray& new_inp);
    void add_convbias_replace_func(LayoutTrans layout_transform);
    void add_conv_replace_func(LayoutTrans layout_transform);
    void add_conv_backward_data_replace_func(LayoutTrans layout_transform);
    void add_format_aware_opr_replace_func(LayoutTrans layout_transform);
    void add_elemwise_like_opr_replace_func(LayoutTrans layout_transform);
    void add_condition_padding_oprs_replace_func(LayoutTrans layout_transform);
    void add_nonpadding_oprs_replace_func(LayoutTrans layout_transform);
    ChannelAlignmentMap m_alignment_map;
    ReplaceFuncs m_opr_replace_funcs;
    mutable ThinHashSet<OperatorNodeBase*> m_padding_oprs;
    bool m_only_padding_weights;
};
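[Note] With the widened factory, the pass can also be scheduled standalone; a
minimal sketch matching the new tests below:

    SymbolVar y_pad;
    unpack_vector(
            gopt::GraphOptimizer{}
                    .add_pass(gopt::PaddingChannelPass::make(
                            cg::GraphCommonOptimizeOptions::LayoutTransform::NCHW44,
                            /*only_padding_weights=*/true))
                    .apply({{y_sub}})
                    .endpoint_vars(),
            y_pad);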
/*!
@@ -4004,8 +4004,11 @@ TEST(TestGoptInference, ConvertFormatNCHW44) {
    ASSERT_EQ(
            opr::Convolution::Param::Format::NCHW44,
            find_opr<opr::ConvBias>(y_opt, "conv4").param().format);
    //! nchw44 now adds the PaddingChannel pass, which pads the conv5 output
    //! channels, so conv5 is converted to the nchw44 format as well
    ASSERT_EQ(
            opr::Convolution::Param::Format::NCHW,
            opr::Convolution::Param::Format::NCHW44,
            find_opr<opr::ConvBias>(y_opt, "conv5").param().format);
    graph->compile({{y_opt, {}}})
@@ -4206,7 +4209,6 @@ TEST(TestGoptInference, ConvertFormatNCHW44_DOT) {
    auto w1 = mkcvar("w1", {8, 3, 3, 3}),
         conv1 = opr::Convolution::make(
                 x, w1, param_conv, {}, OperatorNodeConfig("conv1"));
    printf("create conv1 %s\n", conv1.node()->owner_opr()->dyn_typeinfo()->name);
    param_conv.pad_h = param_conv.pad_w = 1;
    //! hybrid nchw44 is not supported here
    opr::ConvBias::Param param_conv_bias_pad0;
@@ -4313,8 +4315,10 @@ TEST(TestGoptInference, ConvertFormatNCHW44_DOT) {
    ASSERT_EQ(
            opr::Convolution::Param::Format::NCHW44,
            find_opr<opr::ConvBias>(y_opt, "conv4").param().format);
    //! nchw44-dot adds the PaddingChannel pass by default, so the conv5 output
    //! channels are padded to a multiple of 4 and conv5 can use the nchw44 format
    ASSERT_EQ(
            opr::Convolution::Param::Format::NCHW,
            opr::Convolution::Param::Format::NCHW44,
            find_opr<opr::ConvBias>(y_opt, "conv5").param().format);
    graph->compile({{y_opt, {}}})
@@ -0,0 +1,446 @@
#include "megbrain/graph/cg.h"
#include "megbrain/opr/dnn/local.h"
#include "megbrain/gopt/basic_arith.h"
#include "megbrain/gopt/gtrans.h"
#include "megbrain/gopt/inference.h"
#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/opr/blas.h"
#include "megbrain/opr/dnn/adaptive_pooling.h"
#include "megbrain/opr/dnn/batch_norm.h"
#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/opr/dnn/pooling.h"
#include "megbrain/opr/imgproc.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/nn_int.h"
#include "megbrain/opr/tensor_gen.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/utility.h"
#include "helper.h"
#include "megbrain/comp_node_env.h"
#include "megbrain/test/helper.h"
#include "megdnn/tensor_format.h"
#include <random>
#include <vector>
using namespace mgb;
namespace {
//! find the first operator of the given type; raise an exception if not found
template <typename T>
T* find_opr(SymbolVar endpoint) {
    T* found = nullptr;
    auto cb = [&found](cg::OperatorNodeBase* opr) {
        if (!found && opr->same_type<T>()) {
            found = &opr->cast_final_safe<T>();
        }
    };
    cg::DepOprIter{cb}.add(endpoint.node()->owner_opr());
    mgb_assert(found, "not found opr from %s", endpoint.node()->name().c_str());
    return found;
}
template <typename T>
T* find_opr(SymbolVar endpoint, const std::string& node_name) {
    T* found = nullptr;
    auto cb = [&found, &node_name](cg::OperatorNodeBase* opr) {
        if (!found && opr->same_type<T>() && opr->name() == node_name) {
            found = &opr->cast_final_safe<T>();
        }
    };
    cg::DepOprIter{cb}.add(endpoint.node()->owner_opr());
    mgb_assert(
            found, "not found opr %s from %s", node_name.c_str(),
            endpoint.node()->name().c_str());
    return found;
}
}  // namespace
TEST(TestGoptInference, ChannelPaddingNCHW44) {
    HostTensorGenerator<> gen;
    auto cn = CompNode::load("cpu0");
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    auto mkcvar = [&](const char* name, const TensorShape& shp) {
        return opr::SharedDeviceTensor::make(*graph, *gen(shp, cn)).rename(name);
    };
    auto host_x = gen({1, 3, 8, 8}, cn);
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    //! hybrid nchw44 mode
    opr::ConvBias::Param param_conv;
    param_conv.pad_h = param_conv.pad_w = 1;
    auto w1 = mkcvar("w1", {8, 3, 3, 3}), b1 = mkcvar("b1", {1, 8, 1, 1}),
         conv1 = opr::ConvBias::make(
                 x, w1, b1, param_conv, {}, OperatorNodeConfig("conv1"));
    auto w2 = mkcvar("w2", {6, 8, 3, 3}), b2 = mkcvar("b2", {1, 6, 1, 1}),
         conv2 = opr::ConvBias::make(
                 conv1, w2, b2, param_conv, {}, OperatorNodeConfig("conv2"));
    auto w3 = mkcvar("w3", {3, 6, 3, 3}), b3 = mkcvar("b3", {1, 3, 1, 1}),
         conv3 = opr::ConvBias::make(
                 conv2, w3, b3, param_conv, {}, OperatorNodeConfig("conv3"));
    opr::Convolution::Param param_convolution;
    param_convolution.sparse = opr::Convolution::Param::Sparse::GROUP;
    //! channel-wise convolution
    auto w4 = mkcvar("w4", {3, 1, 1, 1, 1}),
         conv4 = opr::Convolution::make(
                 conv3, w4, param_convolution, {}, OperatorNodeConfig("conv4"));
    param_convolution.sparse = opr::Convolution::Param::Sparse::DENSE;
    auto w5 = mkcvar("w5", {6, 3, 1, 1}),
         conv5 = opr::Convolution::make(
                 conv4, w5, param_convolution, {}, OperatorNodeConfig("conv5"));
    //! group convolution
    param_convolution.sparse = opr::Convolution::Param::Sparse::GROUP;
    auto w6 = mkcvar("w6", {2, 4, 3, 1, 1}),
         conv6 = opr::Convolution::make(
                 conv5, w6, param_convolution, {}, OperatorNodeConfig("conv6"));
    param_convolution.sparse = opr::Convolution::Param::Sparse::DENSE;
    auto w7 = mkcvar("w7", {3, 8, 1, 1}),
         y = opr::Convolution::make(
                 conv6, w7, param_convolution, {}, OperatorNodeConfig("conv7"));
    SymbolVar y_opt;
    auto options = gopt::OptimizeForInferenceOptions{};
    options.enable_fuse_conv_bias_nonlinearity();
    options.enable_nchw44();
    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
    auto conv1_opt = find_opr<opr::ConvBias>(y_opt, "conv1");
    auto conv2_opt = find_opr<opr::ConvBias>(y_opt, "conv2");
    auto conv3_opt = find_opr<opr::ConvBias>(y_opt, "conv3");
    auto conv4_opt = find_opr<opr::Convolution>(y_opt, "conv4");
    auto conv6_opt = find_opr<opr::Convolution>(y_opt, "conv6");
    //! the input tensor is not padded
    ASSERT_EQ(conv1_opt->input(0)->shape()[1], 3);
    ASSERT_EQ(opr::Convolution::Param::Format::NCHW44, conv1_opt->param().format);
    //! the output channels are padded, which also pads the consumer's input
    ASSERT_EQ(conv2_opt->input(1)->shape()[0], 2);
    ASSERT_EQ(opr::Convolution::Param::Format::NCHW44, conv2_opt->param().format);
    ASSERT_EQ(conv3_opt->input(1)->shape()[0], 1);
    ASSERT_EQ(opr::Convolution::Param::Format::NCHW44, conv3_opt->param().format);
    ASSERT_EQ(conv4_opt->input(1)->shape()[0], 1);
    ASSERT_EQ(opr::Convolution::Param::Format::NCHW44, conv4_opt->param().format);
    ASSERT_EQ(conv6_opt->input(0)->shape()[1], 6);
    ASSERT_EQ(opr::Convolution::Param::Format::NCHW, conv6_opt->param().format);
    //! the channel count of the dst tensor must stay unchanged
    ASSERT_EQ(y_opt.node()->shape()[1], 3);
    graph->compile({{y_opt, {}}})
            ->to_json()
            ->writeto_fpath(output_file("TestGoptInference.ChannelPaddingNCHW44.json"));
    HostTensorND host_y_opt, host_y;
    auto func = graph->compile(
            {make_callback_copy(y, host_y), make_callback_copy(y_opt, host_y_opt)});
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-2);
    //! test changing the input shape
    *host_x = *gen({2, 3, 32, 32}, cn);
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-2);
}
TEST(TestGoptInference, ChannelPaddingSubtensor) {
    HostTensorGenerator<> gen;
    auto cn = CompNode::load("cpu0");
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    auto mkcvar = [&](const char* name, const TensorShape& shp) {
        return opr::SharedDeviceTensor::make(*graph, *gen(shp, cn)).rename(name);
    };
    auto host_x = gen({1, 3, 8, 8}, cn);
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    //! hybrid nchw44 mode
    opr::ConvBias::Param param_conv;
    param_conv.pad_h = param_conv.pad_w = 1;
    auto w1 = mkcvar("w1", {8, 3, 3, 3}), b1 = mkcvar("b1", {1, 8, 1, 1}),
         conv1 = opr::ConvBias::make(
                 x, w1, b1, param_conv, {}, OperatorNodeConfig("conv1"));
    auto w2 = mkcvar("w2", {6, 8, 1, 1}),
         conv2 = opr::Convolution::make(conv1, w2, {}, {}, OperatorNodeConfig("conv2"));
    using AIdx = opr::indexing::AxisIndexer;
    auto sub0 = opr::Subtensor::make(
            conv2,
            {AIdx::make_interval(
                    2, conv2.make_scalar(1), conv2.make_scalar(4),
                    conv2.make_scalar(1))},
            OperatorNodeConfig("sub0"));
    auto sub1 = opr::Subtensor::make(
            conv2,
            {AIdx::make_interval(
                     1, conv2.make_scalar(1), conv2.make_scalar(2),
                     conv2.make_scalar(1)),
             AIdx::make_interval(
                     2, conv2.make_scalar(1), conv2.make_scalar(4),
                     conv2.make_scalar(1))},
            OperatorNodeConfig("sub1"));
    auto sub2 = opr::Subtensor::make(
            conv2,
            {AIdx::make_interval(1, conv2.make_scalar(5), {}, {}),
             AIdx::make_interval(
                     2, conv2.make_scalar(1), conv2.make_scalar(4),
                     conv2.make_scalar(1))},
            OperatorNodeConfig("sub2"));
    auto y_sub = sub0 + sub1 + sub2;
    SymbolVar y_pad;
    unpack_vector(
            gopt::GraphOptimizer{}
                    .add_pass(gopt::PaddingChannelPass::make(
                            cg::GraphCommonOptimizeOptions::LayoutTransform::NCHW44,
                            true))
                    .apply({{y_sub}})
                    .endpoint_vars(),
            y_pad);
    auto conv1_opt = find_opr<opr::ConvBias>(y_pad, "conv1");
    auto conv2_opt = find_opr<opr::Convolution>(y_pad, "conv2");
    auto sub0_opt = find_opr<opr::Subtensor>(y_pad, "sub0");
    auto sub1_opt = find_opr<opr::Subtensor>(y_pad, "sub1");
    auto sub2_opt = find_opr<opr::Subtensor>(y_pad, "sub2");
    //! the input tensor is not padded
    ASSERT_EQ(conv1_opt->input(0)->shape()[1], 3);
    //! the output channels are padded, which also pads the consumer's input
    ASSERT_EQ(conv2_opt->input(1)->shape()[0], 8);
    ASSERT_EQ(conv2_opt->output(0)->shape()[1], 8);
    //! sub0 does not operate on the channel dim, so no extra subtensor is needed
    ASSERT_EQ(sub0_opt->input(0)->shape()[1], 8);
    //! sub1 operates on the channel dim with an explicit end, so no extra
    //! subtensor is needed
    ASSERT_EQ(sub1_opt->input(0)->shape()[1], 8);
    //! sub2 operates on the channel dim with an open end, so a subtensor must be
    //! added to strip the padding
    ASSERT_EQ(sub2_opt->input(0)->shape()[1], 6);
    //! the channel count of the dst tensor must stay unchanged
    ASSERT_EQ(y_pad.node()->shape()[1], 6);
    graph->compile({{y_pad, {}}})
            ->to_json()
            ->writeto_fpath(
                    output_file("TestGoptInference.ChannelPaddingSubtensor.json"));
    HostTensorND host_y_opt, host_y;
    auto func = graph->compile(
            {make_callback_copy(y_sub, host_y), make_callback_copy(y_pad, host_y_opt)});
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-2);
    //! test changing the input shape
    *host_x = *gen({2, 3, 32, 32}, cn);
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-2);
}
TEST(TestGoptInference, ChannelPaddingReduce) {
    HostTensorGenerator<> gen;
    auto cn = CompNode::load("cpu0");
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    auto mkcvar = [&](const char* name, const TensorShape& shp) {
        return opr::SharedDeviceTensor::make(*graph, *gen(shp, cn)).rename(name);
    };
    auto host_x = gen({1, 3, 8, 8}, cn);
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    //! hybrid nchw44 mode
    opr::ConvBias::Param param_conv;
    param_conv.pad_h = param_conv.pad_w = 1;
    auto w1 = mkcvar("w1", {8, 3, 3, 3}), b1 = mkcvar("b1", {1, 8, 1, 1}),
         conv1 = opr::ConvBias::make(
                 x, w1, b1, param_conv, {}, OperatorNodeConfig("conv1"));
    auto w2 = mkcvar("w2", {6, 8, 1, 1}),
         conv2 = opr::Convolution::make(conv1, w2, {}, {}, OperatorNodeConfig("conv2"));
    auto reduce0 = opr::Reduce::make(
            conv2, {opr::Reduce::Mode::MAX, 1}, {}, OperatorNodeConfig("reduce0"));
    auto reduce1 = opr::Reduce::make(
            conv2, {opr::Reduce::Mode::MAX, 2}, {}, OperatorNodeConfig("reduce1"));
    auto y_reduce = reduce0 + reduce1;
    SymbolVar y_pad;
    unpack_vector(
            gopt::GraphOptimizer{}
                    .add_pass(gopt::PaddingChannelPass::make(
                            cg::GraphCommonOptimizeOptions::LayoutTransform::NCHW44,
                            true))
                    .apply({{y_reduce}})
                    .endpoint_vars(),
            y_pad);
    auto conv1_opt = find_opr<opr::ConvBias>(y_pad, "conv1");
    auto conv2_opt = find_opr<opr::Convolution>(y_pad, "conv2");
    auto reduce0_opt = find_opr<opr::Reduce>(y_pad, "reduce0");
    auto reduce1_opt = find_opr<opr::Reduce>(y_pad, "reduce1");
    //! the input tensor is not padded
    ASSERT_EQ(conv1_opt->input(0)->shape()[1], 3);
    //! the output channels are padded, which also pads the consumer's input
    ASSERT_EQ(conv2_opt->input(1)->shape()[0], 8);
    ASSERT_EQ(conv2_opt->output(0)->shape()[1], 8);
    //! reduce0 reduces along the channel dim, so a subtensor must be added
    ASSERT_EQ(reduce0_opt->input(0)->shape()[1], 6);
    //! reduce1 does not reduce along the channel dim, so no subtensor is needed
    ASSERT_EQ(reduce1_opt->input(0)->shape()[1], 8);
    //! the channel count of the dst tensor must stay unchanged
    ASSERT_EQ(y_pad.node()->shape()[1], 6);
    graph->compile({{y_pad, {}}})
            ->to_json()
            ->writeto_fpath(output_file("TestGoptInference.ChannelPaddingReduce.json"));
    HostTensorND host_y_opt, host_y;
    auto func = graph->compile(
            {make_callback_copy(y_reduce, host_y),
             make_callback_copy(y_pad, host_y_opt)});
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-2);
    //! test changing the input shape
    *host_x = *gen({2, 3, 32, 32}, cn);
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-2);
}
TEST(TestGoptInference, ChannelPaddingMisc) {
    HostTensorGenerator<> gen;
    auto cn = CompNode::load("cpu0");
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    auto mkcvar = [&](const char* name, const TensorShape& shp) {
        return opr::SharedDeviceTensor::make(*graph, *gen(shp, cn)).rename(name);
    };
    auto host_x = gen({1, 3, 8, 8}, cn);
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    //! hybrid nchw44 mode
    opr::ConvBias::Param param_conv;
    param_conv.pad_h = param_conv.pad_w = 1;
    auto w1 = mkcvar("w1", {8, 3, 3, 3}), b1 = mkcvar("b1", {1, 8, 1, 1}),
         conv1 = opr::ConvBias::make(
                 x, w1, b1, param_conv, {}, OperatorNodeConfig("conv1"));
    auto w2 = mkcvar("w2", {6, 8, 1, 1}),
         conv2 = opr::Convolution::make(conv1, w2, {}, {}, OperatorNodeConfig("conv2"));
    auto elem0 = conv2 + 1;
    auto concat = opr::Concat::make({elem0, conv2}, 1, OperatorNodeConfig("concat"));
    SymbolVar y_pad;
    unpack_vector(
            gopt::GraphOptimizer{}
                    .add_pass(gopt::PaddingChannelPass::make(
                            cg::GraphCommonOptimizeOptions::LayoutTransform::NCHW44,
                            true))
                    .apply({{concat}})
                    .endpoint_vars(),
            y_pad);
    auto conv1_opt = find_opr<opr::ConvBias>(y_pad, "conv1");
    auto conv2_opt = find_opr<opr::Convolution>(y_pad, "conv2");
    auto elemwise0_opt = find_opr<opr::Elemwise>(y_pad);
    auto concat_opt = find_opr<opr::Concat>(y_pad, "concat");
    //! the input tensor is not padded
    ASSERT_EQ(conv1_opt->input(0)->shape()[1], 3);
    //! the output channels are padded, which also pads the consumer's input
    ASSERT_EQ(conv2_opt->input(1)->shape()[0], 8);
    ASSERT_EQ(conv2_opt->output(0)->shape()[1], 8);
    ASSERT_EQ(elemwise0_opt->output(0)->shape()[1], 8);
    ASSERT_EQ(concat_opt->input(0)->shape()[1], 6);
    ASSERT_EQ(concat_opt->input(1)->shape()[1], 6);
    //! the channel count of the dst tensor must stay unchanged
    ASSERT_EQ(y_pad.node()->shape()[1], 12);
    graph->compile({{y_pad, {}}})
            ->to_json()
            ->writeto_fpath(output_file("TestGoptInference.ChannelPaddingMisc.json"));
    HostTensorND host_y_opt, host_y;
    auto func = graph->compile(
            {make_callback_copy(concat, host_y),
             make_callback_copy(y_pad, host_y_opt)});
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-2);
    //! test changing the input shape
    *host_x = *gen({2, 3, 32, 32}, cn);
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-2);
}
TEST(TestGoptInference, ChannelPaddingMoreOp) {
    HostTensorGenerator<> gen;
    auto cn = CompNode::load("cpu0");
    auto graph = ComputingGraph::make();
    auto mkvar = [&](const char* name, const TensorShape& shp) {
        return opr::Host2DeviceCopy::make(*graph, gen(shp, cn)).rename(name);
    };
    auto mkcvar = [&](const char* name, const TensorShape& shp) {
        return opr::SharedDeviceTensor::make(*graph, *gen(shp, cn)).rename(name);
    };
    auto host_x = gen({2, 3, 8, 8}, cn);
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);
    opr::Convolution::Param param;
    param.pad_h = param.pad_w = 1;
    auto w1 = mkcvar("w1", {6, 3, 3, 3}), conv = opr::Convolution::make(x, w1, param);
    auto shape_of = opr::GetVarShape::make(conv);
    auto subtensor = opr::Subtensor::make(
            shape_of, {opr::Subtensor::AxisIndexer::make_interval(
                              0, x.make_scalar(2), None, x.make_scalar(1))});
    opr::Resize::Param param_resize;
    param_resize.format = opr::Resize::Param::Format::NCHW;
    auto resize = opr::ResizeForward::make(conv, subtensor * 2, param_resize);
    auto mat = mkcvar("mat", {2, 3, 3}),
         warp = opr::WarpPerspectiveForward::make(
                 resize, mat, nullptr, cg::var_from_tensor_shape(x, {4, 4}));
    auto b = mkvar("b", {1, 6, 1, 1}),
         elem = opr::Elemwise::make({warp + b}, opr::Elemwise::Param::Mode::RELU);
    param.pad_h = param.pad_w = 1;
    auto w2 = mkcvar("w2", {7, 6, 3, 3}), y = opr::Convolution::make(elem, w2, param),
         z = opr::AxisAddRemove::make(y, {opr::AxisAddRemove::AxisDesc::make_add(0)});
    SymbolVar y_pad, z_pad;
    unpack_vector(
            gopt::GraphOptimizer{}
                    .add_pass(gopt::PaddingChannelPass::make(
                            cg::GraphCommonOptimizeOptions::LayoutTransform::NCHW44,
                            true))
                    .apply({{y}})
                    .endpoint_vars(),
            y_pad);
    unpack_vector(
            gopt::GraphOptimizer{}
                    .add_pass(gopt::PaddingChannelPass::make(
                            cg::GraphCommonOptimizeOptions::LayoutTransform::NCHW44,
                            true))
                    .apply({{z}})
                    .endpoint_vars(),
            z_pad);
    graph->compile({{y_pad, {}}})
            ->to_json()
            ->writeto_fpath(output_file("TestGoptInference.ChannelPaddingMoreOp.json"));
    HostTensorND host_y_opt, host_y;
    auto func = graph->compile(
            {make_callback_copy(y, host_y), make_callback_copy(y_pad, host_y_opt)});
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-3);
    *host_x = *gen({2, 3, 16, 16}, cn);
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-3);
}
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}