GitOrigin-RevId: d2e1e14d41
release-1.1
@@ -378,15 +378,15 @@ public: | |||
_megdnn_workspace workspace) = 0; | |||
/** | |||
* \brief execute weight preprocessing, read weights from filter and write
* to preprocessed_filter after preprocessed.
* \brief execute weight preprocessing, read weights from filter and bias,
* write to preprocessed_filter after preprocessed.
* | |||
* \param[in] workspace the temporary workspace needed while exec_preprocess
* is running; its size is obtained from get_preprocess_workspace_in_bytes
*/ | |||
virtual void exec_preprocess(const TensorLayout& src_layout, | |||
_megdnn_tensor_in filter, | |||
const TensorLayout& bias_layout, | |||
_megdnn_tensor_in bias, | |||
const TensorLayout& z_layout, | |||
const TensorLayout& dst_layout, | |||
PreprocessedFilter* preprocessed_filter, | |||
@@ -238,11 +238,11 @@ ConvBiasForwardImpl::deduce_preprocessed_filter_layout( | |||
void ConvBiasForwardImpl::exec_preprocess( | |||
const TensorLayout& src_layout, _megdnn_tensor_in filter, | |||
const TensorLayout& bias_layout, const TensorLayout& z_layout, | |||
_megdnn_tensor_in bias, const TensorLayout& z_layout, | |||
const TensorLayout& dst_layout, PreprocessedFilter* preprocessed_filter, | |||
_megdnn_workspace workspace) { | |||
TensorND src{nullptr, src_layout}, dst{nullptr, dst_layout}, | |||
z{nullptr, z_layout}, bias{nullptr, bias_layout}; | |||
z{nullptr, z_layout}; | |||
AlgoBase::ExecArgs args(this, src, filter, bias, z, dst, workspace, | |||
preprocessed_filter); | |||
auto algo = get_algorithm(this, src.layout, filter.layout, bias.layout, | |||
@@ -49,7 +49,7 @@ public: | |||
const TensorLayout&, const TensorLayout&, const TensorLayout&, | |||
const TensorLayout&, const TensorLayout&) override; | |||
void exec_preprocess(const TensorLayout&, _megdnn_tensor_in, | |||
const TensorLayout&, const TensorLayout&, | |||
_megdnn_tensor_in, const TensorLayout&, | |||
const TensorLayout&, PreprocessedFilter*, | |||
_megdnn_workspace) override; | |||
const char* get_algorithm_set_name() const override; | |||
@@ -178,15 +178,14 @@ void ConvBiasImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, | |||
void ConvBiasImpl::exec_preprocess(const TensorLayout& src_layout, | |||
_megdnn_tensor_in filter, | |||
const TensorLayout& bias_layout, | |||
_megdnn_tensor_in bias, | |||
const TensorLayout& z_layout, | |||
const TensorLayout& dst_layout, | |||
PreprocessedFilter* preprocessed_filter, | |||
_megdnn_workspace workspace) { | |||
//! exec_preprocess currently only support preprocess weights before exec, | |||
//! src/dst/bias/z will be ignored, just set to nullptr | |||
TensorND src{nullptr, src_layout}, dst{nullptr, dst_layout}, | |||
bias{nullptr, bias_layout}; | |||
//! exec_preprocess currently only support preprocess weights and bias | |||
//! before exec, src/dst/z will be ignored, just set to nullptr | |||
TensorND src{nullptr, src_layout}, dst{nullptr, dst_layout}; | |||
auto fparam = make_ncb_kern_param(src, filter, bias, dst, workspace, | |||
preprocessed_filter); | |||
//! should not pass workspace_size limit otherwise can not find match algo | |||
@@ -196,7 +195,7 @@ void ConvBiasImpl::exec_preprocess(const TensorLayout& src_layout, | |||
exec_preprocess_with_ncb_kern(fparam, algo); | |||
} else { | |||
naive::ConvBiasForwardImpl::exec_preprocess( | |||
src_layout, filter, bias_layout, z_layout, dst_layout, | |||
src_layout, filter, bias, z_layout, dst_layout, | |||
preprocessed_filter, workspace); | |||
} | |||
} | |||
@@ -57,7 +57,7 @@ public: | |||
void exec_preprocess(const TensorLayout& src_layout, | |||
_megdnn_tensor_in filter, | |||
const TensorLayout& bias_layout, | |||
_megdnn_tensor_in bias, | |||
const TensorLayout& z_layout, | |||
const TensorLayout& dst_layout, | |||
PreprocessedFilter* preprocessed_filter, | |||
@@ -59,7 +59,7 @@ public: | |||
} | |||
void exec_preprocess(const TensorLayout&, _megdnn_tensor_in, | |||
const TensorLayout&, const TensorLayout&, | |||
_megdnn_tensor_in, const TensorLayout&, | |||
const TensorLayout&, PreprocessedFilter*, | |||
_megdnn_workspace) override {} | |||
@@ -601,7 +601,7 @@ struct OprWeightPreprocessProxy<ConvBiasForward> | |||
tensors[3].layout, tensors[4].layout); | |||
WorkspaceWrapper preprocess_workspace(opr->handle(), | |||
preprocess_workspace_size); | |||
opr->exec_preprocess(tensors[0].layout, tensors[1], tensors[2].layout, | |||
opr->exec_preprocess(tensors[0].layout, tensors[1], tensors[2], | |||
tensors[3].layout, tensors[4].layout, | |||
&preprocessed_filter, | |||
preprocess_workspace.workspace()); | |||
@@ -1955,6 +1955,39 @@ typename DnnOp::Algorithm* try_find_any_weight_preprocess_algo( | |||
return nullptr; | |||
} | |||
template <typename DnnOp, typename... Args> | |||
typename DnnOp::Algorithm* try_find_any_bias_preprocess_algo( | |||
DnnOp* dnn_op, const char* mgb_info, Maybe<bool>& found, | |||
Args&& ...args) { | |||
if (found.valid()) { | |||
if (found.val()) { | |||
return dnn_op->execution_policy().algorithm; | |||
} else { | |||
return nullptr; | |||
} | |||
} | |||
for (auto&& algo : dnn_op->get_all_algorithms( | |||
std::forward<Args>(args)...)) { | |||
dnn_op->execution_policy().algorithm = algo; | |||
auto layouts = dnn_op->deduce_preprocessed_filter_layout( | |||
std::forward<Args>(args)...); | |||
if (layouts.size() <= 1) | |||
continue; | |||
bool valid = false; | |||
if (!layouts[1].is_empty()) { | |||
valid = true; | |||
break; | |||
} | |||
if (valid) { | |||
found.emplace(true); | |||
return algo; | |||
} | |||
} | |||
found.emplace(false); | |||
mgb_log_warn("Can't find bias preprocess algo for op %s", mgb_info); | |||
return nullptr; | |||
} | |||
void test_free_memory_in_weight_preprocess(int record_level, CompNode cn) { | |||
HostTensorGenerator<> gen; | |||
auto graph = ComputingGraph::make(); | |||
@@ -2152,4 +2185,54 @@ TEST(TestGraph, FreeMemoryInWeightPreprocessWithMultiReader) { | |||
.empty()); | |||
} | |||
//! Verify that when weight preprocessing also preprocesses the bias (the
//! preprocessed layout at index 1 is non-empty), the original bias var's
//! device memory is flagged MEMORY_NO_NEED and actually freed after the
//! preprocessed copy exists.
TEST(TestGraph, FreeBias) {
    HostTensorGenerator<> gen;
    auto graph = ComputingGraph::make();
    auto cn = CompNode::load("xpu0");
    graph->options().graph_opt.weight_preprocess = true;
    //! mutable graph input (host-to-device copy)
    auto mkvar = [&](const char* name, const TensorShape& shp) {
        return opr::Host2DeviceCopy::make(*graph, gen(shp, cn)).rename(name);
    };
    //! constant weight-like var (SharedDeviceTensor) — the kind whose storage
    //! may be released after preprocessing
    auto mkcvar = [&](const char* name, const TensorShape& shp) {
        return opr::SharedDeviceTensor::make_const(*graph, *gen(shp, cn))
                .rename(name);
    };
    auto x = mkvar("x", {1, 32, 16, 16});
    // ConvBias test dense
    opr::ConvBias::Param param_conv_bias;
    param_conv_bias.pad_h = param_conv_bias.pad_w = 0;
    param_conv_bias.sparse = opr::ConvBias::Param::Sparse::DENSE;
    auto w1 = mkcvar("w1", {32, 32, 1, 1}), b1 = mkcvar("b1", {1, 32, 1, 1});
    auto conv1 = opr::ConvBias::make(x, w1, b1, param_conv_bias);
    //! records whether a bias-preprocessing algo was found; stays invalid
    //! until the algo chooser below has run
    Maybe<bool> wp1;
    conv1.node()->owner_opr()->cast_final_safe<opr::ConvBias>()
            .setup_algo_chooser([&](const cg::OperatorNodeBase* opr) {
                return try_find_any_bias_preprocess_algo(
                        opr->cast_final_safe<opr::ConvBias>().megdnn_opr(),
                        opr->cname(), wp1,
                        opr->input(0)->layout(), opr->input(1)->layout(),
                        opr->input(2)->layout(), TensorLayout{},
                        opr->output(0)->layout());
            });
    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(conv1, host_y)});
    //! first run: flags vars whose memory is no longer needed
    func->execute();
    //! second run: frees the memory flagged on the first run
    func->execute();
    //! assert that var v was flagged and that both its var storage and the
    //! backing SharedDeviceTensor storage were released
    auto check = [&](SymbolVar v) {
        ASSERT_TRUE(v.node()->contain_flag(VarNode::Flag::MEMORY_NO_NEED));
        ASSERT_TRUE(v.node()->dev_tensor().empty());
        ASSERT_TRUE(v.node()->owner_opr()
                            ->cast_final_safe<opr::SharedDeviceTensor>()
                            .get_dev_tensor()
                            .empty());
    };
    ASSERT_TRUE(wp1.valid());
    //! only check the bias when an algo that preprocesses it exists on this
    //! device; otherwise freeing it would be wrong
    if (wp1.val()) {
        check(b1);
    }
}
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -961,17 +961,37 @@ void ConvBiasForward::scn_do_execute_preprocess() { | |||
z_layout = input(3)->layout(); | |||
} | |||
megdnn_opr()->exec_preprocess( | |||
input(0)->layout(), input(1)->dev_tensor().as_megdnn(), bias_layout, | |||
z_layout, output(0)->layout(), preprocessed_filter(), | |||
input(0)->layout(), input(1)->dev_tensor().as_megdnn(), | |||
input(2)->dev_tensor().as_megdnn(), z_layout, output(0)->layout(), | |||
preprocessed_filter(), | |||
intl::get_megdnn_workspace_from_var(output().back())); | |||
//! Flag the input(1) no use later, which can be freed when no other | |||
//! Flag the weight and bias no use later, which can be freed when no other | |||
//! var depend on its dev_value, host_value and shape. | |||
auto receiver_info = | |||
auto receiver_info_weight = | |||
input(1)->owner_graph()->var_receiver_in_current_comp_seq(input(1)); | |||
if (receiver_info.dev_value == 1 && receiver_info.host_value == 0 && | |||
receiver_info.shape == 0) { | |||
if (receiver_info_weight.dev_value == 1 && | |||
receiver_info_weight.host_value == 0 && | |||
receiver_info_weight.shape == 0) { | |||
input(1)->add_flag(VarNode::Flag::MEMORY_NO_NEED); | |||
} | |||
//! if the bias is preprocessed as well
if (input().size() > 3) { | |||
auto preprocessed_layouts = | |||
megdnn_opr()->deduce_preprocessed_filter_layout( | |||
input(0)->layout(), input(1)->layout(), bias_layout, | |||
z_layout, output(0)->layout()); | |||
if (preprocessed_layouts.size() > 1 && | |||
!preprocessed_layouts[1].is_empty()) { | |||
auto receiver_info_bias = | |||
input(2)->owner_graph()->var_receiver_in_current_comp_seq( | |||
input(2)); | |||
if (receiver_info_bias.dev_value == 1 && | |||
receiver_info_bias.host_value == 0 && | |||
receiver_info_bias.shape == 0) { | |||
input(2)->add_flag(VarNode::Flag::MEMORY_NO_NEED); | |||
} | |||
} | |||
} | |||
} | |||
/* ===================== LocalShareForward ==================== */ | |||
@@ -178,9 +178,26 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl( | |||
for (size_t i = 0; i < flt_val.size(); i++) { | |||
pf.tensors[i] = flt_val[i].as_megdnn(); | |||
} | |||
APPLY(_(megdnn_opr)->exec_preprocess(args..., &pf, mdn_workspace), | |||
std::forward_as_tuple(layouts[0], inp_val[1].as_megdnn()), | |||
array_skip<2>(layouts)); | |||
if_constexpr<opr_contain_bias<Opr>()>( | |||
//! convbias | |||
[&](auto __) { | |||
APPLY(__(megdnn_opr) | |||
->exec_preprocess(args..., &pf, | |||
mdn_workspace), | |||
std::forward_as_tuple(layouts[0], | |||
inp_val[1].as_megdnn(), | |||
inp_val[2].as_megdnn()), | |||
array_skip<arity_in - 1>(layouts)); | |||
}, | |||
//! Convolution | |||
[&](auto __) { | |||
APPLY(__(megdnn_opr) | |||
->exec_preprocess(args..., &pf, | |||
mdn_workspace), | |||
std::forward_as_tuple(layouts[0], | |||
inp_val[1].as_megdnn()), | |||
array_skip<2>(layouts)); | |||
}); | |||
} | |||
}); | |||
@@ -75,6 +75,11 @@ constexpr bool opr_supports_preprocess() { | |||
std::is_same<Opr, megdnn::ConvBias>::value; | |||
} | |||
//! Trait: true iff Opr takes a bias input that participates in weight
//! preprocessing (currently only megdnn::ConvBias). Used to select the
//! exec_preprocess call signature at compile time.
template <typename Opr>
constexpr bool opr_contain_bias() {
    return std::is_same<Opr, megdnn::ConvBias>::value;
}
template <typename Opr, bool has_prep> | |||
struct PreprocessFilterImpl { | |||
using T = union {}; | |||