GitOrigin-RevId: 603d2eb94a
tags/v1.0.0-rc1
@@ -444,91 +444,6 @@ void handle_bias_and_nonlinear(Handle* handle, param::ConvBias args, | |||||
} | } | ||||
} | } | ||||
//! Adds the optional z input to the conv-bias result, applies the requested
//! activation and stores the outcome in dst_tensor.
//!
//! Only used for the naive implementation. DO NOT use this function in other
//! backends.
//!
//! \param nonline_mode activation to apply (RELU, H_SWISH, SIGMOID, IDENTITY);
//!        any other value trips megdnn_assert
//! \param conv_bias_tensor result of convolution plus bias
//! \param z_tensor extra addend; ignored when its layout has ndim == 0
//! \param dst_tensor tensor receiving the final result
//! \param workspace_ptr scratch memory, only touched when z is present and its
//!        dtype category is not FLOAT: the Float32 copy of the result is put
//!        at the start, the Float32 copy of z right behind it
void handle_z_inp_and_activation_naive(
        param::ConvBias::NonlineMode nonline_mode,
        const TensorND& conv_bias_tensor, const TensorND& z_tensor,
        const TensorND& dst_tensor, dt_byte* workspace_ptr) {
    using NonlineMode = param::ConvBias::NonlineMode;
    using FloatMode = Elemwise::Param::Mode;
    using QuantMode = ElemwiseMultiType::Param::Mode;

    // NOTE(review): res starts out as dst_tensor, so with a FLOAT z the
    // addition below reads dst_tensor instead of conv_bias_tensor; presumably
    // callers pass the same buffer for both in that case -- confirm.
    TensorND res = dst_tensor;
    TensorND z_f32 = z_tensor;
    //! create naive inplace handle
    auto handle = inplace_cpu_handle(2);

    const bool has_z = z_tensor.layout.ndim > 0;
    // the dtype category is only queried when z is actually present
    const bool z_needs_cvt =
            has_z && z_tensor.layout.dtype.category() != DTypeCategory::FLOAT;

    if (z_needs_cvt) {
        megdnn_assert(z_tensor.layout.eq_shape(dst_tensor.layout));
        const TensorLayout z_f32_layout{z_tensor.layout, dtype::Float32()};
        const TensorLayout res_f32_layout{dst_tensor.layout, dtype::Float32()};
        dt_byte* res_buf = workspace_ptr;
        dt_byte* z_buf = res_buf + z_f32_layout.span().dist_byte();
        res = TensorND{res_buf, res_f32_layout};
        z_f32 = TensorND{z_buf, z_f32_layout};
    }

    // ==================== conv_bias + z_tensor ====================
    if (!has_z) {
        res = conv_bias_tensor;
    } else {
        if (z_needs_cvt) {
            // bring both addends to Float32 inside the workspace
            auto type_cvt = handle->create_operator<TypeCvt>();
            type_cvt->exec(conv_bias_tensor, res);
            type_cvt->exec(z_tensor, z_f32);
        }
        auto adder = handle->create_operator<ElemwiseForward>();
        adder->param().mode = FloatMode::ADD;
        adder->exec({res, z_f32}, res);
    }

    // res -> dst_tensor through TypeCvt
    auto store_to_dst = [&]() {
        handle->create_operator<TypeCvt>()->exec(res, dst_tensor);
    };

    // Shared body of the activations that also have a quantized variant.
    auto run_activation = [&](FloatMode float_mode, QuantMode quant_mode) {
        if (res.layout.dtype.category() == DTypeCategory::QUANTIZED) {
            auto quant_opr = handle->create_operator<ElemwiseMultiType>();
            quant_opr->param().mode = quant_mode;
            quant_opr->exec({res}, dst_tensor);
            return;
        }
        auto float_opr = handle->create_operator<ElemwiseForward>();
        float_opr->param().mode = float_mode;
        if (res.layout.dtype == dst_tensor.layout.dtype) {
            float_opr->exec({res}, dst_tensor);
        } else {
            // activate in place first, then convert to the dst dtype
            float_opr->exec({res}, res);
            store_to_dst();
        }
    };

    switch (nonline_mode) {
        case NonlineMode::RELU:
            run_activation(FloatMode::RELU, QuantMode::QRELU);
            break;
        case NonlineMode::H_SWISH:
            run_activation(FloatMode::H_SWISH, QuantMode::QH_SWISH);
            break;
        case NonlineMode::SIGMOID: {
            megdnn_assert(res.layout.dtype.category() !=
                          DTypeCategory::QUANTIZED);
            auto sigmoid_opr = handle->create_operator<ElemwiseForward>();
            sigmoid_opr->param().mode = FloatMode::SIGMOID;
            sigmoid_opr->exec({res}, res);
            if (res.raw_ptr != dst_tensor.raw_ptr) {
                store_to_dst();
            }
            break;
        }
        case NonlineMode::IDENTITY:
            if (res.raw_ptr != dst_tensor.raw_ptr) {
                store_to_dst();
            }
            break;
        default:
            megdnn_assert(false);
    }
}
} // namespace megdnn | } // namespace megdnn | ||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |
@@ -21,12 +21,6 @@ void handle_bias_and_nonlinear(Handle* handle, param::ConvBias args, | |||||
const TensorND* conv_dst_tensor, | const TensorND* conv_dst_tensor, | ||||
const TensorND* dst_tensor, | const TensorND* dst_tensor, | ||||
const TensorND* bias_tensor); | const TensorND* bias_tensor); | ||||
//! Naive-backend helper: adds z_tensor (when its layout has ndim > 0) to
//! conv_bias_tensor, applies the activation selected by nonline_mode and
//! writes the result to dst_tensor. workspace_ptr provides the scratch memory
//! for the Float32 temporaries used when z_tensor is not of a FLOAT dtype.
void handle_z_inp_and_activation_naive( | |||||
param::ConvBias::NonlineMode nonline_mode, | |||||
const TensorND& conv_bias_tensor, const TensorND& z_tensor, | |||||
const TensorND& dst_tensor, dt_byte* workspace_ptr); | |||||
} // namespace megdnn | } // namespace megdnn | ||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |
@@ -204,6 +204,18 @@ ConvBiasImpl::AlgoConv1x1::dispatch_preprocess_kerns( | |||||
bool ConvBiasImpl::AlgoConv1x1::usable(const NCBKernSizeParam& param, | bool ConvBiasImpl::AlgoConv1x1::usable(const NCBKernSizeParam& param, | ||||
AlgoSelectionStrategy) const { | AlgoSelectionStrategy) const { | ||||
MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 2) { | MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 2) { | ||||
//! x86 only support nchw | |||||
#if MEGDNN_X86 | |||||
if (param.filter_meta.format != param::ConvBias::Format::NCHW) { | |||||
return false; | |||||
} | |||||
#else | |||||
if (param.filter_meta.format != param::ConvBias::Format::NCHW && | |||||
param.filter_meta.format != param::ConvBias::Format::NCHW44 && | |||||
param.filter_meta.format != param::ConvBias::Format::NCHW44_DOT) { | |||||
return false; | |||||
} | |||||
#endif | |||||
size_t FH = param.filter_meta.spatial[0], | size_t FH = param.filter_meta.spatial[0], | ||||
FW = param.filter_meta.spatial[1]; | FW = param.filter_meta.spatial[1]; | ||||
size_t PH = param.filter_meta.padding[0], | size_t PH = param.filter_meta.padding[0], | ||||
@@ -17,6 +17,7 @@ | |||||
#include "src/naive/handle.h" | #include "src/naive/handle.h" | ||||
#include "src/naive/lowbit_utils.h" | #include "src/naive/lowbit_utils.h" | ||||
#include "src/common/conv_bias.h" | #include "src/common/conv_bias.h" | ||||
#include "src/common/opr_delegate.h" | |||||
#include "midout.h" | #include "midout.h" | ||||
MIDOUT_DECL(megdnn_naive_conv_bias_fwd) | MIDOUT_DECL(megdnn_naive_conv_bias_fwd) | ||||
@@ -24,6 +25,91 @@ MIDOUT_DECL(megdnn_naive_conv_bias_fwd) | |||||
namespace megdnn { | namespace megdnn { | ||||
namespace naive { | namespace naive { | ||||
//! Adds the optional z input to the conv-bias result, applies the requested
//! activation and stores the outcome in dst_tensor.
//!
//! Only used for the naive implementation. DO NOT use this function in other
//! backends.
//!
//! \param nonline_mode activation to apply (RELU, H_SWISH, SIGMOID, IDENTITY);
//!        any other value trips megdnn_assert
//! \param conv_bias_tensor result of convolution plus bias
//! \param z_tensor extra addend; ignored when its layout has ndim == 0
//! \param dst_tensor tensor receiving the final result
//! \param workspace_ptr scratch memory, only touched when z is present and its
//!        dtype category is not FLOAT: the Float32 copy of the result is put
//!        at the start, the Float32 copy of z right behind it
void handle_z_inp_and_activation_naive(
        param::ConvBias::NonlineMode nonline_mode,
        const TensorND& conv_bias_tensor, const TensorND& z_tensor,
        const TensorND& dst_tensor, dt_byte* workspace_ptr) {
    using NonlineMode = param::ConvBias::NonlineMode;
    using FloatMode = Elemwise::Param::Mode;
    using QuantMode = ElemwiseMultiType::Param::Mode;

    // NOTE(review): res starts out as dst_tensor, so with a FLOAT z the
    // addition below reads dst_tensor instead of conv_bias_tensor; presumably
    // callers pass the same buffer for both in that case -- confirm.
    TensorND res = dst_tensor;
    TensorND z_f32 = z_tensor;
    //! create naive inplace handle
    auto handle = inplace_cpu_handle(2);

    const bool has_z = z_tensor.layout.ndim > 0;
    // the dtype category is only queried when z is actually present
    const bool z_needs_cvt =
            has_z && z_tensor.layout.dtype.category() != DTypeCategory::FLOAT;

    if (z_needs_cvt) {
        megdnn_assert(z_tensor.layout.eq_shape(dst_tensor.layout));
        const TensorLayout z_f32_layout{z_tensor.layout, dtype::Float32()};
        const TensorLayout res_f32_layout{dst_tensor.layout, dtype::Float32()};
        dt_byte* res_buf = workspace_ptr;
        dt_byte* z_buf = res_buf + z_f32_layout.span().dist_byte();
        res = TensorND{res_buf, res_f32_layout};
        z_f32 = TensorND{z_buf, z_f32_layout};
    }

    // ==================== conv_bias + z_tensor ====================
    if (!has_z) {
        res = conv_bias_tensor;
    } else {
        if (z_needs_cvt) {
            // bring both addends to Float32 inside the workspace
            auto type_cvt = handle->create_operator<TypeCvt>();
            type_cvt->exec(conv_bias_tensor, res);
            type_cvt->exec(z_tensor, z_f32);
        }
        auto adder = handle->create_operator<ElemwiseForward>();
        adder->param().mode = FloatMode::ADD;
        adder->exec({res, z_f32}, res);
    }

    // res -> dst_tensor through TypeCvt
    auto store_to_dst = [&]() {
        handle->create_operator<TypeCvt>()->exec(res, dst_tensor);
    };

    // Shared body of the activations that also have a quantized variant.
    auto run_activation = [&](FloatMode float_mode, QuantMode quant_mode) {
        if (res.layout.dtype.category() == DTypeCategory::QUANTIZED) {
            auto quant_opr = handle->create_operator<ElemwiseMultiType>();
            quant_opr->param().mode = quant_mode;
            quant_opr->exec({res}, dst_tensor);
            return;
        }
        auto float_opr = handle->create_operator<ElemwiseForward>();
        float_opr->param().mode = float_mode;
        if (res.layout.dtype == dst_tensor.layout.dtype) {
            float_opr->exec({res}, dst_tensor);
        } else {
            // activate in place first, then convert to the dst dtype
            float_opr->exec({res}, res);
            store_to_dst();
        }
    };

    switch (nonline_mode) {
        case NonlineMode::RELU:
            run_activation(FloatMode::RELU, QuantMode::QRELU);
            break;
        case NonlineMode::H_SWISH:
            run_activation(FloatMode::H_SWISH, QuantMode::QH_SWISH);
            break;
        case NonlineMode::SIGMOID: {
            megdnn_assert(res.layout.dtype.category() !=
                          DTypeCategory::QUANTIZED);
            auto sigmoid_opr = handle->create_operator<ElemwiseForward>();
            sigmoid_opr->param().mode = FloatMode::SIGMOID;
            sigmoid_opr->exec({res}, res);
            if (res.raw_ptr != dst_tensor.raw_ptr) {
                store_to_dst();
            }
            break;
        }
        case NonlineMode::IDENTITY:
            if (res.raw_ptr != dst_tensor.raw_ptr) {
                store_to_dst();
            }
            break;
        default:
            megdnn_assert(false);
    }
}
namespace convolution { | namespace convolution { | ||||
template <> | template <> | ||||
@@ -66,6 +66,11 @@ public: | |||||
const char* get_algorithm_set_name() const override; | const char* get_algorithm_set_name() const override; | ||||
}; | }; | ||||
//! Naive-backend helper: adds z_tensor (when its layout has ndim > 0) to
//! conv_bias_tensor, applies the activation selected by nonline_mode and
//! writes the result to dst_tensor. workspace_ptr provides the scratch memory
//! for the Float32 temporaries used when z_tensor is not of a FLOAT dtype.
void handle_z_inp_and_activation_naive( | |||||
param::ConvBias::NonlineMode nonline_mode, | |||||
const TensorND& conv_bias_tensor, const TensorND& z_tensor, | |||||
const TensorND& dst_tensor, dt_byte* workspace_ptr); | |||||
} // namespace naive | } // namespace naive | ||||
} // namespace megdnn | } // namespace megdnn | ||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |