@@ -15,15 +15,23 @@ | |||
#include "src/arm_common/elemwise_helper/kimpl/op_base.h" | |||
#include "src/arm_common/elemwise_op.h" | |||
#include "src/fallback/conv_bias/opr_impl.h" | |||
#include "midout.h" | |||
MIDOUT_DECL(arm_common_conv_bias_postprocess_helper) | |||
namespace { | |||
#define CONCAT_OP(_name) megdnn::arm_common::_name | |||
#define CONCAT_NL(_name) megdnn::NonlineMode::_name | |||
// CB: emits one `case _mode:` label of a switch, invokes `_caller(_op)` for it
// and then breaks out of the switch.
// Two forms are visible here and share the final `break;`:
//   - CB(_caller, _op, _mode): plain call.
//   - CB(_caller, _op, _mode, midout_tag): the same call enclosed in a
//     MIDOUT_BEGIN / MIDOUT_END region keyed by
//     (arm_common_conv_bias_postprocess_helper, 1, midout_tag).
// NOTE(review): the two `#define CB` lines look like the before/after sides of
// one change rendered together -- confirm which form is live.
#define CB(_caller, _op, _mode) \ | |||
case _mode: \ | |||
_caller(_op); \ | |||
#define CB(_caller, _op, _mode, midout_tag) \ | |||
case _mode: \ | |||
MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 1, midout_tag) { \ | |||
_caller(_op); \ | |||
} \ | |||
MIDOUT_END(); \ | |||
break; | |||
#define DEFAULT \ | |||
@@ -65,44 +73,53 @@ namespace { | |||
reinterpret_cast<ctype*>(dst_ptr), bias_type, bias_type, \ | |||
dst_type, N* OC* OH* OW* pack_oc_size); | |||
// FOR_BIAS(_mode): switch over megdnn::BiasMode that selects the nonlinearity
// dispatch macro for each bias layout:
//   - NO_BIAS                -> FOR_NONLINEAR_NOBIAS(FOR_NONLINEAR_UNARY)
//   - BROADCAST_CHANNEL_BIAS -> FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST)
//                               when pack_oc_size == 1; otherwise asserts
//                               pack_oc_size == 4 and uses the _NCHW44 variant
//   - BIAS                   -> FOR_NONLINEAR(FOR_NONLINEAR_BINARY)
//   - any other value        -> megdnn_throw
// The expansion reads `pack_oc_size` from the scope where the macro is used.
// Two forms are visible and share the closing `}`; in the second form each case
// body is wrapped in a MIDOUT_BEGIN / MIDOUT_END region keyed by
// (arm_common_conv_bias_postprocess_helper, 0, k) with k = 0, 1, 2.
// NOTE(review): the two `#define FOR_BIAS` lines look like the before/after
// sides of one change rendered together -- confirm which form is live.
#define FOR_BIAS(_mode) \ | |||
switch (_mode) { \ | |||
case megdnn::BiasMode::NO_BIAS: \ | |||
FOR_NONLINEAR_NOBIAS(FOR_NONLINEAR_UNARY) \ | |||
break; \ | |||
case megdnn::BiasMode::BROADCAST_CHANNEL_BIAS: \ | |||
if (pack_oc_size == 1) { \ | |||
FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST); \ | |||
} else { \ | |||
megdnn_assert(pack_oc_size == 4, \ | |||
"Only support nchw44 in ARM"); \ | |||
FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST_NCHW44); \ | |||
} \ | |||
break; \ | |||
case megdnn::BiasMode::BIAS: \ | |||
FOR_NONLINEAR(FOR_NONLINEAR_BINARY) \ | |||
break; \ | |||
default: \ | |||
megdnn_throw("no quantized unsupported biasmode"); \ | |||
break; \ | |||
#define FOR_BIAS(_mode) \ | |||
switch (_mode) { \ | |||
case megdnn::BiasMode::NO_BIAS: \ | |||
MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 0) { \ | |||
FOR_NONLINEAR_NOBIAS(FOR_NONLINEAR_UNARY); \ | |||
} \ | |||
MIDOUT_END(); \ | |||
break; \ | |||
case megdnn::BiasMode::BROADCAST_CHANNEL_BIAS: \ | |||
MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 1) { \ | |||
if (pack_oc_size == 1) { \ | |||
FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST); \ | |||
} else { \ | |||
megdnn_assert(pack_oc_size == 4, \ | |||
"Only support nchw44 in ARM"); \ | |||
FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST_NCHW44); \ | |||
} \ | |||
} \ | |||
MIDOUT_END(); \ | |||
break; \ | |||
case megdnn::BiasMode::BIAS: \ | |||
MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 2) { \ | |||
FOR_NONLINEAR(FOR_NONLINEAR_BINARY); \ | |||
} \ | |||
MIDOUT_END(); \ | |||
break; \ | |||
default: \ | |||
megdnn_throw("no quantized unsupported biasmode"); \ | |||
break; \ | |||
} | |||
// FOR_NONLINEAR(_caller): switch over `nonlineMode` (read from the scope where
// the macro is used) that hands the fused add+activation op to `_caller`:
//   IDENTITY -> AddOp, RELU -> FuseAddReluOp,
//   SIGMOID  -> FuseAddSigmoidOp, H_SWISH -> FuseAddHSwishOp.
// Remaining modes go to the DEFAULT macro (its body is not visible here).
// Two forms are visible and share the closing `}`; the second passes midout
// tags 3..6 as the fourth CB argument.
// NOTE(review): the two `#define FOR_NONLINEAR` lines look like the
// before/after sides of one change -- confirm which form is live.
#define FOR_NONLINEAR(_caller) \ | |||
switch (nonlineMode) { \ | |||
CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY)) \ | |||
CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU)) \ | |||
CB(_caller, CONCAT_OP(FuseAddSigmoidOp), CONCAT_NL(SIGMOID)) \ | |||
CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH)) \ | |||
DEFAULT \ | |||
#define FOR_NONLINEAR(_caller) \ | |||
switch (nonlineMode) { \ | |||
CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY), 3) \ | |||
CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 4) \ | |||
CB(_caller, CONCAT_OP(FuseAddSigmoidOp), CONCAT_NL(SIGMOID), 5) \ | |||
CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 6) \ | |||
DEFAULT \ | |||
} | |||
// FOR_NONLINEAR_NOBIAS(_caller): switch over `nonlineMode` for the case where
// no bias is added. HANDLE_IDENTITY() covers the identity mode (its definition
// is not visible here); the other modes hand a unary op to `_caller`:
//   RELU -> ReluOp, SIGMOID -> SigmoidOp, H_SWISH -> HSwishOp.
// Remaining modes go to the DEFAULT macro.
// Two forms are visible and share the closing `}`; the second passes midout
// tags 7..9 as the fourth CB argument.
// NOTE(review): in the tagged form each CB(...) is followed by `;`, unlike the
// CB uses in FOR_NONLINEAR. The four-argument CB already ends in `break;`, so
// this leaves an empty statement before the next case label -- harmless, but
// inconsistent.
#define FOR_NONLINEAR_NOBIAS(_caller) \ | |||
switch (nonlineMode) { \ | |||
HANDLE_IDENTITY() \ | |||
CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU)) \ | |||
CB(_caller, CONCAT_OP(SigmoidOp), CONCAT_NL(SIGMOID)) \ | |||
CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH)) \ | |||
DEFAULT \ | |||
#define FOR_NONLINEAR_NOBIAS(_caller) \ | |||
switch (nonlineMode) { \ | |||
HANDLE_IDENTITY() \ | |||
CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 7); \ | |||
CB(_caller, CONCAT_OP(SigmoidOp), CONCAT_NL(SIGMOID), 8); \ | |||
CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 9); \ | |||
DEFAULT \ | |||
} | |||
template <typename ctype, typename dtype = ctype, | |||
@@ -177,20 +194,20 @@ struct PostProcess<ctype, dtype, megdnn::PostprocessMode::NO_PROCESS> { | |||
case megdnn::NonlineMode::IDENTITY: \ | |||
_caller(_op) break; | |||
// FOR_NONLINEAR(_caller), second definition in this file: switch over
// `nonlineMode` with only three handled modes:
//   IDENTITY via HANDLE_IDENTITY(_caller, AddOp),
//   RELU -> FuseAddReluOp, H_SWISH -> FuseAddHSwishOp.
// There is no SIGMOID case here; it falls to the DEFAULT macro.
// Two forms are visible and share the closing `}`; the second passes midout
// tags 10 and 11 as the fourth CB argument.
// NOTE(review): presumably this redefinition follows an #undef of the earlier
// FOR_NONLINEAR outside the visible lines -- confirm.
#define FOR_NONLINEAR(_caller) \ | |||
switch (nonlineMode) { \ | |||
HANDLE_IDENTITY(_caller, CONCAT_OP(AddOp)) \ | |||
CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU)) \ | |||
CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH)) \ | |||
DEFAULT \ | |||
#define FOR_NONLINEAR(_caller) \ | |||
switch (nonlineMode) { \ | |||
HANDLE_IDENTITY(_caller, CONCAT_OP(AddOp)) \ | |||
CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 10) \ | |||
CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 11) \ | |||
DEFAULT \ | |||
} | |||
// FOR_NONLINEAR_NOBIAS(_caller), second definition in this file: switch over
// `nonlineMode` for the no-bias path:
//   IDENTITY via HANDLE_IDENTITY(_caller, TypeCvtOp),
//   RELU -> ReluOp, H_SWISH -> HSwishOp.
// Remaining modes go to the DEFAULT macro.
// Two forms are visible and share the closing `}`; the second passes midout
// tags 12 and 13 as the fourth CB argument.
// NOTE(review): presumably this redefinition follows an #undef of the earlier
// FOR_NONLINEAR_NOBIAS outside the visible lines -- confirm.
#define FOR_NONLINEAR_NOBIAS(_caller) \ | |||
switch (nonlineMode) { \ | |||
HANDLE_IDENTITY(_caller, CONCAT_OP(TypeCvtOp)) \ | |||
CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU)) \ | |||
CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH)) \ | |||
DEFAULT \ | |||
#define FOR_NONLINEAR_NOBIAS(_caller) \ | |||
switch (nonlineMode) { \ | |||
HANDLE_IDENTITY(_caller, CONCAT_OP(TypeCvtOp)) \ | |||
CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 12) \ | |||
CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 13) \ | |||
DEFAULT \ | |||
} | |||
#define FOR_BIAS(_bias_mode, OH, OW) \ | |||
@@ -18,6 +18,10 @@ | |||
#include <mutex> | |||
#include "midout.h" | |||
MIDOUT_DECL(dnn_src_common_handle_impl) | |||
namespace megdnn { | |||
class HandleImplHelper : public Handle { | |||
@@ -63,19 +67,23 @@ protected: | |||
// Returns the helper operator held in self->m_helper_oprs[idx] as an Opr*.
// When the slot is empty, the operator is created with
// self->create_operator<Opr>() while holding self->m_helper_oprs_mtx; the new
// operator gets `param` assigned and is asserted to be thread-safe before it
// is returned. When the slot is already filled, the stored pointer is returned
// and `param` is not applied.
// `idx` is checked at compile time against NR_HELPER_OPRS.
// NOTE(review): the lines below interleave two renderings of the body (one
// without and one with the MIDOUT_BEGIN / MIDOUT_END wrapper) -- confirm which
// one is live.
// NOTE(review): the outer `if (!self->m_helper_oprs[idx])` reads the slot
// before the lock_guard is taken; whether that unsynchronised read is safe
// depends on the element type of m_helper_oprs, which is not visible here.
template <class Opr, size_t idx, class Self> | |||
static Opr* get_helper_opr(Self self, | |||
const typename Opr::Param& param = {}) { | |||
static_assert(idx < NR_HELPER_OPRS, "invalid idx"); | |||
if (!self->m_helper_oprs[idx]) { | |||
std::lock_guard<std::mutex> lg{self->m_helper_oprs_mtx}; | |||
MIDOUT_BEGIN(dnn_src_common_handle_impl, Opr, idx) { | |||
static_assert(idx < NR_HELPER_OPRS, "invalid idx"); | |||
if (!self->m_helper_oprs[idx]) { | |||
self->m_helper_oprs[idx] = | |||
self->template create_operator<Opr>(); | |||
auto ret = static_cast<Opr*>(self->m_helper_oprs[idx].get()); | |||
ret->param() = param; | |||
megdnn_assert(ret->is_thread_safe()); | |||
return ret; | |||
std::lock_guard<std::mutex> lg{self->m_helper_oprs_mtx}; | |||
if (!self->m_helper_oprs[idx]) { | |||
self->m_helper_oprs[idx] = | |||
self->template create_operator<Opr>(); | |||
auto ret = | |||
static_cast<Opr*>(self->m_helper_oprs[idx].get()); | |||
ret->param() = param; | |||
megdnn_assert(ret->is_thread_safe()); | |||
return ret; | |||
} | |||
} | |||
return static_cast<Opr*>(self->m_helper_oprs[idx].get()); | |||
} | |||
return static_cast<Opr*>(self->m_helper_oprs[idx].get()); | |||
MIDOUT_END(); | |||
} | |||
private: | |||
@@ -13,6 +13,10 @@ | |||
#include "megdnn/oprs.h" | |||
#include "src/common/utils.h" | |||
#include "midout.h" | |||
MIDOUT_DECL(transpose_fallback) | |||
namespace megdnn { | |||
namespace relayout { | |||
@@ -107,13 +111,15 @@ void transpose(size_t batch, size_t m, size_t n, T* src, T* dst) { | |||
// work_block(i, j, h, w): transposes one tile of the current batch.
// The source pointer is batch_src + i * n + j and the destination pointer is
// batch_dst + j * m + i. A tile with h == B and w == B calls the
// transpose_block overload without explicit extents; any other tile passes h
// and w explicitly. `B` is defined outside the visible lines.
// NOTE(review): the lines below interleave the body without and with the
// MIDOUT_BEGIN(transpose_fallback, midout_iv(0)) wrapper -- confirm which one
// is live.
auto work_block = [m, n, &batch_src, &batch_dst]( | |||
const size_t i, const size_t j, const size_t h, | |||
const size_t w) { | |||
auto src = batch_src + i * n + j, dst = batch_dst + j * m + i; | |||
if (h == B && w == B) { | |||
transpose_block(src, dst, n, m); | |||
} else { | |||
transpose_block(src, dst, n, m, h, w); | |||
MIDOUT_BEGIN(transpose_fallback, midout_iv(0)) { | |||
if (h == B && w == B) { | |||
transpose_block(src, dst, n, m); | |||
} else { | |||
transpose_block(src, dst, n, m, h, w); | |||
} | |||
} | |||
MIDOUT_END(); | |||
}; | |||
auto work_row = [&work_block, n](size_t i, size_t h) { | |||
size_t j = 0; | |||
@@ -442,20 +442,35 @@ WorkspaceBundle ConvBiasImpl::AlgoIm2col::get_bundle( | |||
get_matmul_kern_param(param, ohw_tile_size, oc_tile_size); | |||
if (m_matmul_algo->packmode() == Pack_Mode::DEFAULT) { | |||
Im2colKerns<Pack_Mode::DEFAULT> defaultkern; | |||
ws = defaultkern.get_thread_bundle(param, im2col_kern_param, | |||
m_matmul_algo, ohw_tile_size, | |||
oc_tile_size); | |||
MIDOUT_BEGIN( | |||
megdnn_fallback_im2col, | |||
midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_dft"_hash)) { | |||
Im2colKerns<Pack_Mode::DEFAULT> defaultkern; | |||
ws = defaultkern.get_thread_bundle(param, im2col_kern_param, | |||
m_matmul_algo, ohw_tile_size, | |||
oc_tile_size); | |||
} | |||
MIDOUT_END(); | |||
} else if (m_matmul_algo->packmode() == Pack_Mode::ONLY_PACKA) { | |||
Im2colKerns<Pack_Mode::ONLY_PACKA> onlypackakern; | |||
ws = onlypackakern.get_thread_bundle(param, im2col_kern_param, | |||
m_matmul_algo, ohw_tile_size, | |||
oc_tile_size); | |||
MIDOUT_BEGIN( | |||
megdnn_fallback_im2col, | |||
midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_packa"_hash)) { | |||
Im2colKerns<Pack_Mode::ONLY_PACKA> onlypackakern; | |||
ws = onlypackakern.get_thread_bundle(param, im2col_kern_param, | |||
m_matmul_algo, ohw_tile_size, | |||
oc_tile_size); | |||
} | |||
MIDOUT_END(); | |||
} else { | |||
Im2colKerns<Pack_Mode::NO_PACK> nopackkern; | |||
ws = nopackkern.get_thread_bundle(param, im2col_kern_param, | |||
m_matmul_algo, ohw_tile_size, | |||
oc_tile_size); | |||
MIDOUT_BEGIN( | |||
megdnn_fallback_im2col, | |||
midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_other"_hash)) { | |||
Im2colKerns<Pack_Mode::NO_PACK> nopackkern; | |||
ws = nopackkern.get_thread_bundle(param, im2col_kern_param, | |||
m_matmul_algo, ohw_tile_size, | |||
oc_tile_size); | |||
} | |||
MIDOUT_END(); | |||
} | |||
return {nullptr, | |||
@@ -19,6 +19,9 @@ | |||
#include "src/fallback/conv_bias/opr_impl.h" | |||
#include "src/fallback/matrix_mul/opr_impl.h" | |||
#include "midout.h" | |||
MIDOUT_DECL(megdnn_fallback_conv_bias_winograd_common) | |||
namespace megdnn { | |||
namespace winograd { | |||
@@ -440,9 +443,12 @@ public: | |||
unit_oc_size]( | |||
const NCBKernParam& ncb_param, | |||
const NCBKernIndex& ncb_index) { | |||
winograd_compute(strategy, bundle_top, bundle_compute, matmul_algo, | |||
matmul_param, unit_tile_size, unit_oc_size, | |||
ncb_param, std::move(ncb_index)); | |||
MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, 0, 0) { | |||
winograd_compute(strategy, bundle_top, bundle_compute, | |||
matmul_algo, matmul_param, unit_tile_size, | |||
unit_oc_size, ncb_param, std::move(ncb_index)); | |||
} | |||
MIDOUT_END(); | |||
}; | |||
kerns.push_back( | |||
{winograd_compute_kern, {GROUP, N, nr_hw_tiles, nr_oc_tiles}}); | |||
@@ -250,8 +250,11 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern( | |||
param.compute_mode == param::ConvBias::ComputeMode::cmode) { \ | |||
using ctype = DTypeTrait<dt>::ctype; \ | |||
using comp_type = DTypeTrait<compute_type>::ctype; \ | |||
return {{kern_naive_forward<ctype, ctype, comp_type>, \ | |||
{group, N, 1_z}}}; \ | |||
MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(1)) { \ | |||
return {{kern_naive_forward<ctype, ctype, comp_type>, \ | |||
{group, N, 1_z}}}; \ | |||
} \ | |||
MIDOUT_END(); \ | |||
} \ | |||
} while (0) | |||
@@ -262,16 +265,19 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern( | |||
#endif | |||
#undef cb | |||
// cb(dt_src, dt_dst): when param.src_type and param.filter_type both have the
// enum value of dt_src and param.dst_type has the enum value of dt_dst, returns
// from the enclosing function with a single kernel entry:
// kern_naive_forward<src ctype, dst ctype, dst ctype> with the index space
// {group, N, 1_z}. Wrapped in do { ... } while (0) so it can be used as a
// statement.
// Two forms are visible and share the closing `} while (0)`; in the second the
// return statement sits inside a MIDOUT_BEGIN(megdnn_fallback_conv,
// midout_iv(2)) / MIDOUT_END() region.
// NOTE(review): the two `#define cb` lines look like the before/after sides of
// one change rendered together -- confirm which form is live.
#define cb(dt_src, dt_dst) \ | |||
do { \ | |||
if (param.src_type.enumv() == DTypeTrait<dt_src>::enumv && \ | |||
param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \ | |||
param.dst_type.enumv() == DTypeTrait<dt_dst>::enumv) { \ | |||
return {{kern_naive_forward<DTypeTrait<dt_src>::ctype, \ | |||
DTypeTrait<dt_dst>::ctype, \ | |||
DTypeTrait<dt_dst>::ctype>, \ | |||
{group, N, 1_z}}}; \ | |||
} \ | |||
#define cb(dt_src, dt_dst) \ | |||
do { \ | |||
if (param.src_type.enumv() == DTypeTrait<dt_src>::enumv && \ | |||
param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \ | |||
param.dst_type.enumv() == DTypeTrait<dt_dst>::enumv) { \ | |||
MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(2)) { \ | |||
return {{kern_naive_forward<DTypeTrait<dt_src>::ctype, \ | |||
DTypeTrait<dt_dst>::ctype, \ | |||
DTypeTrait<dt_dst>::ctype>, \ | |||
{group, N, 1_z}}}; \ | |||
} \ | |||
MIDOUT_END(); \ | |||
} \ | |||
} while (0) | |||
cb(dtype::Int8, dtype::Int16); | |||
cb(dtype::Int8, dtype::Int32); | |||
@@ -14,6 +14,10 @@ | |||
#include "megdnn/tensor_iter.h" | |||
#include "src/naive/handle.h" | |||
#include "midout.h" | |||
MIDOUT_DECL(naive_relayout) | |||
using namespace megdnn; | |||
using namespace naive; | |||
@@ -48,22 +52,24 @@ void RelayoutForwardImpl::exec( | |||
do_exec(src, dst); | |||
} | |||
// Dispatches the copy from `src` to `dst` on the element type of `src`.
// The local `cb` macro generates one switch case for every dtype listed by
// MEGDNN_FOREACH_DTYPE_NAME and MEGDNN_FOREACH_PARAMETERIZED_DTYPE; each case
// hands do_copy<ctype>(dst, src) to MEGDNN_DISPATCH_CPU_KERN_OPR and returns.
// A dtype outside those lists reaches `default` and throws "bad dtype".
// NOTE(review): the lines below interleave two renderings of this function
// (one without and one with the MIDOUT_BEGIN(naive_relayout, midout_iv(0))
// wrapper) -- confirm which one is live.
void RelayoutForwardImpl::do_exec( | |||
_megdnn_tensor_in src, _megdnn_tensor_out dst) { | |||
switch(src.layout.dtype.enumv()) { | |||
#define cb(_dt) \ | |||
case DTypeEnum::_dt: \ | |||
{ \ | |||
MEGDNN_DISPATCH_CPU_KERN_OPR( \ | |||
do_copy<DTypeTrait<dtype::_dt>::ctype>(dst, src)); \ | |||
return; \ | |||
} | |||
MEGDNN_FOREACH_DTYPE_NAME(cb) | |||
MEGDNN_FOREACH_PARAMETERIZED_DTYPE(cb) | |||
void RelayoutForwardImpl::do_exec(_megdnn_tensor_in src, | |||
_megdnn_tensor_out dst) { | |||
MIDOUT_BEGIN(naive_relayout, midout_iv(0)) { | |||
switch (src.layout.dtype.enumv()) { | |||
#define cb(_dt) \ | |||
case DTypeEnum::_dt: { \ | |||
MEGDNN_DISPATCH_CPU_KERN_OPR( \ | |||
do_copy<DTypeTrait<dtype::_dt>::ctype>(dst, src)); \ | |||
return; \ | |||
} | |||
MEGDNN_FOREACH_DTYPE_NAME(cb) | |||
MEGDNN_FOREACH_PARAMETERIZED_DTYPE(cb) | |||
#undef cb | |||
default: | |||
megdnn_throw("bad dtype"); | |||
default: | |||
megdnn_throw("bad dtype"); | |||
} | |||
} | |||
MIDOUT_END(); | |||
} | |||
void RelayoutForwardImpl::check_cpu_handle(Handle *handle) { | |||
@@ -27,10 +27,16 @@ endif() | |||
add_executable(megdnn_test ${SOURCES}) | |||
target_link_libraries(megdnn_test gtest) | |||
target_link_libraries(megdnn_test megdnn ${MGE_BLAS_LIBS}) | |||
target_include_directories(megdnn_test | |||
PRIVATE | |||
${PROJECT_SOURCE_DIR}/third_party/midout/src | |||
) | |||
if(UNIX) | |||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++") | |||
endif() | |||
@@ -135,7 +135,7 @@ MGB_OPR_REGISTRY_CALLER_SPECIALIZE | |||
*/ | |||
// Expands to an anonymous-namespace object named __caller_OprReg<_cls>_ins of
// type ::mgb::serialization::OprRegistryCaller<_cls, _impl>.
// Two renderings of the declaration's first line are visible; the second adds
// [[gnu::unused]] to the object.
// NOTE(review): presumably the attribute silences unused-variable warnings for
// an object that exists only for its constructor's side effect -- confirm
// against OprRegistryCaller.
#define MGB_SEREG_OPR_INTL_CALL_ENTRY(_cls, _impl) \ | |||
namespace { \ | |||
::mgb::serialization::OprRegistryCaller<_cls, _impl> \ | |||
[[gnu::unused]] ::mgb::serialization::OprRegistryCaller<_cls, _impl> \ | |||
__caller_OprReg##_cls##_ins; \ | |||
} | |||
@@ -244,7 +244,7 @@ struct IsComplete<T, decltype(void(sizeof(T)))> : std::true_type {}; | |||
MGB_REG_OPR_SHALLOW_COPY_IMPL(_cls, _copy); \ | |||
} \ | |||
}; \ | |||
::mgb::serialization::OprRegistryCaller< \ | |||
[[gnu::unused]] ::mgb::serialization::OprRegistryCaller< \ | |||
_cls, _OprRegShallowCopy##_cls> \ | |||
__caller_OprRegShallowCopy##_cls##_ins; \ | |||
} | |||