GitOrigin-RevId: 96cdc57180
release-1.5
@@ -0,0 +1,365 @@
/**
 * \file dnn/src/x86/pooling/algos.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/x86/pooling/algo.h" | |||||
#include "megdnn/opr_param_defs.h" | |||||
#include "src/common/opr_delegate.h" | |||||
#include "src/common/utils.h" | |||||
#include "src/fallback/pooling/opr_impl.h" | |||||
#include "src/naive/handle.h" | |||||
#include "src/x86/handle.h" | |||||
#include "src/x86/pooling/do_max_pooling_3x3_s2x2_float_sse.h" | |||||
#include "src/x86/pooling/pooling_special_cases.h" | |||||
#include "src/x86/utils.h" | |||||
using namespace megdnn; | |||||
using namespace x86; | |||||

namespace {
#if MEGDNN_X86_WITH_MKL_DNN
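// Wrap a megdnn tensor in a dnnl::memory. With use_mkl_mem == true a fresh
// buffer owned by oneDNN is allocated (used below as a scratch tensor for
// layout reorders); otherwise the dnnl::memory aliases the tensor's raw
// pointer without copying.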
template <dnnl::memory::format_tag format_tag, bool use_mkl_mem>
dnnl::memory tensor_to_mkl_memory(_megdnn_tensor_in src,
                                  const dnnl::engine& mkldnn_eng,
                                  dnnl::memory::data_type mkldnn_datatype) {
    megdnn_assert(format_tag == dnnl::memory::format_tag::nChw8c ||
                          format_tag == dnnl::memory::format_tag::nchw ||
                          format_tag == dnnl::memory::format_tag::nhwc,
                  "unsupported format");
    dnnl::memory::dims src_shape = {
            static_cast<long>(src.layout[0]), static_cast<long>(src.layout[1]),
            static_cast<long>(src.layout[2]), static_cast<long>(src.layout[3])};
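    // nChw8c packs 8 channels per block, so layout[1] holds C / 8; multiply
    // by 8 to recover the full channel count for the memory descriptor.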
    if (format_tag == dnnl::memory::format_tag::nChw8c) {
        src_shape = {static_cast<long>(src.layout[0]),
                     static_cast<long>(src.layout[1] * 8),
                     static_cast<long>(src.layout[2]),
                     static_cast<long>(src.layout[3])};
    }
    auto megdnn_src_md =
            dnnl::memory::desc({src_shape}, mkldnn_datatype, format_tag);
    if (use_mkl_mem) {
        auto megdnn_src_memory = dnnl::memory(megdnn_src_md, mkldnn_eng);
        return megdnn_src_memory;
    } else {
        auto megdnn_src_memory = dnnl::memory(megdnn_src_md, mkldnn_eng,
                                              const_cast<void*>(src.raw_ptr));
        return megdnn_src_memory;
    }
}
#endif
}  // namespace

PoolingImpl::AlgoPack::AlgoPack() {
    all_algos.push_back(&algo_mean_w2s2_avx);
    all_algos.push_back(&algo_mean_w2s2_sse3);
    all_algos.push_back(&algo_max_w2s2_sse);
    all_algos.push_back(&algo_max_w3s3_sse);
#if MEGDNN_X86_WITH_MKL_DNN
    all_algos.push_back(&algo_mkldnn_nchw);
    all_algos.push_back(&algo_mkldnn_nchw88);
#endif
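    // keep the fallback algorithm last: get_algorithm_heuristic() walks
    // all_algos in order and returns the first available entry.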
    all_algos.push_back(&algo_fallback);

    for (auto&& algo : all_algos) {
        m_all_algos_map.emplace(algo->info().desc, algo);
    }
}

PoolingImpl::AlgoPack PoolingImpl::sm_algo_pack;

MEGDNN_DEF_GET_ALGO_FROM_DESC(PoolingImpl)

PoolingImpl::AlgoBase::SizeArgs::SizeArgs(PoolingImpl* o,
                                          const TensorLayout& src,
                                          const TensorLayout& dst)
        : handle{static_cast<x86::HandleImpl*>(o->handle())},
          opr{o},
          layout_src{src},
          layout_dst{dst} {}

PoolingImpl::AlgoBase::ExecArgs::ExecArgs(PoolingImpl* opr,
                                          _megdnn_tensor_in src,
                                          _megdnn_tensor_out dst,
                                          _megdnn_workspace workspace)
        : SizeArgs(opr, src.layout, dst.layout),
          src_tensor{&src},
          dst_tensor{&dst},
          workspace{workspace} {}

std::string PoolingImpl::AlgoBase::SizeArgs::to_string() const {
    return ssprintf("src=%s, dst=%s", layout_src.to_string().c_str(),
                    layout_dst.to_string().c_str());
}

bool PoolingImpl::AlgoMeanW2S2AVX::is_available(const SizeArgs& args) const {
    auto SH = args.opr->param().stride_h;
    auto SW = args.opr->param().stride_w;
    auto FH = args.opr->param().window_h;
    auto FW = args.opr->param().window_w;
    return (is_supported(SIMDType::AVX) &&
            args.opr->param().mode == Mode::AVERAGE &&
            args.opr->param().format == Param::Format::NCHW &&
            args.layout_src.dtype == dtype::Float32() && FH == 2 && FW == 2 &&
            SH == 2 && SW == 2);
}

void PoolingImpl::AlgoMeanW2S2AVX::exec(const ExecArgs& args) const {
    auto N = args.layout_src.shape[0];
    auto C = args.layout_src.shape[1];
    auto IH = args.layout_src.shape[2];
    auto IW = args.layout_src.shape[3];
    auto OH = args.layout_dst.shape[2];
    auto OW = args.layout_dst.shape[3];
    auto PH = args.opr->param().pad_h;
    auto PW = args.opr->param().pad_w;
    auto sptr = reinterpret_cast<dt_float32*>(args.src_tensor->raw_ptr);
    auto dptr = reinterpret_cast<dt_float32*>(args.dst_tensor->raw_ptr);
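    // MEGDNN_DISPATCH_CPU_KERN_OPR presumably resolves a name `handle` in the
    // enclosing scope (normally the operator's member function); this lambda
    // stands in for it inside the algorithm object. The same idiom recurs in
    // the other exec() implementations below.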
    auto handle = [=]() { return args.handle; };
    MEGDNN_DISPATCH_CPU_KERN_OPR(rep(n, N) rep(c, C) {
        mean_pooling_w2x2_s2x2_avx(sptr + n * C * IH * IW + c * IH * IW, IH, IW,
                                   dptr + n * C * OH * OW + c * OH * OW, OH, OW,
                                   PH, PW, true);
    });
}

bool PoolingImpl::AlgoMeanW2S2SSE3::is_available(const SizeArgs& args) const {
    auto SH = args.opr->param().stride_h;
    auto SW = args.opr->param().stride_w;
    auto FH = args.opr->param().window_h;
    auto FW = args.opr->param().window_w;
    return (is_supported(SIMDType::SSE3) &&
            args.opr->param().mode == Mode::AVERAGE &&
            args.layout_src.dtype == dtype::Float32() &&
            args.opr->param().format == Param::Format::NCHW && FH == 2 &&
            FW == 2 && SH == 2 && SW == 2);
}

void PoolingImpl::AlgoMeanW2S2SSE3::exec(const ExecArgs& args) const {
    auto N = args.layout_src.shape[0];
    auto C = args.layout_src.shape[1];
    auto IH = args.layout_src.shape[2];
    auto IW = args.layout_src.shape[3];
    auto OH = args.layout_dst.shape[2];
    auto OW = args.layout_dst.shape[3];
    auto PH = args.opr->param().pad_h;
    auto PW = args.opr->param().pad_w;
    auto sptr = reinterpret_cast<dt_float32*>(args.src_tensor->raw_ptr);
    auto dptr = reinterpret_cast<dt_float32*>(args.dst_tensor->raw_ptr);
    auto handle = [=]() { return args.handle; };
    MEGDNN_DISPATCH_CPU_KERN_OPR(rep(n, N) rep(c, C) {
        mean_pooling_w2x2_s2x2_sse3(sptr + n * C * IH * IW + c * IH * IW, IH,
                                    IW, dptr + n * C * OH * OW + c * OH * OW,
                                    OH, OW, PH, PW, true);
    });
}

bool PoolingImpl::AlgoMaxW2S2SSE::is_available(const SizeArgs& args) const {
    auto SH = args.opr->param().stride_h;
    auto SW = args.opr->param().stride_w;
    auto FH = args.opr->param().window_h;
    auto FW = args.opr->param().window_w;
    return (is_supported(SIMDType::SSE) &&
            args.layout_src.dtype == dtype::Float32() &&
            args.opr->param().mode == Mode::MAX &&
            args.opr->param().format == Param::Format::NCHW && FH == 2 &&
            FW == 2 && SH == 2 && SW == 2);
}

void PoolingImpl::AlgoMaxW2S2SSE::exec(const ExecArgs& args) const {
    auto N = args.layout_src.shape[0];
    auto C = args.layout_src.shape[1];
    auto IH = args.layout_src.shape[2];
    auto IW = args.layout_src.shape[3];
    auto OH = args.layout_dst.shape[2];
    auto OW = args.layout_dst.shape[3];
    auto PH = args.opr->param().pad_h;
    auto PW = args.opr->param().pad_w;
    auto sptr = reinterpret_cast<dt_float32*>(args.src_tensor->raw_ptr);
    auto dptr = reinterpret_cast<dt_float32*>(args.dst_tensor->raw_ptr);
    auto handle = [=]() { return args.handle; };
    MEGDNN_DISPATCH_CPU_KERN_OPR(rep(n, N) rep(c, C) {
        max_pooling_w2x2_s2x2_sse(sptr + n * C * IH * IW + c * IH * IW, IH, IW,
                                  dptr + n * C * OH * OW + c * OH * OW, OH, OW,
                                  PH, PW);
    });
}

bool PoolingImpl::AlgoMaxW3S3SSE::is_available(const SizeArgs& args) const {
    auto SH = args.opr->param().stride_h;
    auto SW = args.opr->param().stride_w;
    auto FH = args.opr->param().window_h;
    auto FW = args.opr->param().window_w;
    return (is_supported(SIMDType::SSE) &&
            args.layout_src.dtype == dtype::Float32() &&
            args.opr->param().mode == Mode::MAX &&
            args.opr->param().format == Param::Format::NCHW && FH == 3 &&
            FW == 3 && SH == 2 && SW == 2);
}

void PoolingImpl::AlgoMaxW3S3SSE::exec(const ExecArgs& args) const {
    auto N = args.layout_src.shape[0];
    auto C = args.layout_src.shape[1];
    auto IH = args.layout_src.shape[2];
    auto IW = args.layout_src.shape[3];
    auto OH = args.layout_dst.shape[2];
    auto OW = args.layout_dst.shape[3];
    auto PH = args.opr->param().pad_h;
    auto PW = args.opr->param().pad_w;
    auto sptr = reinterpret_cast<dt_float32*>(args.src_tensor->raw_ptr);
    auto dptr = reinterpret_cast<dt_float32*>(args.dst_tensor->raw_ptr);
    auto handle = [=]() { return args.handle; };
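    // unlike the 2x2 kernels, the 3x3 stride-2 kernel needs scratch memory:
    // bind the caller-provided workspace to the bundle that was sized in
    // get_workspace_in_bytes().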
    MEGDNN_DISPATCH_CPU_KERN_OPR(
            WorkspaceBundle ws = get_bundle(args.layout_src, args.layout_dst,
                                            args.opr->param());
            ws.set(args.workspace.raw_ptr); rep(n, N) rep(c, C) {
                do_max_pooling_3x3_s2x2_float_SSE(
                        sptr + n * C * IH * IW + c * IH * IW,
                        dptr + n * C * OH * OW + c * OH * OW, IH, IW, OH, OW,
                        PH, PW, ws);
            });
}

#if MEGDNN_X86_WITH_MKL_DNN
bool PoolingImpl::AlgoMKLDNNNCHW::is_available(const SizeArgs& args) const {
    return ((args.layout_src.dtype.enumv() == DTypeEnum::QuantizedS8 ||
             args.layout_src.dtype.enumv() == DTypeEnum::Int8) &&
            args.opr->param().mode == Mode::MAX &&
            args.opr->param().format == Param::Format::NCHW);
}

void PoolingImpl::AlgoMKLDNNNCHW::exec(const ExecArgs& args) const {
    auto PH = args.opr->param().pad_h;
    auto PW = args.opr->param().pad_w;
    auto FH = args.opr->param().window_h;
    auto FW = args.opr->param().window_w;
    auto SH = args.opr->param().stride_h;
    auto SW = args.opr->param().stride_w;
    auto handle = [=]() { return args.handle; };
    auto x86_handle = static_cast<HandleImpl*>(inplace_cpu_handle().get());
    auto mkldnn_eng = x86_handle->mkldnn_engine();
    auto mkldnn_stream = x86_handle->mkldnn_stream();
    auto mkldnn_pooling_mode = dnnl::algorithm::pooling_max;
    dnnl::memory::dims pool_strides = {SH, SW};
    dnnl::memory::dims pool_padding = {PH, PW};
    dnnl::memory::dims pool_kernel = {FH, FW};
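    // oneDNN's optimized int8 max pooling works on nhwc and does not change
    // layouts on its own, so reorder the nchw input to nhwc, pool, then
    // reorder the nhwc result back to nchw.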
    dnnl::memory&& megdnn_src_memory_ori =
            tensor_to_mkl_memory<dnnl::memory::format_tag::nchw, false>(
                    *args.src_tensor, mkldnn_eng, dnnl::memory::data_type::s8);
    dnnl::memory&& megdnn_dst_memory_ori =
            tensor_to_mkl_memory<dnnl::memory::format_tag::nchw, false>(
                    *args.dst_tensor, mkldnn_eng, dnnl::memory::data_type::s8);
    dnnl::memory&& megdnn_src_memory =
            tensor_to_mkl_memory<dnnl::memory::format_tag::nhwc, true>(
                    *args.src_tensor, mkldnn_eng, dnnl::memory::data_type::s8);
    dnnl::memory&& megdnn_dst_memory =
            tensor_to_mkl_memory<dnnl::memory::format_tag::nhwc, true>(
                    *args.dst_tensor, mkldnn_eng, dnnl::memory::data_type::s8);
    auto reorder_src = dnnl::reorder(megdnn_src_memory_ori, megdnn_src_memory);
    auto reorder_dst = dnnl::reorder(megdnn_dst_memory, megdnn_dst_memory_ori);
    auto pool1_desc = dnnl::pooling_forward::desc(
            dnnl::prop_kind::forward_inference, mkldnn_pooling_mode,
            megdnn_src_memory.get_desc(), megdnn_dst_memory.get_desc(),
            pool_strides, pool_kernel, pool_padding, pool_padding);
    auto pool_pd =
            dnnl::pooling_forward::primitive_desc(pool1_desc, mkldnn_eng);
    auto pool = dnnl::pooling_forward(pool_pd);
    auto run = [mkldnn_stream, mkldnn_eng, reorder_src, pool, reorder_dst,
                megdnn_src_memory_ori, megdnn_src_memory, megdnn_dst_memory,
                megdnn_dst_memory_ori](void) {
        MEGDNN_MARK_USED_VAR(mkldnn_eng);
        auto mkl_stream = mkldnn_stream;
        reorder_src.execute(mkl_stream, {{DNNL_ARG_FROM, megdnn_src_memory_ori},
                                         {DNNL_ARG_TO, megdnn_src_memory}});
        pool.execute(mkl_stream, {{DNNL_ARG_SRC, megdnn_src_memory},
                                  {DNNL_ARG_DST, megdnn_dst_memory}});
        reorder_dst.execute(mkl_stream, {{DNNL_ARG_FROM, megdnn_dst_memory},
                                         {DNNL_ARG_TO, megdnn_dst_memory_ori}});
        mkl_stream.wait();
    };
    MEGDNN_DISPATCH_CPU_KERN_OPR(run());
}
#endif

#if MEGDNN_X86_WITH_MKL_DNN
bool PoolingImpl::AlgoMKLDNNNCHW88::is_available(const SizeArgs& args) const {
    return (args.layout_src.dtype == dtype::Float32() &&
            args.opr->param().mode == Mode::MAX &&
            args.opr->param().format == Param::Format::NCHW88);
}

void PoolingImpl::AlgoMKLDNNNCHW88::exec(const ExecArgs& args) const {
    auto PH = args.opr->param().pad_h;
    auto PW = args.opr->param().pad_w;
    auto FH = args.opr->param().window_h;
    auto FW = args.opr->param().window_w;
    auto SH = args.opr->param().stride_h;
    auto SW = args.opr->param().stride_w;
    auto handle = [=]() { return args.handle; };
    auto x86_handle = static_cast<HandleImpl*>(inplace_cpu_handle().get());
    auto mkldnn_eng = x86_handle->mkldnn_engine();
    auto mkldnn_stream = x86_handle->mkldnn_stream();
    auto mkldnn_pooling_mode = dnnl::algorithm::pooling_max;
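    // is_available() currently only admits Mode::MAX, so only the first case
    // below is reachable; the average modes are mapped for completeness.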
    switch (args.opr->param().mode) {
        case Mode::MAX:
            mkldnn_pooling_mode = dnnl::algorithm::pooling_max;
            break;
        case Mode::AVERAGE:
            mkldnn_pooling_mode = dnnl::algorithm::pooling_avg_include_padding;
            break;
        case Mode::AVERAGE_COUNT_EXCLUDE_PADDING:
            mkldnn_pooling_mode = dnnl::algorithm::pooling_avg_exclude_padding;
            break;
        default:
            megdnn_throw("unsupported pooling mode\n");
    }
    dnnl::memory::dims pool_strides = {SH, SW};
    dnnl::memory::dims pool_padding = {PH, PW};
    dnnl::memory::dims pool_kernel = {FH, FW};
    dnnl::memory&& megdnn_src_memory_ori =
            tensor_to_mkl_memory<dnnl::memory::format_tag::nChw8c, false>(
                    *args.src_tensor, mkldnn_eng, dnnl::memory::data_type::f32);
    dnnl::memory&& megdnn_dst_memory_ori =
            tensor_to_mkl_memory<dnnl::memory::format_tag::nChw8c, false>(
                    *args.dst_tensor, mkldnn_eng, dnnl::memory::data_type::f32);
    auto pool_desc = dnnl::pooling_forward::desc(
            dnnl::prop_kind::forward_inference, mkldnn_pooling_mode,
            megdnn_src_memory_ori.get_desc(), megdnn_dst_memory_ori.get_desc(),
            pool_strides, pool_kernel, pool_padding, pool_padding);
    auto pool_pd = dnnl::pooling_forward::primitive_desc(pool_desc, mkldnn_eng);
    auto pool = dnnl::pooling_forward(pool_pd);
    auto run = [mkldnn_stream, pool, mkldnn_eng, megdnn_src_memory_ori,
                megdnn_dst_memory_ori](void) {
        MEGDNN_MARK_USED_VAR(mkldnn_eng);
        auto mkl_stream = mkldnn_stream;
        pool.execute(mkl_stream, {{DNNL_ARG_SRC, megdnn_src_memory_ori},
                                  {DNNL_ARG_DST, megdnn_dst_memory_ori}});
        mkl_stream.wait();
    };
    MEGDNN_DISPATCH_CPU_KERN_OPR(run());
}
#endif

@@ -0,0 +1,132 @@
/**
 * \file dnn/src/x86/pooling/algo.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#pragma once

#include <unordered_map>
#include "src/common/algo_base.h"
#include "src/common/metahelper.h"
#include "src/x86/pooling/opr_impl.h"
#include "src/x86/handle.h"

namespace megdnn {
namespace x86 {

using AlgoBase = PoolingImpl::AlgoBase;

class PoolingImpl::AlgoBase : public Algorithm {
public:
    enum class AlgoType : uint32_t {
        X86_MeanW2S2AVX,
        X86_MeanW2S2SSE3,
        X86_MaxW2S2SSE,
        X86_MaxW3S3SSE,
#if MEGDNN_X86_WITH_MKL_DNN
        X86_MKLDNNNCHW,
        X86_MKLDNNNCHW88,
#endif
        X86_Fallback
    };
    using Mapper = std::unordered_map<AlgorithmDesc, AlgoBase*>;

    AlgoBase() : Algorithm() { m_handle_type = Handle::HandleType::X86; }
    virtual ~AlgoBase() = default;

    struct SizeArgs {
        HandleImpl* handle;
        PoolingImpl* opr;
        const TensorLayout layout_src, layout_dst;

        std::string to_string() const;
        SizeArgs(PoolingImpl* opr, const TensorLayout& src,
                 const TensorLayout& dst);
    };
    struct ExecArgs : public SizeArgs {
        const TensorND *src_tensor, *dst_tensor;
        Workspace workspace;

        ExecArgs(PoolingImpl* opr, _megdnn_tensor_in src,
                 _megdnn_tensor_out dst, _megdnn_workspace workspace);
    };

    virtual bool is_available(const SizeArgs& args) const = 0;
    virtual void exec(const ExecArgs& args) const = 0;
    uint32_t type() const override { return INVALID_ALGO_TYPE; }

    bool is_available_attribute(
            const SizeArgs& args,
            const AlgoAttribute& positive_attr = AlgoAttribute::REPRODUCIBLE,
            const AlgoAttribute& negative_attr = AlgoAttribute::DEFAULT) {
        return contain_attribute_all(positive_attr) &&
               !contain_attribute_any(negative_attr) && is_available(args);
    }
};

#define ALGO_IMPL(_name)                                                       \
    class PoolingImpl::Algo##_name final : public AlgoBase {                   \
        std::string m_algo_name;                                               \
                                                                               \
    public:                                                                    \
        Algo##_name() : m_algo_name(std::string(#_name).append("_POOLING")) {} \
        AlgoAttribute attribute() const override {                             \
            return AlgoAttribute::REPRODUCIBLE;                                \
        }                                                                      \
        const char* name() const override { return m_algo_name.c_str(); }      \
        bool is_available(const SizeArgs& args) const override;                \
        void exec(const ExecArgs& args) const override;                        \
        MEGDNN_DECL_ALGO_TYPE(X86_##_name)                                     \
    };

ALGO_IMPL(MeanW2S2AVX)
ALGO_IMPL(MeanW2S2SSE3)
ALGO_IMPL(MaxW2S2SSE)
ALGO_IMPL(MaxW3S3SSE)
#if MEGDNN_X86_WITH_MKL_DNN
ALGO_IMPL(MKLDNNNCHW)
ALGO_IMPL(MKLDNNNCHW88)
#endif
#undef ALGO_IMPL
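
// AlgoFallback is a sentinel: its exec() is a no-op because PoolingImpl::exec()
// recognizes it via is_fallback_algo() and calls fallback::PoolingImpl::exec()
// directly (see opr_impl.cpp).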
class PoolingImpl::AlgoFallback final : public AlgoBase {
    std::string m_algo_name;

public:
    AlgoFallback() : m_algo_name("FALLBACK_POOLING") {}
    AlgoAttribute attribute() const override {
        return AlgoAttribute::REPRODUCIBLE;
    }
    const char* name() const override { return m_algo_name.c_str(); }
    bool is_available(const SizeArgs&) const override { return true; }
    void exec(const ExecArgs&) const override {}
    MEGDNN_DECL_ALGO_TYPE(X86_Fallback)
};

class PoolingImpl::AlgoPack : NonCopyableObj {
private:
    AlgoBase::Mapper m_all_algos_map;

    AlgoMeanW2S2AVX algo_mean_w2s2_avx;
    AlgoMeanW2S2SSE3 algo_mean_w2s2_sse3;
    AlgoMaxW2S2SSE algo_max_w2s2_sse;
    AlgoMaxW3S3SSE algo_max_w3s3_sse;
#if MEGDNN_X86_WITH_MKL_DNN
    AlgoMKLDNNNCHW algo_mkldnn_nchw;
    AlgoMKLDNNNCHW88 algo_mkldnn_nchw88;
#endif
    AlgoFallback algo_fallback;

public:
    AlgoPack();
    std::vector<AlgoBase*> all_algos;
    const AlgoBase::Mapper& all_algos_map() const { return m_all_algos_map; }
};

}  // namespace x86
}  // namespace megdnn

@@ -13,9 +13,9 @@
 #include "src/common/utils.h"
 #include "src/naive/handle.h"
 #include "src/x86/handle.h"
-#include "src/x86/pooling/do_max_pooling_3x3_s2x2_float_sse.h"
-#include "src/x86/pooling/pooling_special_cases.h"
 #include "src/x86/utils.h"
+#include "src/x86/pooling/algo.h"
+#include "src/common/algo_chooser.h"
 
 #if MEGDNN_X86_WITH_MKL_DNN
 #include "mkldnn.hpp"
@@ -24,10 +24,9 @@
 using namespace megdnn;
 using namespace x86;
 
-namespace {
-WorkspaceBundle get_bundle(const TensorLayout& src, const TensorLayout& dst,
-                           const param::Pooling& param) {
+WorkspaceBundle megdnn::x86::get_bundle(const TensorLayout& src,
+                                        const TensorLayout& dst,
+                                        const param::Pooling& param) {
     megdnn_assert(
             is_supported(SIMDType::SSE) && src.dtype == dtype::Float32() &&
                     param.format == param::Pooling::Format::NCHW &&
@@ -45,242 +44,63 @@ WorkspaceBundle get_bundle(const TensorLayout& src, const TensorLayout& dst,
     return ws;
 }
 
-#if MEGDNN_X86_WITH_MKL_DNN
-template <dnnl::memory::format_tag format_tag, bool use_mkl_mem>
-dnnl::memory tensor_to_mkl_memory(_megdnn_tensor_in src,
-                                  const dnnl::engine& mkldnn_eng,
-                                  dnnl::memory::data_type mkldnn_datatype) {
-    megdnn_assert(format_tag == dnnl::memory::format_tag::nChw8c ||
-                          format_tag == dnnl::memory::format_tag::nchw ||
-                          format_tag == dnnl::memory::format_tag::nhwc,
-                  "not support format");
-    dnnl::memory::dims src_shape = {
-            static_cast<long>(src.layout[0]), static_cast<long>(src.layout[1]),
-            static_cast<long>(src.layout[2]), static_cast<long>(src.layout[3])};
-    if (format_tag == dnnl::memory::format_tag::nChw8c) {
-        src_shape = {static_cast<long>(src.layout[0]),
-                     static_cast<long>(src.layout[1] * 8),
-                     static_cast<long>(src.layout[2]),
-                     static_cast<long>(src.layout[3])};
-    }
-    auto megdnn_src_md =
-            dnnl::memory::desc({src_shape}, mkldnn_datatype, format_tag);
-    if (use_mkl_mem) {
-        auto megdnn_src_memory = dnnl::memory(megdnn_src_md, mkldnn_eng);
-        return megdnn_src_memory;
-    } else {
-        auto megdnn_src_memory = dnnl::memory(megdnn_src_md, mkldnn_eng,
-                                              const_cast<void*>(src.raw_ptr));
-        return megdnn_src_memory;
-    }
-}
-#endif
-}  // namespace
-
 size_t PoolingImpl::get_workspace_in_bytes(const TensorLayout& src,
                                            const TensorLayout& dst) {
-    if (is_supported(SIMDType::SSE) && src.dtype == dtype::Float32() &&
-        param().mode == Mode::MAX && param().format == Param::Format::NCHW &&
-        param().window_h == 3 && param().window_w == 3 &&
-        param().stride_h == 2 && param().stride_w == 2) {
-        WorkspaceBundle ws = get_bundle(src, dst, param());
-        return ws.total_size_in_bytes();
-    } else {
-        return 0;
+    auto algo = get_algorithm(this, src, dst);
+    if (!is_fallback_algo(algo)) {
+        if (is_supported(SIMDType::SSE) && src.dtype == dtype::Float32() &&
+            param().mode == Mode::MAX &&
+            param().format == Param::Format::NCHW && param().window_h == 3 &&
+            param().window_w == 3 && param().stride_h == 2 &&
+            param().stride_w == 2) {
+            WorkspaceBundle ws = get_bundle(src, dst, param());
+            return ws.total_size_in_bytes();
+        } else {
+            return 0;
+        }
+    } else {
+        auto fallback_workspace =
+                fallback::PoolingImpl::get_workspace_in_bytes(src, dst);
+        return fallback_workspace;
     }
 }
+
+std::vector<Algorithm*> PoolingImpl::get_all_algorithms(
+        const TensorLayout& src, const TensorLayout& dst) {
+    return megdnn::get_all_algorithms<PoolingImpl>({this, src, dst});
+}
+
+Algorithm* PoolingImpl::get_algorithm_heuristic(
+        const TensorLayout& src, const TensorLayout& dst,
+        size_t workspace_limit_in_bytes, const AlgoAttribute& positive_attr,
+        const AlgoAttribute& negative_attr) {
+    MEGDNN_MARK_USED_VAR(workspace_limit_in_bytes);
+    AlgoBase::SizeArgs args(this, src, dst);
+    for (auto iter : algo_pack().all_algos) {
+        if (iter->is_available_attribute(args, positive_attr, negative_attr)) {
+            return iter;
+        }
+    }
+    megdnn_throw(
+            ssprintf("require algorithm with attribute(%s) and without "
+                     "attribute(%s), but can't get suitable algo.\n",
+                     Algorithm::attribute_str(positive_attr).c_str(),
+                     Algorithm::attribute_str(negative_attr).c_str()));
+    return nullptr;
+}
 
 void PoolingImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
                        _megdnn_workspace workspace) {
     check_exec(src.layout, dst.layout, workspace.size);
-    size_t N = src.layout.shape[0], C = src.layout.shape[1],
-           IH = src.layout.shape[2], IW = src.layout.shape[3];
-    size_t OH = dst.layout.shape[2], OW = dst.layout.shape[3];
-    auto mode = param().mode;
-    auto FH = param().window_h, FW = param().window_w;
-    auto SH = param().stride_h, SW = param().stride_w;
-    auto PH = param().pad_h, PW = param().pad_w;
-    bool is_average = (mode == Mode::AVERAGE);
-    bool is_include = true;
-    if (is_supported(SIMDType::AVX) && is_average &&
-        param().format == Param::Format::NCHW &&
-        src.layout.dtype == dtype::Float32() && FH == 2 && FW == 2 && SH == 2 &&
-        SW == 2) {
-        auto sptr = src.ptr<dt_float32>();
-        auto dptr = dst.ptr<dt_float32>();
-        MEGDNN_DISPATCH_CPU_KERN_OPR(rep(n, N) rep(c, C) {
-            mean_pooling_w2x2_s2x2_avx(sptr + n * C * IH * IW + c * IH * IW, IH,
-                                       IW, dptr + n * C * OH * OW + c * OH * OW,
-                                       OH, OW, PH, PW, is_include);
-        });
-        return;
-    }
-    if (is_supported(SIMDType::SSE3) && is_average &&
-        src.layout.dtype == dtype::Float32() &&
-        param().format == Param::Format::NCHW && FH == 2 && FW == 2 &&
-        SH == 2 && SW == 2) {
-        auto sptr = src.ptr<dt_float32>();
-        auto dptr = dst.ptr<dt_float32>();
-        MEGDNN_DISPATCH_CPU_KERN_OPR(rep(n, N) rep(c, C) {
-            mean_pooling_w2x2_s2x2_sse3(sptr + n * C * IH * IW + c * IH * IW,
-                                        IH, IW,
-                                        dptr + n * C * OH * OW + c * OH * OW,
-                                        OH, OW, PH, PW, is_include);
-        });
-        return;
-    }
-    if (is_supported(SIMDType::SSE) && src.layout.dtype == dtype::Float32() &&
-        mode == Mode::MAX && param().format == Param::Format::NCHW && FH == 2 &&
-        FW == 2 && SH == 2 && SW == 2) {
-        auto sptr = src.ptr<dt_float32>();
-        auto dptr = dst.ptr<dt_float32>();
-        MEGDNN_DISPATCH_CPU_KERN_OPR(rep(n, N) rep(c, C) {
-            max_pooling_w2x2_s2x2_sse(sptr + n * C * IH * IW + c * IH * IW, IH,
-                                      IW, dptr + n * C * OH * OW + c * OH * OW,
-                                      OH, OW, PH, PW);
-        });
-        return;
-    }
-    if (is_supported(SIMDType::SSE) && src.layout.dtype == dtype::Float32() &&
-        mode == Mode::MAX && param().format == Param::Format::NCHW && FH == 3 &&
-        FW == 3 && SH == 2 && SW == 2) {
-        auto sptr = src.ptr<dt_float32>();
-        auto dptr = dst.ptr<dt_float32>();
-        MEGDNN_DISPATCH_CPU_KERN_OPR(
-                WorkspaceBundle ws =
-                        get_bundle(src.layout, dst.layout, param());
-                ws.set(workspace.raw_ptr); rep(n, N) rep(c, C) {
-                    do_max_pooling_3x3_s2x2_float_SSE(
-                            sptr + n * C * IH * IW + c * IH * IW,
-                            dptr + n * C * OH * OW + c * OH * OW, IH, IW, OH,
-                            OW, PH, PW, ws);
-                });
-        return;
-    }
-#if MEGDNN_X86_WITH_MKL_DNN
-    // Mkldnn provide optimized code for nhwc int8 pooling now.
-    // Mkldnn can not change the layout automatic.
-    // Reorder nchw input to nhwc, do pooling, reorder nhwc result to nchw
-    if ((src.layout.dtype.enumv() == DTypeEnum::QuantizedS8 ||
-         src.layout.dtype.enumv() == DTypeEnum::Int8) &&
-        mode == Mode::MAX && param().format == Param::Format::NCHW) {
-        auto x86_handle = static_cast<HandleImpl*>(inplace_cpu_handle().get());
-        auto mkldnn_eng = x86_handle->mkldnn_engine();
-        auto mkldnn_stream = x86_handle->mkldnn_stream();
-        auto mkldnn_pooling_mode = dnnl::algorithm::pooling_max;
-        dnnl::memory::dims pool_strides = {SH, SW};
-        dnnl::memory::dims pool_padding = {PH, PW};
-        dnnl::memory::dims pool_kernel = {FH, FW};
-        dnnl::memory&& megdnn_src_memory_ori =
-                tensor_to_mkl_memory<dnnl::memory::format_tag::nchw, false>(
-                        src, mkldnn_eng, dnnl::memory::data_type::s8);
-        dnnl::memory&& megdnn_dst_memory_ori =
-                tensor_to_mkl_memory<dnnl::memory::format_tag::nchw, false>(
-                        dst, mkldnn_eng, dnnl::memory::data_type::s8);
-        dnnl::memory&& megdnn_src_memory =
-                tensor_to_mkl_memory<dnnl::memory::format_tag::nhwc, true>(
-                        src, mkldnn_eng, dnnl::memory::data_type::s8);
-        dnnl::memory&& megdnn_dst_memory =
-                tensor_to_mkl_memory<dnnl::memory::format_tag::nhwc, true>(
-                        dst, mkldnn_eng, dnnl::memory::data_type::s8);
-        auto reorder_src =
-                dnnl::reorder(megdnn_src_memory_ori, megdnn_src_memory);
-        auto reorder_dst =
-                dnnl::reorder(megdnn_dst_memory, megdnn_dst_memory_ori);
-        auto pool1_desc = dnnl::pooling_forward::desc(
-                dnnl::prop_kind::forward_inference, mkldnn_pooling_mode,
-                megdnn_src_memory.get_desc(), megdnn_dst_memory.get_desc(),
-                pool_strides, pool_kernel, pool_padding, pool_padding);
-        auto pool_pd =
-                dnnl::pooling_forward::primitive_desc(pool1_desc, mkldnn_eng);
-        auto pool = dnnl::pooling_forward(pool_pd);
-        auto run = [mkldnn_stream, mkldnn_eng, reorder_src, pool, reorder_dst,
-                    megdnn_src_memory_ori, megdnn_src_memory, megdnn_dst_memory,
-                    megdnn_dst_memory_ori](void) {
-            MEGDNN_MARK_USED_VAR(mkldnn_eng);
-            auto mkl_stream = mkldnn_stream;
-            reorder_src.execute(mkl_stream,
-                                {{DNNL_ARG_FROM, megdnn_src_memory_ori},
-                                 {DNNL_ARG_TO, megdnn_src_memory}});
-            pool.execute(mkl_stream, {{DNNL_ARG_SRC, megdnn_src_memory},
-                                      {DNNL_ARG_DST, megdnn_dst_memory}});
-            reorder_dst.execute(mkl_stream,
-                                {{DNNL_ARG_FROM, megdnn_dst_memory},
-                                 {DNNL_ARG_TO, megdnn_dst_memory_ori}});
-            mkl_stream.wait();
-        };
-        MEGDNN_DISPATCH_CPU_KERN_OPR(run());
-        return;
-    }
-    if (src.layout.dtype == dtype::Float32() && mode == Mode::MAX &&
-        param().format == Param::Format::NCHW88) {
-        auto x86_handle = static_cast<HandleImpl*>(inplace_cpu_handle().get());
-        auto mkldnn_eng = x86_handle->mkldnn_engine();
-        auto mkldnn_stream = x86_handle->mkldnn_stream();
-        auto mkldnn_pooling_mode = dnnl::algorithm::pooling_max;
-        switch (mode) {
-            case Mode::MAX:
-                mkldnn_pooling_mode = dnnl::algorithm::pooling_max;
-                break;
-            case Mode::AVERAGE:
-                mkldnn_pooling_mode =
-                        dnnl::algorithm::pooling_avg_include_padding;
-                break;
-            case Mode::AVERAGE_COUNT_EXCLUDE_PADDING:
-                mkldnn_pooling_mode =
-                        dnnl::algorithm::pooling_avg_exclude_padding;
-                break;
-            default:
-                megdnn_assert(0, "not supported pooling mode\n");
-        };
-        dnnl::memory::dims pool_strides = {SH, SW};
-        dnnl::memory::dims pool_padding = {PH, PW};
-        dnnl::memory::dims pool_kernel = {FH, FW};
-        dnnl::memory&& megdnn_src_memory_ori =
-                tensor_to_mkl_memory<dnnl::memory::format_tag::nChw8c, false>(
-                        src, mkldnn_eng, dnnl::memory::data_type::f32);
-        dnnl::memory&& megdnn_dst_memory_ori =
-                tensor_to_mkl_memory<dnnl::memory::format_tag::nChw8c, false>(
-                        dst, mkldnn_eng, dnnl::memory::data_type::f32);
-        auto pool_desc = dnnl::pooling_forward::desc(
-                dnnl::prop_kind::forward_inference, mkldnn_pooling_mode,
-                megdnn_src_memory_ori.get_desc(),
-                megdnn_dst_memory_ori.get_desc(), pool_strides, pool_kernel,
-                pool_padding, pool_padding);
-        auto pool_pd =
-                dnnl::pooling_forward::primitive_desc(pool_desc, mkldnn_eng);
-        auto pool = dnnl::pooling_forward(pool_pd);
-        auto run = [mkldnn_stream, pool, mkldnn_eng, megdnn_src_memory_ori,
-                    megdnn_dst_memory_ori](void) {
-            MEGDNN_MARK_USED_VAR(mkldnn_eng);
-            auto mkl_stream = mkldnn_stream;
-            pool.execute(mkl_stream, {{DNNL_ARG_SRC, megdnn_src_memory_ori},
-                                      {DNNL_ARG_DST, megdnn_dst_memory_ori}});
-            mkl_stream.wait();
-        };
-        MEGDNN_DISPATCH_CPU_KERN_OPR(run());
-        return;
-    }
-#endif
-    fallback::PoolingImpl::exec(src, dst, Workspace());
+    AlgoBase::ExecArgs args(this, src, dst, workspace);
+    auto algo = get_algorithm(this, src.layout, dst.layout);
+    if (!is_fallback_algo(algo)) {
+        algo->exec(args);
+    } else {
+        fallback::PoolingImpl::exec(src, dst, Workspace());
+    }
 }
 
 // vim: syntax=cpp.doxygen

@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #pragma once
 #include "src/fallback/pooling/opr_impl.h"
@@ -14,17 +15,62 @@
 namespace megdnn {
 namespace x86 {
 
-class PoolingImpl: public fallback::PoolingImpl {
-    public:
-        using fallback::PoolingImpl::PoolingImpl;
-        void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
-                _megdnn_workspace) override;
-        size_t get_workspace_in_bytes(const TensorLayout &,
-                const TensorLayout &) override;
-};
+class PoolingImpl : public fallback::PoolingImpl {
+private:
+    class AlgoMeanW2S2AVX;
+    class AlgoMeanW2S2SSE3;
+    class AlgoMaxW2S2SSE;
+    class AlgoMaxW3S3SSE;
+#if MEGDNN_X86_WITH_MKL_DNN
+    class AlgoMKLDNNNCHW;
+    class AlgoMKLDNNNCHW88;
+#endif
+    class AlgoFallback;
+    class AlgoPack;
+    static AlgoPack sm_algo_pack;
+
+public:
+    using fallback::PoolingImpl::PoolingImpl;
+    class AlgoBase;
+
+    void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
+              _megdnn_workspace) override;
+    size_t get_workspace_in_bytes(const TensorLayout&,
+                                  const TensorLayout&) override;
+
+    static size_t constexpr MAX_SPATIAL_DIM = 2;
+
+    const char* get_algorithm_set_name() const override {
+        return "X86_POOLING_FORWARD";
+    }
+
+    Algorithm* get_algorithm_from_desc(const AlgorithmDesc& desc) override;
+
+    AlgorithmInfo get_algorithm_info_heuristic(
+            const TensorLayout& src, const TensorLayout& dst,
+            size_t workspace_limit_in_bytes, const AlgoAttribute& positive_attr,
+            const AlgoAttribute& negative_attr) {
+        return get_algorithm_heuristic(src, dst, workspace_limit_in_bytes,
+                                       positive_attr, negative_attr)
+                ->info();
+    }
+
+    static const AlgoPack& algo_pack() { return sm_algo_pack; }
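+    // identifies the sentinel fallback algorithm by its registered name
+    // ("FALLBACK_POOLING", see AlgoFallback in algo.h)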
+    bool is_fallback_algo(Algorithm* algo) {
+        return strcmp(algo->name(), "FALLBACK_POOLING") == 0;
+    }
+
+protected:
+    std::vector<Algorithm*> get_all_algorithms(
+            const TensorLayout& src, const TensorLayout& dst) override;
+    Algorithm* get_algorithm_heuristic(
+            const TensorLayout& src, const TensorLayout& dst,
+            size_t workspace_limit_in_bytes, const AlgoAttribute& positive_attr,
+            const AlgoAttribute& negative_attr) override;
+};
+
+WorkspaceBundle get_bundle(const TensorLayout& src, const TensorLayout& dst,
+                           const param::Pooling& param);
 
 }  // namespace x86
 }  // namespace megdnn
 
 // vim: syntax=cpp.doxygen

@@ -159,6 +159,42 @@ TEST(TestOprDNN, PoolingExePolicy) {
                 "cudnnReproducible") != std::string::npos);
 }
 
+TEST(TestOprDNN, PoolingForwardFastrun) {
+    using Param = opr::Pooling::Param;
+    Param param;
+    using Policy = opr::Pooling::ExecutionPolicy;
+    using S = Policy::Strategy;
+
+    auto cn = CompNode::load("xpux");
+    cn.activate();
+    auto orig_impl = PersistentCache::set_impl(
+            std::make_shared<InMemoryPersistentCache>());
+
+    HostTensorND host_y;
+    S strategy = S::PROFILE | S::REPRODUCIBLE;
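+    // PROFILE triggers fastrun: the available pooling algorithms are profiled
+    // and the winner is recorded in the in-memory persistent cache installed
+    // above; REPRODUCIBLE restricts the search to reproducible algorithms.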
+    auto graph = ComputingGraph::make();
+    HostTensorGenerator<> gen;
+    TensorShape shape = {1, 20, 24, 24};
+    auto input = opr::Host2DeviceCopy::make(*graph, gen(shape, cn));
+
+    param.mode = Param::Mode::MAX;
+    param.window_h = param.window_w = 2;
+    param.stride_h = param.stride_w = 2;
+    param.pad_h = param.pad_w = 0;
+    param.format = Param::Format::NCHW;
+
+    Policy policy;
+    policy.strategy = strategy;
+
+    auto pooling = opr::PoolingForward::make(input, param, {}, policy);
+    auto func = graph->compile({make_callback_copy(pooling, host_y)});
+    func->execute().wait();
+}
+
 }  // anonymous namespace
 
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}