@@ -24,21 +24,7 @@ Convolution3DBackwardDataImpl::AlgoPack::AlgoPack() { | |||||
for (auto &&i: cudnn) { | for (auto &&i: cudnn) { | ||||
all_algos.push_back(&i); | all_algos.push_back(&i); | ||||
} | } | ||||
all_algos.reserve(all_algos.size() * 2); | |||||
// add gconv algos by AlgoGroupConvGeneral | |||||
auto all_algos_data = all_algos.data(); | |||||
for (size_t i = 1; i < all_algos.size(); ++ i) { | |||||
gconv.push_back({all_algos[i]}); | |||||
} | |||||
for (size_t i = 1; i < all_algos.size(); ++ i) { | |||||
algo2gconv[all_algos[i]] = &gconv[i - 1]; | |||||
} | |||||
for (auto &&i: gconv) { | |||||
all_algos.push_back(&i); | |||||
} | |||||
megdnn_assert(all_algos_data == all_algos.data()); | |||||
all_algos.push_back(&group); | |||||
for (auto&& algo : all_algos) { | for (auto&& algo : all_algos) { | ||||
m_all_algos_map.emplace(algo->info().desc, algo); | m_all_algos_map.emplace(algo->info().desc, algo); | ||||
@@ -61,27 +47,26 @@ Convolution3DBackwardDataImpl::AlgoPack::cudnn_from_enum( | |||||
Convolution3DBackwardDataImpl::AlgoPack Convolution3DBackwardDataImpl::sm_algo_pack; | Convolution3DBackwardDataImpl::AlgoPack Convolution3DBackwardDataImpl::sm_algo_pack; | ||||
Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs( | Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs( | ||||
Convolution3DBackwardDataImpl *o, | |||||
const TensorLayout &filter, const TensorLayout &diff, | |||||
const TensorLayout &grad): | |||||
SizeArgs(o, o->make_canonized_filter_meta(grad.ndim, filter), diff, grad) | |||||
{ | |||||
} | |||||
const Convolution3DBackwardDataImpl* o, const TensorLayout& filter, | |||||
const TensorLayout& diff, const TensorLayout& grad) | |||||
: SizeArgs(o, filter, o->make_canonized_filter_meta(grad.ndim, filter), | |||||
diff, grad) {} | |||||
Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs( | Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs( | ||||
Convolution3DBackwardDataImpl *o, | |||||
const CanonizedFilterMeta &filter, const TensorLayout &diff, | |||||
const Convolution3DBackwardDataImpl *o, const TensorLayout& filter, | |||||
const CanonizedFilterMeta &filter_meta, const TensorLayout &diff, | |||||
const TensorLayout &grad): | const TensorLayout &grad): | ||||
handle{concrete_handle(o->handle())}, | handle{concrete_handle(o->handle())}, | ||||
filter_meta{filter}, | |||||
filter_meta{filter_meta}, | |||||
diff_layout{&diff}, | diff_layout{&diff}, | ||||
grad_layout{&grad}, | grad_layout{&grad}, | ||||
filter_layout{&filter}, | |||||
opr{o} | opr{o} | ||||
{ | { | ||||
} | } | ||||
Convolution3DBackwardDataImpl::AlgoBase::ExecArgs::ExecArgs( | Convolution3DBackwardDataImpl::AlgoBase::ExecArgs::ExecArgs( | ||||
Convolution3DBackwardDataImpl *opr, | |||||
const Convolution3DBackwardDataImpl *opr, | |||||
_megdnn_tensor_in filter, | _megdnn_tensor_in filter, | ||||
_megdnn_tensor_in diff, | _megdnn_tensor_in diff, | ||||
_megdnn_tensor_out grad, | _megdnn_tensor_out grad, | ||||
@@ -42,31 +42,33 @@ public: | |||||
struct SizeArgs { | struct SizeArgs { | ||||
HandleImpl* handle; | HandleImpl* handle; | ||||
CanonizedFilterMeta filter_meta; | CanonizedFilterMeta filter_meta; | ||||
const TensorLayout *diff_layout, *grad_layout; | |||||
Convolution3DBackwardDataImpl* opr; | |||||
const TensorLayout *diff_layout, *grad_layout, *filter_layout; | |||||
const Convolution3DBackwardDataImpl* opr; | |||||
std::string to_string() const; | std::string to_string() const; | ||||
void init_desc(convolution3d::CUDNNBwdDataDescs& desc) const { | void init_desc(convolution3d::CUDNNBwdDataDescs& desc) const { | ||||
desc.set(filter_meta, *diff_layout, *grad_layout, opr->param()); | desc.set(filter_meta, *diff_layout, *grad_layout, opr->param()); | ||||
} | } | ||||
SizeArgs(Convolution3DBackwardDataImpl* opr, const TensorLayout& filter, | |||||
const TensorLayout& diff, const TensorLayout& grad); | |||||
SizeArgs(Convolution3DBackwardDataImpl* opr, | |||||
const CanonizedFilterMeta& filter, const TensorLayout& diff, | |||||
SizeArgs(const Convolution3DBackwardDataImpl* opr, | |||||
const TensorLayout& filter, const TensorLayout& diff, | |||||
const TensorLayout& grad); | const TensorLayout& grad); | ||||
SizeArgs(const Convolution3DBackwardDataImpl* opr, | |||||
const TensorLayout& filter, | |||||
const CanonizedFilterMeta& filter_meta, | |||||
const TensorLayout& diff, const TensorLayout& grad); | |||||
convolution3d::ForwardSizeArgs as_fwd_args() const { | convolution3d::ForwardSizeArgs as_fwd_args() const { | ||||
return {handle, grad_layout, filter_meta, diff_layout, | |||||
opr->param().data_type}; | |||||
return {handle, grad_layout, filter_layout, | |||||
filter_meta, diff_layout, opr->param().data_type}; | |||||
} | } | ||||
}; | }; | ||||
struct ExecArgs : public SizeArgs { | struct ExecArgs : public SizeArgs { | ||||
const TensorND *filter_tensor, *diff_tensor, *grad_tensor; | const TensorND *filter_tensor, *diff_tensor, *grad_tensor; | ||||
Workspace workspace; | Workspace workspace; | ||||
ExecArgs(Convolution3DBackwardDataImpl* opr, _megdnn_tensor_in filter, | |||||
_megdnn_tensor_in diff, _megdnn_tensor_out grad, | |||||
_megdnn_workspace workspace); | |||||
ExecArgs(const Convolution3DBackwardDataImpl* opr, | |||||
_megdnn_tensor_in filter, _megdnn_tensor_in diff, | |||||
_megdnn_tensor_out grad, _megdnn_workspace workspace); | |||||
}; | }; | ||||
virtual bool is_available(const SizeArgs& args) const = 0; | virtual bool is_available(const SizeArgs& args) const = 0; | ||||
virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0; | virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0; | ||||
@@ -154,29 +156,25 @@ public: | |||||
//! implement group conv by another algo | //! implement group conv by another algo | ||||
class Convolution3DBackwardDataImpl::AlgoGroupConvGeneral final | class Convolution3DBackwardDataImpl::AlgoGroupConvGeneral final | ||||
: public AlgoBase { | : public AlgoBase { | ||||
AlgoBase* m_impl; | |||||
std::string m_name; | |||||
public: | public: | ||||
AlgoGroupConvGeneral(AlgoBase* impl); | |||||
bool is_available(const SizeArgs& args) const override; | bool is_available(const SizeArgs& args) const override; | ||||
size_t get_workspace_in_bytes(const SizeArgs& args) const override; | size_t get_workspace_in_bytes(const SizeArgs& args) const override; | ||||
void exec(const ExecArgs& args) const override; | void exec(const ExecArgs& args) const override; | ||||
std::vector<SearchItem> get_subopr_list( | |||||
const TensorLayoutArray& layouts, | |||||
const OperatorBase* opr) const override; | |||||
const char* name() const override { return m_name.c_str(); } | |||||
const char* name() const override { | |||||
return "CUDA:GROUP_CONV3D_BACKWARD_DATA"; | |||||
} | |||||
static void modify_size_args(SizeArgs& args, TensorLayout& diff_pg, | |||||
TensorLayout& grad_pg); | |||||
AlgoAttribute attribute() const override { | AlgoAttribute attribute() const override { | ||||
auto ret = static_cast<AlgoAttribute>(0); | |||||
if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) { | |||||
ret |= AlgoAttribute::REPRODUCIBLE; | |||||
} | |||||
return ret; | |||||
return AlgoAttribute::REPRODUCIBLE; | |||||
} | } | ||||
MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | ||||
private: | |||||
WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const; | |||||
}; | }; | ||||
class Convolution3DBackwardDataImpl::AlgoPack : NonCopyableObj { | class Convolution3DBackwardDataImpl::AlgoPack : NonCopyableObj { | ||||
@@ -190,8 +188,7 @@ public: | |||||
std::vector<AlgoCUDNN> cudnn; | std::vector<AlgoCUDNN> cudnn; | ||||
AlgoChanwise chanwise; | AlgoChanwise chanwise; | ||||
std::vector<AlgoGroupConvGeneral> gconv; | |||||
std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv; | |||||
AlgoGroupConvGeneral group; | |||||
std::vector<AlgoBase*> | std::vector<AlgoBase*> | ||||
//! all algorithms | //! all algorithms | ||||
@@ -15,68 +15,121 @@ using namespace megdnn; | |||||
using namespace cuda; | using namespace cuda; | ||||
using namespace convolution3d; | using namespace convolution3d; | ||||
void Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::modify_size_args( | |||||
Convolution3DBackwardDataImpl::AlgoBase::SizeArgs &args, | |||||
TensorLayout &diff_pg, TensorLayout &grad_pg) { | |||||
diff_pg = *args.diff_layout; | |||||
grad_pg = *args.grad_layout; | |||||
namespace { | |||||
std::pair<TensorLayoutArray, Convolution3DBackwardDataImpl::Param> | |||||
sub_opr_config(const Convolution3DBackwardDataImpl::AlgoBase::SizeArgs& args) { | |||||
SmallVector<size_t> flt_shape(0); | |||||
std::vector<ptrdiff_t> flt_stride(0); | |||||
size_t idx = 0; | |||||
// check if the first dim is group | |||||
if (args.filter_layout->ndim > args.grad_layout->ndim) | |||||
++idx; | |||||
for (; idx < args.filter_layout->ndim; ++idx) { | |||||
flt_shape.push_back(args.filter_layout->shape[idx]); | |||||
flt_stride.push_back(args.filter_layout->stride[idx]); | |||||
} | |||||
TensorLayout filter_pg(flt_shape, flt_stride, | |||||
args.filter_layout->dtype, | |||||
args.filter_layout->format); | |||||
TensorLayout diff_pg = *args.diff_layout; | |||||
TensorLayout grad_pg = *args.grad_layout; | |||||
auto nr_grp = args.filter_meta.group; | auto nr_grp = args.filter_meta.group; | ||||
args.filter_meta.group = 1; | |||||
diff_pg.shape[1] /= nr_grp; | |||||
grad_pg.shape[1] /= nr_grp; | |||||
args.diff_layout = &diff_pg; | |||||
args.grad_layout = &grad_pg; | |||||
size_t c_pos = 1; | |||||
diff_pg.shape[c_pos] /= nr_grp; | |||||
grad_pg.shape[c_pos] /= nr_grp; | |||||
megdnn::param::Convolution3D param = args.opr->param(); | |||||
param.sparse = megdnn::param::Convolution3D::Sparse::DENSE; | |||||
std::pair<TensorLayoutArray, Convolution3DBackwardDataImpl::Param> ret; | |||||
ret.first = {filter_pg, diff_pg, grad_pg}; | |||||
ret.second = param; | |||||
return ret; | |||||
} | } | ||||
Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral( | |||||
AlgoBase *impl): | |||||
m_impl{impl} | |||||
{ | |||||
m_name = "group_conv3d:"; | |||||
m_name += impl->name(); | |||||
std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DBackwardData>> | |||||
prepare_sub_opr(const Convolution3DBackwardDataImpl::AlgoBase::SizeArgs& args) { | |||||
auto conv3d_backdata_opr = | |||||
args.handle->create_operator<Convolution3DBackwardData>(); | |||||
set_execution_policy<Convolution3DBackwardData, Convolution3DBackwardData*>( | |||||
args.opr, conv3d_backdata_opr.get()); | |||||
auto&& config = sub_opr_config(args); | |||||
conv3d_backdata_opr->param() = config.second; | |||||
return {config.first, std::move(conv3d_backdata_opr)}; | |||||
} | |||||
} // namespace | |||||
std::vector<Algorithm::SearchItem> | |||||
Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_subopr_list( | |||||
const TensorLayoutArray& layouts, const OperatorBase* opr) const { | |||||
AlgoBase::SizeArgs args{ | |||||
static_cast<const Convolution3DBackwardDataImpl*>(opr), layouts[0], | |||||
layouts[1], layouts[2]}; | |||||
auto&& config = sub_opr_config(args); | |||||
std::string param_str; | |||||
Algorithm::serialize_write_pod(config.second, param_str); | |||||
return {{Algorithm::OprType::CONVOLUTION3D_BACKWARD_DATA, param_str, | |||||
config.first}}; | |||||
} | } | ||||
bool Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::is_available( | bool Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::is_available( | ||||
const SizeArgs &args) const { | const SizeArgs &args) const { | ||||
if (args.filter_meta.group <= 1) | if (args.filter_meta.group <= 1) | ||||
return false; | return false; | ||||
auto sub_args = args; | |||||
TensorLayout diff_pg, grad_pg; | |||||
modify_size_args(sub_args, diff_pg, grad_pg); | |||||
return m_impl->is_available(sub_args); | |||||
if (args.filter_meta.format != Param::Format::NCDHW) { | |||||
return false; | |||||
} | |||||
auto config = prepare_sub_opr(args); | |||||
return get_algorithm( | |||||
static_cast<Convolution3DBackwardDataImpl*>(config.second.get()), | |||||
config.first[0], config.first[1], config.first[2]); | |||||
} | |||||
WorkspaceBundle | |||||
Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_bundle( | |||||
void* ptr, const SizeArgs& args) const { | |||||
auto config = prepare_sub_opr(args); | |||||
size_t sizes = config.second->get_workspace_in_bytes( | |||||
config.first[0], config.first[1], config.first[2]); | |||||
return {ptr, {sizes}}; | |||||
} | } | ||||
size_t Convolution3DBackwardDataImpl::AlgoGroupConvGeneral:: | |||||
get_workspace_in_bytes(const SizeArgs &args) const { | |||||
auto sub_args = args; | |||||
TensorLayout diff_pg, grad_pg; | |||||
modify_size_args(sub_args, diff_pg, grad_pg); | |||||
return m_impl->get_workspace_in_bytes(sub_args); | |||||
size_t | |||||
Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_in_bytes( | |||||
const SizeArgs& args) const { | |||||
return get_workspace_bundle(nullptr, args).total_size_in_bytes(); | |||||
} | } | ||||
void Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::exec( | void Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::exec( | ||||
const ExecArgs &args) const { | |||||
auto sub_args = args; | |||||
TensorND tflt{*args.filter_tensor}, tdiff{*args.diff_tensor}, | |||||
tgrad{*args.grad_tensor}; | |||||
modify_size_args(sub_args, tdiff.layout, tgrad.layout); | |||||
sub_args.filter_tensor = &tflt; | |||||
sub_args.diff_tensor = &tdiff; | |||||
sub_args.grad_tensor = &tgrad; | |||||
auto grp = args.filter_meta.group; | |||||
auto &&fm = args.filter_meta; | |||||
auto strd_flt = (fm.icpg * fm.ocpg * | |||||
fm.spatial[0] * fm.spatial[1] * fm.spatial[2] * tflt.layout.dtype.size()), | |||||
strd_diff = ( | |||||
tdiff.layout.stride[1] * fm.ocpg * tdiff.layout.dtype.size()), | |||||
strd_grad = ( | |||||
tgrad.layout.stride[1] * fm.icpg * tgrad.layout.dtype.size()); | |||||
for (uint32_t g = 0; g < grp; ++ g) { | |||||
m_impl->exec(sub_args); | |||||
incr_voidp(tflt.raw_ptr, strd_flt); | |||||
incr_voidp(tdiff.raw_ptr, strd_diff); | |||||
incr_voidp(tgrad.raw_ptr, strd_grad); | |||||
const ExecArgs& args) const { | |||||
auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args); | |||||
{ | |||||
auto config = prepare_sub_opr(args); | |||||
TensorND tfilter{args.filter_tensor->raw_ptr, config.first[0]}; | |||||
TensorND tdiff{args.diff_tensor->raw_ptr, config.first[1]}; | |||||
TensorND tgrad{args.grad_tensor->raw_ptr, config.first[2]}; | |||||
size_t c_pos = 1; | |||||
auto grp = args.filter_meta.group; | |||||
auto&& fm = args.filter_meta; | |||||
auto strd_flt = (fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] * | |||||
fm.spatial[2] * tfilter.layout.dtype.size()), | |||||
strd_diff = (tdiff.layout.stride[c_pos] * fm.ocpg * | |||||
tdiff.layout.dtype.size()), | |||||
strd_grad = (tgrad.layout.stride[c_pos] * fm.icpg * | |||||
tgrad.layout.dtype.size()); | |||||
for (uint32_t g = 0; g < grp; ++g) { | |||||
config.second->exec(tfilter, tdiff, tgrad, bundle.get_workspace(0)); | |||||
incr_voidp(tfilter.raw_ptr, strd_flt); | |||||
incr_voidp(tdiff.raw_ptr, strd_diff); | |||||
incr_voidp(tgrad.raw_ptr, strd_grad); | |||||
} | |||||
} | } | ||||
} | } | ||||
@@ -26,21 +26,7 @@ Convolution3DBackwardFilterImpl::AlgoPack::AlgoPack() { | |||||
} | } | ||||
all_algos.push_back(&inplace_matmul); | all_algos.push_back(&inplace_matmul); | ||||
all_algos.reserve(all_algos.size() * 2); | |||||
// add gconv algos by AlgoGroupConvGeneral | |||||
auto all_algos_data = all_algos.data(); | |||||
for (size_t i = 1; i < all_algos.size(); ++ i) { | |||||
gconv.push_back({all_algos[i]}); | |||||
} | |||||
for (size_t i = 1; i < all_algos.size(); ++ i) { | |||||
algo2gconv[all_algos[i]] = &gconv[i - 1]; | |||||
} | |||||
for (auto &&i: gconv) { | |||||
all_algos.push_back(&i); | |||||
} | |||||
megdnn_assert(all_algos_data == all_algos.data()); | |||||
non_cudnn_algos.push_back(all_algos.rbegin()[0]); //group inplace_matmul | |||||
all_algos.push_back(&group); | |||||
for (auto&& algo : all_algos) { | for (auto&& algo : all_algos) { | ||||
m_all_algos_map.emplace(algo->info().desc, algo); | m_all_algos_map.emplace(algo->info().desc, algo); | ||||
@@ -64,27 +50,26 @@ Convolution3DBackwardFilterImpl::AlgoPack | |||||
Convolution3DBackwardFilterImpl::sm_algo_pack; | Convolution3DBackwardFilterImpl::sm_algo_pack; | ||||
Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs( | Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs( | ||||
Convolution3DBackwardFilterImpl *o, | |||||
const Convolution3DBackwardFilterImpl *o, | |||||
const TensorLayout &src, const TensorLayout &diff, | const TensorLayout &src, const TensorLayout &diff, | ||||
const TensorLayout &grad): | const TensorLayout &grad): | ||||
SizeArgs(o, src, diff, o->make_canonized_filter_meta(src.ndim, grad)) | |||||
SizeArgs(o, src, diff, grad, o->make_canonized_filter_meta(src.ndim, grad)) | |||||
{ | { | ||||
} | } | ||||
Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs( | Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs( | ||||
Convolution3DBackwardFilterImpl *o, | |||||
const TensorLayout &src, const TensorLayout &diff, | |||||
const CanonizedFilterMeta &grad): | |||||
handle{concrete_handle(o->handle())}, | |||||
src_layout{&src}, | |||||
diff_layout{&diff}, | |||||
grad_filter_meta{grad}, | |||||
opr{o} | |||||
{ | |||||
} | |||||
const Convolution3DBackwardFilterImpl* o, const TensorLayout& src, | |||||
const TensorLayout& diff, const TensorLayout& grad, | |||||
const CanonizedFilterMeta& grad_meta) | |||||
: handle{concrete_handle(o->handle())}, | |||||
src_layout{&src}, | |||||
diff_layout{&diff}, | |||||
grad_layout{&grad}, | |||||
grad_filter_meta{grad_meta}, | |||||
opr{o} {} | |||||
Convolution3DBackwardFilterImpl::AlgoBase::ExecArgs::ExecArgs( | Convolution3DBackwardFilterImpl::AlgoBase::ExecArgs::ExecArgs( | ||||
Convolution3DBackwardFilterImpl *opr, | |||||
const Convolution3DBackwardFilterImpl *opr, | |||||
_megdnn_tensor_in src, | _megdnn_tensor_in src, | ||||
_megdnn_tensor_in diff, | _megdnn_tensor_in diff, | ||||
_megdnn_tensor_out grad, | _megdnn_tensor_out grad, | ||||
@@ -36,31 +36,34 @@ public: | |||||
struct SizeArgs { | struct SizeArgs { | ||||
HandleImpl* handle; | HandleImpl* handle; | ||||
const TensorLayout *src_layout, *diff_layout; | |||||
const TensorLayout *src_layout, *diff_layout, *grad_layout; | |||||
CanonizedFilterMeta grad_filter_meta; | CanonizedFilterMeta grad_filter_meta; | ||||
Convolution3DBackwardFilterImpl* opr; | |||||
const Convolution3DBackwardFilterImpl* opr; | |||||
std::string to_string() const; | std::string to_string() const; | ||||
void init_desc(convolution3d::CUDNNBwdFilterDescs& desc) const { | void init_desc(convolution3d::CUDNNBwdFilterDescs& desc) const { | ||||
desc.set(*src_layout, *diff_layout, grad_filter_meta, opr->param()); | desc.set(*src_layout, *diff_layout, grad_filter_meta, opr->param()); | ||||
} | } | ||||
SizeArgs(Convolution3DBackwardFilterImpl* opr, const TensorLayout& src, | |||||
const TensorLayout& diff, const TensorLayout& grad); | |||||
SizeArgs(Convolution3DBackwardFilterImpl* opr, const TensorLayout& src, | |||||
const TensorLayout& diff, const CanonizedFilterMeta& grad); | |||||
SizeArgs(const Convolution3DBackwardFilterImpl* opr, | |||||
const TensorLayout& src, const TensorLayout& diff, | |||||
const TensorLayout& grad); | |||||
SizeArgs(const Convolution3DBackwardFilterImpl* opr, | |||||
const TensorLayout& src, const TensorLayout& diff, | |||||
const TensorLayout& grad, | |||||
const CanonizedFilterMeta& grad_meta); | |||||
convolution3d::ForwardSizeArgs as_fwd_args() const { | convolution3d::ForwardSizeArgs as_fwd_args() const { | ||||
return {handle, src_layout, grad_filter_meta, diff_layout, | |||||
opr->param().data_type}; | |||||
return {handle, src_layout, grad_layout, | |||||
grad_filter_meta, diff_layout, opr->param().data_type}; | |||||
} | } | ||||
}; | }; | ||||
struct ExecArgs : public SizeArgs { | struct ExecArgs : public SizeArgs { | ||||
const TensorND *src_tensor, *diff_tensor, *grad_tensor; | const TensorND *src_tensor, *diff_tensor, *grad_tensor; | ||||
Workspace workspace; | Workspace workspace; | ||||
ExecArgs(Convolution3DBackwardFilterImpl* opr, _megdnn_tensor_in src, | |||||
_megdnn_tensor_in diff, _megdnn_tensor_out grad, | |||||
_megdnn_workspace workspace); | |||||
ExecArgs(const Convolution3DBackwardFilterImpl* opr, | |||||
_megdnn_tensor_in src, _megdnn_tensor_in diff, | |||||
_megdnn_tensor_out grad, _megdnn_workspace workspace); | |||||
}; | }; | ||||
virtual bool is_available(const SizeArgs& args) const = 0; | virtual bool is_available(const SizeArgs& args) const = 0; | ||||
virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0; | virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0; | ||||
@@ -162,30 +165,25 @@ public: | |||||
//! implement group conv by another algo | //! implement group conv by another algo | ||||
class Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral final | class Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral final | ||||
: public AlgoBase { | : public AlgoBase { | ||||
AlgoBase* m_impl; | |||||
std::string m_name; | |||||
public: | public: | ||||
AlgoGroupConvGeneral(AlgoBase* impl); | |||||
bool is_available(const SizeArgs& args) const override; | bool is_available(const SizeArgs& args) const override; | ||||
size_t get_workspace_in_bytes(const SizeArgs& args) const override; | size_t get_workspace_in_bytes(const SizeArgs& args) const override; | ||||
void exec(const ExecArgs& args) const override; | void exec(const ExecArgs& args) const override; | ||||
std::vector<SearchItem> get_subopr_list( | |||||
const TensorLayoutArray& layouts, | |||||
const OperatorBase* opr) const override; | |||||
const char* name() const override { return m_name.c_str(); } | |||||
const char* name() const override { | |||||
return "CUDA:GROUP_CONV3D_BACKWARD_FILTER"; | |||||
} | |||||
AlgoAttribute attribute() const override { | AlgoAttribute attribute() const override { | ||||
auto ret = static_cast<AlgoAttribute>(0); | |||||
if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) { | |||||
ret |= AlgoAttribute::REPRODUCIBLE; | |||||
} | |||||
return ret; | |||||
return AlgoAttribute::REPRODUCIBLE; | |||||
} | } | ||||
static void modify_size_args(SizeArgs& args, TensorLayout& src_pg, | |||||
TensorLayout& diff_pg); | |||||
MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | ||||
private: | |||||
WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const; | |||||
}; | }; | ||||
class Convolution3DBackwardFilterImpl::AlgoPack : NonCopyableObj { | class Convolution3DBackwardFilterImpl::AlgoPack : NonCopyableObj { | ||||
@@ -200,8 +198,7 @@ public: | |||||
std::vector<AlgoCUDNN> cudnn; | std::vector<AlgoCUDNN> cudnn; | ||||
AlgoInplaceMatmul inplace_matmul; | AlgoInplaceMatmul inplace_matmul; | ||||
AlgoChanwise chanwise; | AlgoChanwise chanwise; | ||||
std::vector<AlgoGroupConvGeneral> gconv; | |||||
std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv; | |||||
AlgoGroupConvGeneral group; | |||||
std::vector<AlgoBase*> | std::vector<AlgoBase*> | ||||
//! all algorithms | //! all algorithms | ||||
@@ -15,69 +15,123 @@ using namespace megdnn; | |||||
using namespace cuda; | using namespace cuda; | ||||
using namespace convolution3d; | using namespace convolution3d; | ||||
void Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::modify_size_args( | |||||
Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs &args, | |||||
TensorLayout &src_pg, TensorLayout &diff_pg) { | |||||
src_pg = *args.src_layout; | |||||
diff_pg = *args.diff_layout; | |||||
namespace { | |||||
std::pair<TensorLayoutArray, Convolution3DBackwardFilterImpl::Param> | |||||
sub_opr_config( | |||||
const Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs& args) { | |||||
SmallVector<size_t> flt_shape(0); | |||||
std::vector<ptrdiff_t> flt_stride(0); | |||||
size_t idx = 0; | |||||
// check if the first dim is group | |||||
if (args.grad_layout->ndim > args.src_layout->ndim) | |||||
++idx; | |||||
for (; idx < args.grad_layout->ndim; ++idx) { | |||||
flt_shape.push_back(args.grad_layout->shape[idx]); | |||||
flt_stride.push_back(args.grad_layout->stride[idx]); | |||||
} | |||||
TensorLayout grad_pg(flt_shape, flt_stride, args.grad_layout->dtype, | |||||
args.grad_layout->format); | |||||
TensorLayout src_pg = *args.src_layout; | |||||
TensorLayout diff_pg = *args.diff_layout; | |||||
auto nr_grp = args.grad_filter_meta.group; | auto nr_grp = args.grad_filter_meta.group; | ||||
args.grad_filter_meta.group = 1; | |||||
src_pg.shape[1] /= nr_grp; | |||||
diff_pg.shape[1] /= nr_grp; | |||||
args.src_layout = &src_pg; | |||||
args.diff_layout = &diff_pg; | |||||
size_t c_pos = 1; | |||||
src_pg.shape[c_pos] /= nr_grp; | |||||
diff_pg.shape[c_pos] /= nr_grp; | |||||
megdnn::param::Convolution3D param = args.opr->param(); | |||||
param.sparse = megdnn::param::Convolution3D::Sparse::DENSE; | |||||
std::pair<TensorLayoutArray, Convolution3DBackwardFilterImpl::Param> ret; | |||||
ret.first = {src_pg, diff_pg, grad_pg}; | |||||
ret.second = param; | |||||
return ret; | |||||
} | } | ||||
Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral( | |||||
AlgoBase *impl): | |||||
m_impl{impl} | |||||
{ | |||||
m_name = "group_conv3d:"; | |||||
m_name += impl->name(); | |||||
std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DBackwardFilter>> | |||||
prepare_sub_opr( | |||||
const Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs& args) { | |||||
auto conv3d_backfilter_opr = | |||||
args.handle->create_operator<Convolution3DBackwardFilter>(); | |||||
set_execution_policy<Convolution3DBackwardFilter, | |||||
Convolution3DBackwardFilter*>( | |||||
args.opr, conv3d_backfilter_opr.get()); | |||||
auto&& config = sub_opr_config(args); | |||||
conv3d_backfilter_opr->param() = config.second; | |||||
return {config.first, std::move(conv3d_backfilter_opr)}; | |||||
} | |||||
} // namespace | |||||
std::vector<Algorithm::SearchItem> | |||||
Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::get_subopr_list( | |||||
const TensorLayoutArray& layouts, const OperatorBase* opr) const { | |||||
AlgoBase::SizeArgs args{ | |||||
static_cast<const Convolution3DBackwardFilterImpl*>(opr), | |||||
layouts[0], layouts[1], layouts[2]}; | |||||
auto&& config = sub_opr_config(args); | |||||
std::string param_str; | |||||
Algorithm::serialize_write_pod(config.second, param_str); | |||||
return {{Algorithm::OprType::CONVOLUTION3D_BACKWARD_FILTER, param_str, | |||||
config.first}}; | |||||
} | } | ||||
bool Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::is_available( | bool Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::is_available( | ||||
const SizeArgs &args) const { | |||||
const SizeArgs& args) const { | |||||
if (args.grad_filter_meta.group <= 1) | if (args.grad_filter_meta.group <= 1) | ||||
return false; | return false; | ||||
auto sub_args = args; | |||||
TensorLayout src_pg, diff_pg; | |||||
modify_size_args(sub_args, src_pg, diff_pg); | |||||
return m_impl->is_available(sub_args); | |||||
if (args.grad_filter_meta.format != Param::Format::NCDHW) { | |||||
return false; | |||||
} | |||||
auto config = prepare_sub_opr(args); | |||||
return get_algorithm( | |||||
static_cast<Convolution3DBackwardFilterImpl*>(config.second.get()), | |||||
config.first[0], config.first[1], config.first[2]); | |||||
} | |||||
WorkspaceBundle | |||||
Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::get_workspace_bundle( | |||||
void* ptr, const SizeArgs& args) const { | |||||
auto config = prepare_sub_opr(args); | |||||
size_t sizes = config.second->get_workspace_in_bytes( | |||||
config.first[0], config.first[1], config.first[2]); | |||||
return {ptr, {sizes}}; | |||||
} | } | ||||
size_t Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral:: | |||||
get_workspace_in_bytes(const SizeArgs &args) const { | |||||
auto sub_args = args; | |||||
TensorLayout src_pg, diff_pg; | |||||
modify_size_args(sub_args, src_pg, diff_pg); | |||||
return m_impl->get_workspace_in_bytes(sub_args); | |||||
size_t | |||||
Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::get_workspace_in_bytes( | |||||
const SizeArgs& args) const { | |||||
return get_workspace_bundle(nullptr, args).total_size_in_bytes(); | |||||
} | } | ||||
void Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::exec( | void Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::exec( | ||||
const ExecArgs &args) const { | |||||
auto sub_args = args; | |||||
TensorND tsrc{*args.src_tensor}, tdiff{*args.diff_tensor}, | |||||
tgrad{*args.grad_tensor}; | |||||
modify_size_args(sub_args, tsrc.layout, tdiff.layout); | |||||
sub_args.src_tensor = &tsrc; | |||||
sub_args.diff_tensor = &tdiff; | |||||
sub_args.grad_tensor = &tgrad; | |||||
auto &&fm = args.grad_filter_meta; | |||||
auto grp = fm.group; | |||||
auto strd_src = ( | |||||
tsrc.layout.stride[1] * fm.icpg * tsrc.layout.dtype.size()), | |||||
strd_diff = ( | |||||
tdiff.layout.stride[1] * fm.ocpg * tdiff.layout.dtype.size()), | |||||
strd_grad = (fm.icpg * fm.ocpg * | |||||
fm.spatial[0] * fm.spatial[1] * fm.spatial[2] * tgrad.layout.dtype.size()); | |||||
for (uint32_t g = 0; g < grp; ++ g) { | |||||
m_impl->exec(sub_args); | |||||
incr_voidp(tsrc.raw_ptr, strd_src); | |||||
incr_voidp(tdiff.raw_ptr, strd_diff); | |||||
incr_voidp(tgrad.raw_ptr, strd_grad); | |||||
const ExecArgs& args) const { | |||||
auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args); | |||||
{ | |||||
auto config = prepare_sub_opr(args); | |||||
TensorND tsrc{args.src_tensor->raw_ptr, config.first[0]}; | |||||
TensorND tdiff{args.diff_tensor->raw_ptr, config.first[1]}; | |||||
TensorND tgrad{args.grad_tensor->raw_ptr, config.first[2]}; | |||||
size_t c_pos = 1; | |||||
auto grp = args.grad_filter_meta.group; | |||||
auto&& fm = args.grad_filter_meta; | |||||
auto strd_src = (tsrc.layout.stride[c_pos] * fm.icpg * | |||||
tsrc.layout.dtype.size()), | |||||
strd_diff = (tdiff.layout.stride[c_pos] * fm.ocpg * | |||||
tdiff.layout.dtype.size()), | |||||
strd_grad = (fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] * | |||||
fm.spatial[2] * tgrad.layout.dtype.size()); | |||||
for (uint32_t g = 0; g < grp; ++g) { | |||||
config.second->exec(tsrc, tdiff, tgrad, bundle.get_workspace(0)); | |||||
incr_voidp(tsrc.raw_ptr, strd_src); | |||||
incr_voidp(tdiff.raw_ptr, strd_diff); | |||||
incr_voidp(tgrad.raw_ptr, strd_grad); | |||||
} | |||||
} | } | ||||
} | } | ||||
@@ -28,22 +28,7 @@ Convolution3DForwardImpl::AlgoPack::AlgoPack() { | |||||
} | } | ||||
all_algos.push_back(&inplace_matmul); | all_algos.push_back(&inplace_matmul); | ||||
all_algos.push_back(&a1x1x1); | all_algos.push_back(&a1x1x1); | ||||
all_algos.reserve(all_algos.size() * 2); | |||||
// add gconv algos by AlgoGroupConvGeneral | |||||
auto all_algos_data = all_algos.data(); | |||||
for (size_t i = 1; i < all_algos.size(); ++ i) { | |||||
gconv.push_back({all_algos[i]}); | |||||
} | |||||
for (size_t i = 1; i < all_algos.size(); ++ i) { | |||||
algo2gconv[all_algos[i]] = &gconv[i - 1]; | |||||
} | |||||
for (auto &&i: gconv) { | |||||
all_algos.push_back(&i); | |||||
} | |||||
megdnn_assert(all_algos_data == all_algos.data()); | |||||
non_cudnn_algos.push_back(all_algos.rbegin()[1]); // group inplace_matmul | |||||
non_cudnn_algos.push_back(all_algos.rbegin()[0]); // group 1x1x1 | |||||
all_algos.push_back(&group); | |||||
for (auto&& algo : all_algos) { | for (auto&& algo : all_algos) { | ||||
m_all_algos_map.emplace(algo->info().desc, algo); | m_all_algos_map.emplace(algo->info().desc, algo); | ||||
@@ -66,28 +51,25 @@ Convolution3DForwardImpl::AlgoPack::cudnn_from_enum( | |||||
Convolution3DForwardImpl::AlgoPack Convolution3DForwardImpl::sm_algo_pack; | Convolution3DForwardImpl::AlgoPack Convolution3DForwardImpl::sm_algo_pack; | ||||
Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs( | Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs( | ||||
Convolution3DForwardImpl *o, | |||||
const TensorLayout &src, const TensorLayout &filter, | |||||
const TensorLayout &dst): | |||||
SizeArgs(o, src, o->make_canonized_filter_meta(src.ndim, filter), dst) | |||||
{ | |||||
} | |||||
const Convolution3DForwardImpl* o, const TensorLayout& src, | |||||
const TensorLayout& filter, const TensorLayout& dst) | |||||
: SizeArgs(o, src, filter, | |||||
o->make_canonized_filter_meta(src.ndim, filter), dst) {} | |||||
Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs( | Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs( | ||||
Convolution3DForwardImpl *o, | |||||
const TensorLayout &src, const CanonizedFilterMeta &filter, | |||||
const TensorLayout &dst): | |||||
ForwardSizeArgs{ | |||||
concrete_handle(o->handle()), | |||||
&src, filter, &dst, | |||||
o->param().data_type | |||||
}, | |||||
opr{o} | |||||
{ | |||||
} | |||||
const Convolution3DForwardImpl* o, const TensorLayout& src, | |||||
const TensorLayout& filter, const CanonizedFilterMeta& filter_meta, | |||||
const TensorLayout& dst) | |||||
: ForwardSizeArgs{concrete_handle(o->handle()), | |||||
&src, | |||||
&filter, | |||||
filter_meta, | |||||
&dst, | |||||
o->param().data_type}, | |||||
opr{o} {} | |||||
Convolution3DForwardImpl::AlgoBase::ExecArgs::ExecArgs( | Convolution3DForwardImpl::AlgoBase::ExecArgs::ExecArgs( | ||||
Convolution3DForwardImpl *opr, | |||||
const Convolution3DForwardImpl *opr, | |||||
_megdnn_tensor_in src, | _megdnn_tensor_in src, | ||||
_megdnn_tensor_in filter, | _megdnn_tensor_in filter, | ||||
_megdnn_tensor_out dst, | _megdnn_tensor_out dst, | ||||
@@ -48,22 +48,24 @@ public: | |||||
AlgoBase() : Algorithm() { m_handle_type = Handle::HandleType::CUDA; } | AlgoBase() : Algorithm() { m_handle_type = Handle::HandleType::CUDA; } | ||||
struct SizeArgs : public convolution3d::ForwardSizeArgs { | struct SizeArgs : public convolution3d::ForwardSizeArgs { | ||||
Convolution3DForwardImpl* opr; | |||||
const Convolution3DForwardImpl* opr; | |||||
std::string to_string() const; | std::string to_string() const; | ||||
void init_desc(convolution3d::CUDNNForwardDescs& desc) const { | void init_desc(convolution3d::CUDNNForwardDescs& desc) const { | ||||
desc.set(*src_layout, filter_meta, *dst_layout, opr->param()); | desc.set(*src_layout, filter_meta, *dst_layout, opr->param()); | ||||
} | } | ||||
SizeArgs(Convolution3DForwardImpl* opr, const TensorLayout& src, | |||||
SizeArgs(const Convolution3DForwardImpl* opr, const TensorLayout& src, | |||||
const TensorLayout& filter, const TensorLayout& dst); | const TensorLayout& filter, const TensorLayout& dst); | ||||
SizeArgs(Convolution3DForwardImpl* opr, const TensorLayout& src, | |||||
const CanonizedFilterMeta& filter, const TensorLayout& dst); | |||||
SizeArgs(const Convolution3DForwardImpl* opr, const TensorLayout& src, | |||||
const TensorLayout& filter, | |||||
const CanonizedFilterMeta& filter_meta, | |||||
const TensorLayout& dst); | |||||
}; | }; | ||||
struct ExecArgs : public SizeArgs { | struct ExecArgs : public SizeArgs { | ||||
const TensorND *src_tensor, *filter_tensor, *dst_tensor; | const TensorND *src_tensor, *filter_tensor, *dst_tensor; | ||||
Workspace workspace; | Workspace workspace; | ||||
ExecArgs(Convolution3DForwardImpl* opr, _megdnn_tensor_in src, | |||||
ExecArgs(const Convolution3DForwardImpl* opr, _megdnn_tensor_in src, | |||||
_megdnn_tensor_in filter, _megdnn_tensor_out dst, | _megdnn_tensor_in filter, _megdnn_tensor_out dst, | ||||
_megdnn_workspace workspace); | _megdnn_workspace workspace); | ||||
}; | }; | ||||
@@ -114,35 +116,22 @@ public: | |||||
//! implement group conv by another algo | //! implement group conv by another algo | ||||
class Convolution3DForwardImpl::AlgoGroupConvGeneral final : public AlgoBase { | class Convolution3DForwardImpl::AlgoGroupConvGeneral final : public AlgoBase { | ||||
AlgoBase* m_impl; | |||||
std::string m_name; | |||||
public: | public: | ||||
AlgoGroupConvGeneral(AlgoBase* impl); | |||||
bool is_available(const SizeArgs& args) const override; | bool is_available(const SizeArgs& args) const override; | ||||
size_t get_workspace_in_bytes(const SizeArgs& args) const override; | size_t get_workspace_in_bytes(const SizeArgs& args) const override; | ||||
void exec(const ExecArgs& args) const override; | void exec(const ExecArgs& args) const override; | ||||
std::vector<SearchItem> get_subopr_list( | |||||
const TensorLayoutArray& layouts, | |||||
const OperatorBase* opr) const override; | |||||
const char* name() const override { return m_name.c_str(); } | |||||
const char* name() const override { return "CUDA:GROUP_CONV3D_FORWARD"; } | |||||
AlgoAttribute attribute() const override { | AlgoAttribute attribute() const override { | ||||
auto ret = AlgoAttribute::DEFAULT; | |||||
if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) { | |||||
ret |= AlgoAttribute::REPRODUCIBLE; | |||||
} | |||||
#define cb(attr) \ | |||||
if (m_impl->contain_attribute_all(attr)) { \ | |||||
ret |= attr; \ | |||||
} | |||||
MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE(cb) | |||||
#undef cb | |||||
return ret; | |||||
return AlgoAttribute::REPRODUCIBLE; | |||||
} | } | ||||
static void modify_size_args(SizeArgs& args, TensorLayout& src_pg, | |||||
TensorLayout& dst_pg); | |||||
MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | ||||
private: | |||||
WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const; | |||||
}; | }; | ||||
class Convolution3DForwardImpl::AlgoCUDNN final : public AlgoBase { | class Convolution3DForwardImpl::AlgoCUDNN final : public AlgoBase { | ||||
@@ -226,8 +215,7 @@ public: | |||||
Algo1x1x1 a1x1x1; | Algo1x1x1 a1x1x1; | ||||
AlgoInplaceMatmul inplace_matmul; | AlgoInplaceMatmul inplace_matmul; | ||||
AlgoChanwise chanwise; | AlgoChanwise chanwise; | ||||
std::vector<AlgoGroupConvGeneral> gconv; | |||||
std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv; | |||||
AlgoGroupConvGeneral group; | |||||
std::vector<AlgoBase*> | std::vector<AlgoBase*> | ||||
//! all algorithms | //! all algorithms | ||||
@@ -15,84 +15,136 @@ using namespace megdnn; | |||||
using namespace cuda; | using namespace cuda; | ||||
using namespace convolution3d; | using namespace convolution3d; | ||||
void Convolution3DForwardImpl::AlgoGroupConvGeneral::modify_size_args( | |||||
Convolution3DForwardImpl::AlgoBase::SizeArgs &args, | |||||
TensorLayout &src_pg, TensorLayout &dst_pg) { | |||||
src_pg = *args.src_layout; | |||||
dst_pg = *args.dst_layout; | |||||
namespace { | |||||
std::pair<TensorLayoutArray, Convolution3DForwardImpl::Param> sub_opr_config( | |||||
const Convolution3DForwardImpl::AlgoBase::SizeArgs& args) { | |||||
TensorLayout src_pg = *args.src_layout; | |||||
SmallVector<size_t> flt_shape(0); | |||||
std::vector<ptrdiff_t> flt_stride(0); | |||||
size_t idx = 0; | |||||
// check if the first dim is group | |||||
if (args.filter_layout->ndim > args.src_layout->ndim) | |||||
++idx; | |||||
for (; idx < args.filter_layout->ndim; ++idx) { | |||||
flt_shape.push_back(args.filter_layout->shape[idx]); | |||||
flt_stride.push_back(args.filter_layout->stride[idx]); | |||||
} | |||||
TensorLayout filter_pg(flt_shape, flt_stride, | |||||
args.filter_layout->dtype, | |||||
args.filter_layout->format); | |||||
TensorLayout dst_pg = *args.dst_layout; | |||||
auto nr_grp = args.filter_meta.group; | auto nr_grp = args.filter_meta.group; | ||||
args.filter_meta.group = 1; | |||||
size_t c_pos; | size_t c_pos; | ||||
if (args.filter_meta.format == Param::Format::NCDHW) { | |||||
if (args.filter_meta.format == param::Convolution3D::Format::NCDHW) { | |||||
c_pos = 1; | c_pos = 1; | ||||
} else { | } else { | ||||
megdnn_assert(args.filter_meta.format == Param::Format::NDHWC, | |||||
megdnn_assert( | |||||
args.filter_meta.format == param::Convolution3D::Format::NDHWC, | |||||
"invalid conv format"); | "invalid conv format"); | ||||
c_pos = 4; | c_pos = 4; | ||||
} | } | ||||
src_pg.shape[c_pos] /= nr_grp; | src_pg.shape[c_pos] /= nr_grp; | ||||
dst_pg.shape[c_pos] /= nr_grp; | dst_pg.shape[c_pos] /= nr_grp; | ||||
args.src_layout = &src_pg; | |||||
args.dst_layout = &dst_pg; | |||||
megdnn::param::Convolution3D param = args.opr->param(); | |||||
param.sparse = megdnn::param::Convolution3D::Sparse::DENSE; | |||||
std::pair<TensorLayoutArray, Convolution3DForwardImpl::Param> ret; | |||||
ret.first = {src_pg, filter_pg, dst_pg}; | |||||
ret.second = param; | |||||
return ret; | |||||
} | |||||
std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DForward>> | |||||
prepare_sub_opr(const Convolution3DForwardImpl::AlgoBase::SizeArgs& args) { | |||||
auto conv3d_opr = args.handle->create_operator<Convolution3D>(); | |||||
set_execution_policy<Convolution3DForward, Convolution3DForward*>( | |||||
args.opr, conv3d_opr.get()); | |||||
auto&& config = sub_opr_config(args); | |||||
conv3d_opr->param() = config.second; | |||||
return {config.first, std::move(conv3d_opr)}; | |||||
} | } | ||||
} // namespace | |||||
std::vector<Algorithm::SearchItem> | |||||
Convolution3DForwardImpl::AlgoGroupConvGeneral::get_subopr_list( | |||||
const TensorLayoutArray& layouts, const OperatorBase* opr) const { | |||||
AlgoBase::SizeArgs args{static_cast<const Convolution3DForwardImpl*>(opr), | |||||
layouts[0], layouts[1], layouts[2]}; | |||||
auto&& config = sub_opr_config(args); | |||||
Convolution3DForwardImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral( | |||||
AlgoBase *impl): | |||||
m_impl{impl} { | |||||
m_name = "group_conv3d:"; | |||||
m_name += impl->name(); | |||||
std::string param_str; | |||||
Algorithm::serialize_write_pod(config.second, param_str); | |||||
return {{Algorithm::OprType::CONVOLUTION3D_FORWARD, param_str, | |||||
config.first}}; | |||||
} | } | ||||
bool Convolution3DForwardImpl::AlgoGroupConvGeneral::is_available( | bool Convolution3DForwardImpl::AlgoGroupConvGeneral::is_available( | ||||
const SizeArgs &args) const { | const SizeArgs &args) const { | ||||
if (args.filter_meta.group <= 1) | if (args.filter_meta.group <= 1) | ||||
return false; | return false; | ||||
auto sub_args = args; | |||||
TensorLayout src_pg, dst_pg; | |||||
modify_size_args(sub_args, src_pg, dst_pg); | |||||
return m_impl->is_available(sub_args); | |||||
if (args.filter_meta.format != Param::Format::NCDHW && | |||||
args.filter_meta.format != Param::Format::NDHWC) { | |||||
return false; | |||||
} | |||||
auto config = prepare_sub_opr(args); | |||||
return get_algorithm( | |||||
static_cast<Convolution3DForwardImpl*>(config.second.get()), | |||||
config.first[0], config.first[1], config.first[2]); | |||||
} | |||||
WorkspaceBundle | |||||
Convolution3DForwardImpl::AlgoGroupConvGeneral::get_workspace_bundle( | |||||
void* ptr, const SizeArgs& args) const { | |||||
auto config = prepare_sub_opr(args); | |||||
size_t sizes = config.second->get_workspace_in_bytes( | |||||
config.first[0], config.first[1], config.first[2]); | |||||
return {ptr, {sizes}}; | |||||
} | } | ||||
size_t Convolution3DForwardImpl::AlgoGroupConvGeneral::get_workspace_in_bytes( | size_t Convolution3DForwardImpl::AlgoGroupConvGeneral::get_workspace_in_bytes( | ||||
const SizeArgs &args) const { | |||||
auto sub_args = args; | |||||
TensorLayout src_pg, dst_pg; | |||||
modify_size_args(sub_args, src_pg, dst_pg); | |||||
return m_impl->get_workspace_in_bytes(sub_args); | |||||
const SizeArgs& args) const { | |||||
return get_workspace_bundle(nullptr, args).total_size_in_bytes(); | |||||
} | } | ||||
void Convolution3DForwardImpl::AlgoGroupConvGeneral::exec( | void Convolution3DForwardImpl::AlgoGroupConvGeneral::exec( | ||||
const ExecArgs &args) const { | |||||
auto sub_args = args; | |||||
TensorND tsrc{*args.src_tensor}, tdst{*args.dst_tensor}, | |||||
tflt{*args.filter_tensor}; | |||||
modify_size_args(sub_args, tsrc.layout, tdst.layout); | |||||
sub_args.src_tensor = &tsrc; | |||||
sub_args.dst_tensor = &tdst; | |||||
sub_args.filter_tensor = &tflt; | |||||
const ExecArgs& args) const { | |||||
auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args); | |||||
{ | |||||
auto config = prepare_sub_opr(args); | |||||
TensorND tsrc{args.src_tensor->raw_ptr, config.first[0]}; | |||||
TensorND tfilter{args.filter_tensor->raw_ptr, config.first[1]}; | |||||
TensorND tdst{args.dst_tensor->raw_ptr, config.first[2]}; | |||||
size_t c_pos; | |||||
if (args.filter_meta.format == Param::Format::NCDHW) { | |||||
c_pos = 1; | |||||
} else { | |||||
megdnn_assert(args.filter_meta.format == Param::Format::NDHWC, | |||||
"invalid conv format"); | |||||
c_pos = 4; | |||||
} | |||||
size_t c_pos; | |||||
if (args.filter_meta.format == Param::Format::NCDHW) { | |||||
c_pos = 1; | |||||
} else { | |||||
megdnn_assert(args.filter_meta.format == Param::Format::NDHWC, | |||||
"invalid conv format"); | |||||
c_pos = 4; | |||||
} | |||||
auto grp = args.filter_meta.group; | |||||
auto&& fm = args.filter_meta; | |||||
auto strd_src = tsrc.layout.stride[c_pos] * fm.icpg * | |||||
tsrc.layout.dtype.size(), | |||||
strd_dst = tdst.layout.stride[c_pos] * fm.ocpg * | |||||
tdst.layout.dtype.size(), | |||||
strd_flt = fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] * | |||||
fm.spatial[2] * tfilter.layout.dtype.size(); | |||||
auto grp = args.filter_meta.group; | |||||
auto &&fm = args.filter_meta; | |||||
auto strd_src = tsrc.layout.stride[c_pos] * fm.icpg * tsrc.layout.dtype.size(), | |||||
strd_dst = tdst.layout.stride[c_pos] * fm.ocpg * tdst.layout.dtype.size(), | |||||
strd_flt = fm.icpg * fm.ocpg * | |||||
fm.spatial[0] * fm.spatial[1] * fm.spatial[2] * | |||||
tflt.layout.dtype.size(); | |||||
for (uint32_t g = 0; g < grp; ++ g) { | |||||
m_impl->exec(sub_args); | |||||
incr_voidp(tsrc.raw_ptr, strd_src); | |||||
incr_voidp(tdst.raw_ptr, strd_dst); | |||||
incr_voidp(tflt.raw_ptr, strd_flt); | |||||
for (uint32_t g = 0; g < grp; ++g) { | |||||
config.second->exec(tsrc, tfilter, tdst, bundle.get_workspace(0)); | |||||
incr_voidp(tsrc.raw_ptr, strd_src); | |||||
incr_voidp(tdst.raw_ptr, strd_dst); | |||||
incr_voidp(tfilter.raw_ptr, strd_flt); | |||||
} | |||||
} | } | ||||
} | } | ||||
@@ -26,6 +26,7 @@ namespace convolution3d { | |||||
struct ForwardSizeArgs { | struct ForwardSizeArgs { | ||||
HandleImpl *handle; | HandleImpl *handle; | ||||
const TensorLayout *src_layout; | const TensorLayout *src_layout; | ||||
const TensorLayout *filter_layout; | |||||
CanonizedFilterMeta filter_meta; | CanonizedFilterMeta filter_meta; | ||||
const TensorLayout *dst_layout; | const TensorLayout *dst_layout; | ||||
param::Convolution3D::DataType data_type; | param::Convolution3D::DataType data_type; | ||||
@@ -35,16 +35,6 @@ Convolution3DForwardImpl::get_algorithm_heuristic( | |||||
const TensorLayout& dst, size_t workspace_limit_in_bytes, | const TensorLayout& dst, size_t workspace_limit_in_bytes, | ||||
const AlgoAttribute& positive_attr, | const AlgoAttribute& positive_attr, | ||||
const AlgoAttribute& negative_attr) { | const AlgoAttribute& negative_attr) { | ||||
auto fm = check_layout_fwd(src, filter, dst); | |||||
return get_algorithm_heuristic(src, fm, dst, workspace_limit_in_bytes, | |||||
positive_attr, negative_attr); | |||||
} | |||||
Convolution3DForwardImpl::Algorithm* | |||||
Convolution3DForwardImpl::get_algorithm_heuristic( | |||||
const TensorLayout& src, const CanonizedFilterMeta& filter, | |||||
const TensorLayout& dst, size_t workspace_limit_in_bytes, | |||||
const AlgoAttribute& positive_attr, | |||||
const AlgoAttribute& negative_attr) { | |||||
AlgoBase::SizeArgs args(this, src, filter, dst); | AlgoBase::SizeArgs args(this, src, filter, dst); | ||||
#if CUDNN_MAJOR < 7 || (CUDNN_MAJOR == 7 && CUDNN_MINOR < 5) | #if CUDNN_MAJOR < 7 || (CUDNN_MAJOR == 7 && CUDNN_MINOR < 5) | ||||
@@ -98,18 +88,14 @@ Convolution3DForwardImpl::get_algorithm_heuristic( | |||||
if (auto algo = get_cudnn_algo()) | if (auto algo = get_cudnn_algo()) | ||||
return algo; | return algo; | ||||
} | } | ||||
if (args.filter_meta.group > 1) { | if (args.filter_meta.group > 1) { | ||||
auto orig_args = args; | |||||
TensorLayout a, b; | |||||
AlgoGroupConvGeneral::modify_size_args(args, a, b); | |||||
if (prefer_1x1x1()) { | |||||
return sm_algo_pack.algo2gconv.at(&sm_algo_pack.a1x1x1); | |||||
} | |||||
if (is_cudnn_supported(args)) { | |||||
if (auto algo = get_cudnn_algo()) | |||||
return sm_algo_pack.algo2gconv.at(algo); | |||||
if (auto algo = | |||||
megdnn::get_algo_match_attribute<Convolution3DForwardImpl>( | |||||
&sm_algo_pack.group, positive_attr, | |||||
negative_attr)) { | |||||
return algo; | |||||
} | } | ||||
args = orig_args; | |||||
} | } | ||||
return megdnn::get_algo_match_attribute<Convolution3DForwardImpl>( | return megdnn::get_algo_match_attribute<Convolution3DForwardImpl>( | ||||
@@ -129,7 +115,7 @@ size_t Convolution3DForwardImpl::get_workspace_in_bytes( | |||||
const TensorLayout& src, const TensorLayout& filter, | const TensorLayout& src, const TensorLayout& filter, | ||||
const TensorLayout& dst) { | const TensorLayout& dst) { | ||||
AlgoBase::SizeArgs args(this, src, filter, dst); | AlgoBase::SizeArgs args(this, src, filter, dst); | ||||
return get_algorithm(this, src, args.filter_meta, dst) | |||||
return get_algorithm(this, src, filter, dst) | |||||
->get_workspace_in_bytes(args); | ->get_workspace_in_bytes(args); | ||||
} | } | ||||
@@ -138,7 +124,7 @@ void Convolution3DForwardImpl::exec(_megdnn_tensor_in src, | |||||
_megdnn_tensor_out dst, | _megdnn_tensor_out dst, | ||||
_megdnn_workspace workspace) { | _megdnn_workspace workspace) { | ||||
AlgoBase::ExecArgs args(this, src, filter, dst, workspace); | AlgoBase::ExecArgs args(this, src, filter, dst, workspace); | ||||
auto algo = get_algorithm(this, src.layout, args.filter_meta, dst.layout); | |||||
auto algo = get_algorithm(this, src.layout, filter.layout, dst.layout); | |||||
algo->check_workspace(args, workspace).exec(args); | algo->check_workspace(args, workspace).exec(args); | ||||
} | } | ||||
@@ -151,7 +137,7 @@ void Convolution3DBackwardDataImpl::exec(_megdnn_tensor_in filter, | |||||
_megdnn_tensor_out grad, | _megdnn_tensor_out grad, | ||||
_megdnn_workspace workspace) { | _megdnn_workspace workspace) { | ||||
AlgoBase::ExecArgs args(this, filter, diff, grad, workspace); | AlgoBase::ExecArgs args(this, filter, diff, grad, workspace); | ||||
auto algo = get_algorithm(this, args.filter_meta, diff.layout, grad.layout); | |||||
auto algo = get_algorithm(this, filter.layout, diff.layout, grad.layout); | |||||
algo->check_workspace(args, workspace).exec(args); | algo->check_workspace(args, workspace).exec(args); | ||||
} | } | ||||
@@ -169,17 +155,6 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic( | |||||
const TensorLayout& grad, size_t workspace_limit_in_bytes, | const TensorLayout& grad, size_t workspace_limit_in_bytes, | ||||
const AlgoAttribute& positive_attr, | const AlgoAttribute& positive_attr, | ||||
const AlgoAttribute& negative_attr) { | const AlgoAttribute& negative_attr) { | ||||
auto fm = check_layout_fwd(grad, filter, diff); | |||||
return get_algorithm_heuristic(fm, diff, grad, workspace_limit_in_bytes, | |||||
positive_attr, negative_attr); | |||||
} | |||||
Convolution3DBackwardDataImpl::Algorithm* | |||||
Convolution3DBackwardDataImpl::get_algorithm_heuristic( | |||||
const CanonizedFilterMeta& filter, const TensorLayout& diff, | |||||
const TensorLayout& grad, size_t workspace_limit_in_bytes, | |||||
const AlgoAttribute& positive_attr, | |||||
const AlgoAttribute& negative_attr) { | |||||
AlgoBase::SizeArgs args(this, filter, diff, grad); | AlgoBase::SizeArgs args(this, filter, diff, grad); | ||||
if (args.filter_meta.group > 1 && | if (args.filter_meta.group > 1 && | ||||
@@ -215,14 +190,11 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic( | |||||
} | } | ||||
if (args.filter_meta.group > 1) { | if (args.filter_meta.group > 1) { | ||||
auto orig_args = args; | |||||
TensorLayout a, b; | |||||
AlgoGroupConvGeneral::modify_size_args(args, a, b); | |||||
if (is_cudnn_supported(args.as_fwd_args())) { | |||||
if (auto algo = get_cudnn_algo()) | |||||
return sm_algo_pack.algo2gconv.at(algo); | |||||
if (auto algo = megdnn::get_algo_match_attribute< | |||||
Convolution3DBackwardDataImpl>( | |||||
&sm_algo_pack.group, positive_attr, negative_attr)) { | |||||
return algo; | |||||
} | } | ||||
args = orig_args; | |||||
} | } | ||||
return megdnn::get_algo_match_attribute<Convolution3DBackwardDataImpl>( | return megdnn::get_algo_match_attribute<Convolution3DBackwardDataImpl>( | ||||
@@ -234,7 +206,7 @@ size_t Convolution3DBackwardDataImpl::get_workspace_in_bytes( | |||||
const TensorLayout& filter, const TensorLayout& diff, | const TensorLayout& filter, const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
AlgoBase::SizeArgs args(this, filter, diff, grad); | AlgoBase::SizeArgs args(this, filter, diff, grad); | ||||
return get_algorithm(this, args.filter_meta, diff, grad) | |||||
return get_algorithm(this, filter, diff, grad) | |||||
->get_workspace_in_bytes(args); | ->get_workspace_in_bytes(args); | ||||
} | } | ||||
@@ -248,7 +220,7 @@ void Convolution3DBackwardFilterImpl::exec(_megdnn_tensor_in src, | |||||
_megdnn_workspace workspace) { | _megdnn_workspace workspace) { | ||||
AlgoBase::ExecArgs args(this, src, diff, grad, workspace); | AlgoBase::ExecArgs args(this, src, diff, grad, workspace); | ||||
auto algo = | auto algo = | ||||
get_algorithm(this, src.layout, diff.layout, args.grad_filter_meta); | |||||
get_algorithm(this, src.layout, diff.layout, grad.layout); | |||||
algo->check_workspace(args, workspace).exec(args); | algo->check_workspace(args, workspace).exec(args); | ||||
} | } | ||||
@@ -266,17 +238,6 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic( | |||||
const TensorLayout& grad, size_t workspace_limit_in_bytes, | const TensorLayout& grad, size_t workspace_limit_in_bytes, | ||||
const AlgoAttribute& positive_attr, | const AlgoAttribute& positive_attr, | ||||
const AlgoAttribute& negative_attr) { | const AlgoAttribute& negative_attr) { | ||||
auto fm = check_layout_fwd(src, grad, diff); | |||||
return get_algorithm_heuristic(src, diff, fm, workspace_limit_in_bytes, | |||||
positive_attr, negative_attr); | |||||
} | |||||
Convolution3DBackwardFilterImpl::Algorithm* | |||||
Convolution3DBackwardFilterImpl::get_algorithm_heuristic( | |||||
const TensorLayout& src, const TensorLayout& diff, | |||||
const CanonizedFilterMeta& grad, size_t workspace_limit_in_bytes, | |||||
const AlgoAttribute& positive_attr, | |||||
const AlgoAttribute& negative_attr) { | |||||
AlgoBase::SizeArgs args(this, src, diff, grad); | AlgoBase::SizeArgs args(this, src, diff, grad); | ||||
if (args.grad_filter_meta.group > 1 && | if (args.grad_filter_meta.group > 1 && | ||||
@@ -310,15 +271,13 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic( | |||||
if (auto algo = get_cudnn_algo()) | if (auto algo = get_cudnn_algo()) | ||||
return algo; | return algo; | ||||
} | } | ||||
if (args.grad_filter_meta.group > 1) { | if (args.grad_filter_meta.group > 1) { | ||||
auto orig_args = args; | |||||
TensorLayout a, b; | |||||
AlgoGroupConvGeneral::modify_size_args(args, a, b); | |||||
if (is_cudnn_supported(args.as_fwd_args())) { | |||||
if (auto algo = get_cudnn_algo()) | |||||
return sm_algo_pack.algo2gconv.at(algo); | |||||
if (auto algo = megdnn::get_algo_match_attribute< | |||||
Convolution3DBackwardFilterImpl>( | |||||
&sm_algo_pack.group, positive_attr, negative_attr)) { | |||||
return algo; | |||||
} | } | ||||
args = orig_args; | |||||
} | } | ||||
return megdnn::get_algo_match_attribute<Convolution3DBackwardFilterImpl>( | return megdnn::get_algo_match_attribute<Convolution3DBackwardFilterImpl>( | ||||
@@ -330,7 +289,7 @@ size_t Convolution3DBackwardFilterImpl::get_workspace_in_bytes( | |||||
const TensorLayout& src, const TensorLayout& diff, | const TensorLayout& src, const TensorLayout& diff, | ||||
const TensorLayout& grad) { | const TensorLayout& grad) { | ||||
AlgoBase::SizeArgs args(this, src, diff, grad); | AlgoBase::SizeArgs args(this, src, diff, grad); | ||||
return get_algorithm(this, src, diff, args.grad_filter_meta) | |||||
return get_algorithm(this, src, diff, grad) | |||||
->get_workspace_in_bytes(args); | ->get_workspace_in_bytes(args); | ||||
} | } | ||||
@@ -21,17 +21,6 @@ public: | |||||
using Convolution3DForward::Convolution3DForward; | using Convolution3DForward::Convolution3DForward; | ||||
void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, | void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, | ||||
_megdnn_tensor_out dst, _megdnn_workspace workspace) override; | _megdnn_tensor_out dst, _megdnn_workspace workspace) override; | ||||
AlgorithmInfo get_algorithm_info_heuristic(const TensorLayout& src, | |||||
const CanonizedFilterMeta& filter, | |||||
const TensorLayout& dst, | |||||
size_t workspace_limit_in_bytes, | |||||
const AlgoAttribute& positive_attr, | |||||
const AlgoAttribute& negative_attr) { | |||||
return get_algorithm_heuristic(src, filter, dst, | |||||
workspace_limit_in_bytes, positive_attr, | |||||
negative_attr) | |||||
->info(); | |||||
} | |||||
size_t get_workspace_in_bytes(const TensorLayout& src, | size_t get_workspace_in_bytes(const TensorLayout& src, | ||||
const TensorLayout& filter, | const TensorLayout& filter, | ||||
const TensorLayout& dst) override; | const TensorLayout& dst) override; | ||||
@@ -57,13 +46,6 @@ protected: | |||||
const AlgoAttribute& negative_attr) override; | const AlgoAttribute& negative_attr) override; | ||||
private: | private: | ||||
Algorithm* get_algorithm_heuristic(const TensorLayout& src, | |||||
const CanonizedFilterMeta& filter, | |||||
const TensorLayout& dst, | |||||
size_t workspace_limit_in_bytes, | |||||
const AlgoAttribute& positive_attr, | |||||
const AlgoAttribute& negative_attr); | |||||
static AlgoPack sm_algo_pack; | static AlgoPack sm_algo_pack; | ||||
}; | }; | ||||
@@ -72,16 +54,6 @@ public: | |||||
using Convolution3DBackwardData::Convolution3DBackwardData; | using Convolution3DBackwardData::Convolution3DBackwardData; | ||||
void exec(_megdnn_tensor_in filter, _megdnn_tensor_in diff, | void exec(_megdnn_tensor_in filter, _megdnn_tensor_in diff, | ||||
_megdnn_tensor_out grad, _megdnn_workspace workspace) override; | _megdnn_tensor_out grad, _megdnn_workspace workspace) override; | ||||
AlgorithmInfo get_algorithm_info_heuristic( | |||||
const CanonizedFilterMeta& filter, const TensorLayout& diff, | |||||
const TensorLayout& grad, size_t workspace_limit_in_bytes, | |||||
const AlgoAttribute& positive_attr, | |||||
const AlgoAttribute& negative_attr) { | |||||
return get_algorithm_heuristic(filter, diff, grad, | |||||
workspace_limit_in_bytes, positive_attr, | |||||
negative_attr) | |||||
->info(); | |||||
} | |||||
size_t get_workspace_in_bytes(const TensorLayout& filter, | size_t get_workspace_in_bytes(const TensorLayout& filter, | ||||
const TensorLayout& diff, | const TensorLayout& diff, | ||||
const TensorLayout& grad) override; | const TensorLayout& grad) override; | ||||
@@ -109,13 +81,6 @@ protected: | |||||
const AlgoAttribute& negative_attr) override; | const AlgoAttribute& negative_attr) override; | ||||
private: | private: | ||||
Algorithm* get_algorithm_heuristic(const CanonizedFilterMeta& filter, | |||||
const TensorLayout& diff, | |||||
const TensorLayout& grad, | |||||
size_t workspace_limit_in_bytes, | |||||
const AlgoAttribute& positive_attr, | |||||
const AlgoAttribute& negative_attr); | |||||
static AlgoPack sm_algo_pack; | static AlgoPack sm_algo_pack; | ||||
}; | }; | ||||
@@ -127,17 +92,6 @@ public: | |||||
size_t get_workspace_in_bytes(const TensorLayout& src, | size_t get_workspace_in_bytes(const TensorLayout& src, | ||||
const TensorLayout& diff, | const TensorLayout& diff, | ||||
const TensorLayout& grad) override; | const TensorLayout& grad) override; | ||||
AlgorithmInfo get_algorithm_info_heuristic( | |||||
const TensorLayout& src, const TensorLayout& diff, | |||||
const CanonizedFilterMeta& grad, size_t workspace_limit_in_bytes, | |||||
const AlgoAttribute& positive_attr, | |||||
const AlgoAttribute& negative_attr) { | |||||
return get_algorithm_heuristic(src, diff, grad, | |||||
workspace_limit_in_bytes, positive_attr, | |||||
negative_attr) | |||||
->info(); | |||||
} | |||||
const char* get_algorithm_set_name() const override; | const char* get_algorithm_set_name() const override; | ||||
class AlgoBase; | class AlgoBase; | ||||
@@ -162,13 +116,6 @@ protected: | |||||
const AlgoAttribute& negative_attr) override; | const AlgoAttribute& negative_attr) override; | ||||
private: | private: | ||||
Algorithm* get_algorithm_heuristic(const TensorLayout& src, | |||||
const TensorLayout& diff, | |||||
const CanonizedFilterMeta& grad, | |||||
size_t workspace_limit_in_bytes, | |||||
const AlgoAttribute& positive_attr, | |||||
const AlgoAttribute& negative_attr); | |||||
static AlgoPack sm_algo_pack; | static AlgoPack sm_algo_pack; | ||||
}; | }; | ||||
} // namespace cuda | } // namespace cuda | ||||
@@ -66,12 +66,10 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_FORWARD_1x1x1) { | |||||
auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, | auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, | ||||
size_t FD, size_t FH, size_t FW, size_t OC, size_t group) { | size_t FD, size_t FH, size_t FW, size_t OC, size_t group) { | ||||
Checker<Convolution3D> checker(handle_cuda()); | Checker<Convolution3D> checker(handle_cuda()); | ||||
#if CUDNN_MAJOR <= 6 | |||||
bool require_algo = true; | |||||
checker.set_before_exec_callback( | |||||
AlgoChecker<Convolution3DForward>{ | |||||
"group_conv3d:1x1x1", &require_algo}); | |||||
#endif | |||||
checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>( | |||||
ExecutionPolicyAlgoName{"CUDA:GROUP_CONV3D_FORWARD", | |||||
{{"1x1x1", | |||||
{}}}})); | |||||
Convolution3D::Param param; | Convolution3D::Param param; | ||||
param.sparse = Convolution3D::Param::Sparse::GROUP; | param.sparse = Convolution3D::Param::Sparse::GROUP; | ||||
auto ICg = IC / group; | auto ICg = IC / group; | ||||
@@ -125,6 +123,45 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_DATA) { | |||||
run(2, 32, 64, 64, 64, 3, 3, 3, 32, 62, 62, 62, 0, 0, 0, 1, 1, 1, 4); | run(2, 32, 64, 64, 64, 3, 3, 3, 32, 62, 62, 62, 0, 0, 0, 1, 1, 1, 4); | ||||
} | } | ||||
TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_DATA_CUDNN) {
    // Checks grouped 3D-conv backward-data through the group algorithm with
    // the cuDNN sub-algorithm forced, comparing against the naive reference.
    auto run = [&](size_t batch, size_t in_ch, size_t in_d, size_t in_h,
                   size_t in_w, size_t flt_d, size_t flt_h, size_t flt_w,
                   size_t out_ch, size_t out_d, size_t out_h, size_t out_w,
                   size_t pad_d, size_t pad_h, size_t pad_w, size_t str_d,
                   size_t str_h, size_t str_w, size_t group) {
        Checker<Convolution3DBackwardData> checker(handle_cuda());
        // Restrict execution to the group wrapper dispatching to cuDNN.
        checker.set_before_exec_callback(
                AlgoChecker<Convolution3DBackwardData>(ExecutionPolicyAlgoName{
                        "CUDA:GROUP_CONV3D_BACKWARD_DATA", {{"CUDNN", {}}}}));
        Convolution3DBackwardData::Param param;
        param.sparse = Convolution3D::Param::Sparse::GROUP;
        param.pad_d = pad_d;
        param.pad_h = pad_h;
        param.pad_w = pad_w;
        param.stride_d = str_d;
        param.stride_h = str_h;
        param.stride_w = str_w;
        // Per-group channel counts for the grouped filter layout.
        auto in_ch_per_grp = in_ch / group;
        auto out_ch_per_grp = out_ch / group;
        checker.set_param(param).exec(
                {{group, out_ch_per_grp, in_ch_per_grp, flt_d, flt_h, flt_w},
                 {batch, out_ch, out_d, out_h, out_w},
                 {batch, in_ch, in_d, in_h, in_w}});
    };
    // bug case in prev ver
    run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 3, 0, 0, 1, 1, 1, 1, 2);
    run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 0, 0, 1, 1, 1, 2, 2);
    run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 0, 1, 0, 1, 2, 1, 2);
    run(1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 0, 0, 2, 1, 1, 2);
    // normal case
    run(2, 64, 7, 7, 7, 3, 3, 3, 32, 5, 5, 5, 0, 0, 0, 1, 1, 1, 2);
    // padded case
    run(2, 32, 7, 7, 7, 3, 3, 3, 64, 7, 7, 7, 1, 1, 1, 1, 1, 1, 4);
    // strided case
    run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8);
    // bigger case
    run(2, 32, 64, 64, 64, 3, 3, 3, 32, 62, 62, 62, 0, 0, 0, 1, 1, 1, 4);
}
TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER) { | TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER) { | ||||
auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, | auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, | ||||
size_t FD, size_t FH, size_t FW, size_t OC, size_t OD, | size_t FD, size_t FH, size_t FW, size_t OC, size_t OD, | ||||
@@ -153,6 +190,39 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER) { | |||||
run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8); | run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8); | ||||
} | } | ||||
TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER_CUDNN) {
    // Checks grouped 3D-conv backward-filter through the group algorithm with
    // the cuDNN sub-algorithm forced, comparing against the naive reference.
    auto run = [&](size_t batch, size_t in_ch, size_t in_d, size_t in_h,
                   size_t in_w, size_t flt_d, size_t flt_h, size_t flt_w,
                   size_t out_ch, size_t out_d, size_t out_h, size_t out_w,
                   size_t pad_d, size_t pad_h, size_t pad_w, size_t str_d,
                   size_t str_h, size_t str_w, size_t group) {
        Checker<Convolution3DBackwardFilter> checker(handle_cuda());
        // Restrict execution to the group wrapper dispatching to cuDNN.
        checker.set_before_exec_callback(
                AlgoChecker<Convolution3DBackwardFilter>(
                        ExecutionPolicyAlgoName{
                                "CUDA:GROUP_CONV3D_BACKWARD_FILTER",
                                {{"CUDNN", {}}}}));
        Convolution3DBackwardFilter::Param param;
        param.sparse = Convolution3D::Param::Sparse::GROUP;
        param.pad_d = pad_d;
        param.pad_h = pad_h;
        param.pad_w = pad_w;
        param.stride_d = str_d;
        param.stride_h = str_h;
        param.stride_w = str_w;
        // Per-group channel counts for the grouped filter layout.
        auto in_ch_per_grp = in_ch / group;
        auto out_ch_per_grp = out_ch / group;
        checker.set_param(param).exec(
                {{batch, in_ch, in_d, in_h, in_w},
                 {batch, out_ch, out_d, out_h, out_w},
                 {group, out_ch_per_grp, in_ch_per_grp, flt_d, flt_h, flt_w}});
    };
    // normal case
    run(2, 64, 7, 7, 7, 3, 3, 3, 32, 5, 5, 5, 0, 0, 0, 1, 1, 1, 2);
    // padded case
    run(2, 32, 7, 7, 7, 3, 3, 3, 64, 7, 7, 7, 1, 1, 1, 1, 1, 1, 4);
    // strided case
    run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8);
}
} // namespace test | } // namespace test | ||||
} // namespace megdnn | } // namespace megdnn | ||||