@@ -24,21 +24,7 @@ Convolution3DBackwardDataImpl::AlgoPack::AlgoPack() { | |||
for (auto &&i: cudnn) { | |||
all_algos.push_back(&i); | |||
} | |||
all_algos.reserve(all_algos.size() * 2); | |||
// add gconv algos by AlgoGroupConvGeneral | |||
auto all_algos_data = all_algos.data(); | |||
for (size_t i = 1; i < all_algos.size(); ++ i) { | |||
gconv.push_back({all_algos[i]}); | |||
} | |||
for (size_t i = 1; i < all_algos.size(); ++ i) { | |||
algo2gconv[all_algos[i]] = &gconv[i - 1]; | |||
} | |||
for (auto &&i: gconv) { | |||
all_algos.push_back(&i); | |||
} | |||
megdnn_assert(all_algos_data == all_algos.data()); | |||
all_algos.push_back(&group); | |||
for (auto&& algo : all_algos) { | |||
m_all_algos_map.emplace(algo->info().desc, algo); | |||
@@ -61,27 +47,26 @@ Convolution3DBackwardDataImpl::AlgoPack::cudnn_from_enum( | |||
Convolution3DBackwardDataImpl::AlgoPack Convolution3DBackwardDataImpl::sm_algo_pack; | |||
Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs( | |||
Convolution3DBackwardDataImpl *o, | |||
const TensorLayout &filter, const TensorLayout &diff, | |||
const TensorLayout &grad): | |||
SizeArgs(o, o->make_canonized_filter_meta(grad.ndim, filter), diff, grad) | |||
{ | |||
} | |||
const Convolution3DBackwardDataImpl* o, const TensorLayout& filter, | |||
const TensorLayout& diff, const TensorLayout& grad) | |||
: SizeArgs(o, filter, o->make_canonized_filter_meta(grad.ndim, filter), | |||
diff, grad) {} | |||
Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs( | |||
Convolution3DBackwardDataImpl *o, | |||
const CanonizedFilterMeta &filter, const TensorLayout &diff, | |||
const Convolution3DBackwardDataImpl *o, const TensorLayout& filter, | |||
const CanonizedFilterMeta &filter_meta, const TensorLayout &diff, | |||
const TensorLayout &grad): | |||
handle{concrete_handle(o->handle())}, | |||
filter_meta{filter}, | |||
filter_meta{filter_meta}, | |||
diff_layout{&diff}, | |||
grad_layout{&grad}, | |||
filter_layout{&filter}, | |||
opr{o} | |||
{ | |||
} | |||
Convolution3DBackwardDataImpl::AlgoBase::ExecArgs::ExecArgs( | |||
Convolution3DBackwardDataImpl *opr, | |||
const Convolution3DBackwardDataImpl *opr, | |||
_megdnn_tensor_in filter, | |||
_megdnn_tensor_in diff, | |||
_megdnn_tensor_out grad, | |||
@@ -42,31 +42,33 @@ public: | |||
struct SizeArgs { | |||
HandleImpl* handle; | |||
CanonizedFilterMeta filter_meta; | |||
const TensorLayout *diff_layout, *grad_layout; | |||
Convolution3DBackwardDataImpl* opr; | |||
const TensorLayout *diff_layout, *grad_layout, *filter_layout; | |||
const Convolution3DBackwardDataImpl* opr; | |||
std::string to_string() const; | |||
void init_desc(convolution3d::CUDNNBwdDataDescs& desc) const { | |||
desc.set(filter_meta, *diff_layout, *grad_layout, opr->param()); | |||
} | |||
SizeArgs(Convolution3DBackwardDataImpl* opr, const TensorLayout& filter, | |||
const TensorLayout& diff, const TensorLayout& grad); | |||
SizeArgs(Convolution3DBackwardDataImpl* opr, | |||
const CanonizedFilterMeta& filter, const TensorLayout& diff, | |||
SizeArgs(const Convolution3DBackwardDataImpl* opr, | |||
const TensorLayout& filter, const TensorLayout& diff, | |||
const TensorLayout& grad); | |||
SizeArgs(const Convolution3DBackwardDataImpl* opr, | |||
const TensorLayout& filter, | |||
const CanonizedFilterMeta& filter_meta, | |||
const TensorLayout& diff, const TensorLayout& grad); | |||
convolution3d::ForwardSizeArgs as_fwd_args() const { | |||
return {handle, grad_layout, filter_meta, diff_layout, | |||
opr->param().data_type}; | |||
return {handle, grad_layout, filter_layout, | |||
filter_meta, diff_layout, opr->param().data_type}; | |||
} | |||
}; | |||
struct ExecArgs : public SizeArgs { | |||
const TensorND *filter_tensor, *diff_tensor, *grad_tensor; | |||
Workspace workspace; | |||
ExecArgs(Convolution3DBackwardDataImpl* opr, _megdnn_tensor_in filter, | |||
_megdnn_tensor_in diff, _megdnn_tensor_out grad, | |||
_megdnn_workspace workspace); | |||
ExecArgs(const Convolution3DBackwardDataImpl* opr, | |||
_megdnn_tensor_in filter, _megdnn_tensor_in diff, | |||
_megdnn_tensor_out grad, _megdnn_workspace workspace); | |||
}; | |||
virtual bool is_available(const SizeArgs& args) const = 0; | |||
virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0; | |||
@@ -154,29 +156,25 @@ public: | |||
//! implement group conv by another algo | |||
class Convolution3DBackwardDataImpl::AlgoGroupConvGeneral final | |||
: public AlgoBase { | |||
AlgoBase* m_impl; | |||
std::string m_name; | |||
public: | |||
AlgoGroupConvGeneral(AlgoBase* impl); | |||
bool is_available(const SizeArgs& args) const override; | |||
size_t get_workspace_in_bytes(const SizeArgs& args) const override; | |||
void exec(const ExecArgs& args) const override; | |||
std::vector<SearchItem> get_subopr_list( | |||
const TensorLayoutArray& layouts, | |||
const OperatorBase* opr) const override; | |||
const char* name() const override { return m_name.c_str(); } | |||
const char* name() const override { | |||
return "CUDA:GROUP_CONV3D_BACKWARD_DATA"; | |||
} | |||
static void modify_size_args(SizeArgs& args, TensorLayout& diff_pg, | |||
TensorLayout& grad_pg); | |||
AlgoAttribute attribute() const override { | |||
auto ret = static_cast<AlgoAttribute>(0); | |||
if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) { | |||
ret |= AlgoAttribute::REPRODUCIBLE; | |||
} | |||
return ret; | |||
return AlgoAttribute::REPRODUCIBLE; | |||
} | |||
MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | |||
private: | |||
WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const; | |||
}; | |||
class Convolution3DBackwardDataImpl::AlgoPack : NonCopyableObj { | |||
@@ -190,8 +188,7 @@ public: | |||
std::vector<AlgoCUDNN> cudnn; | |||
AlgoChanwise chanwise; | |||
std::vector<AlgoGroupConvGeneral> gconv; | |||
std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv; | |||
AlgoGroupConvGeneral group; | |||
std::vector<AlgoBase*> | |||
//! all algorithms | |||
@@ -15,68 +15,121 @@ using namespace megdnn; | |||
using namespace cuda; | |||
using namespace convolution3d; | |||
void Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::modify_size_args( | |||
Convolution3DBackwardDataImpl::AlgoBase::SizeArgs &args, | |||
TensorLayout &diff_pg, TensorLayout &grad_pg) { | |||
diff_pg = *args.diff_layout; | |||
grad_pg = *args.grad_layout; | |||
namespace { | |||
std::pair<TensorLayoutArray, Convolution3DBackwardDataImpl::Param> | |||
sub_opr_config(const Convolution3DBackwardDataImpl::AlgoBase::SizeArgs& args) { | |||
SmallVector<size_t> flt_shape(0); | |||
std::vector<ptrdiff_t> flt_stride(0); | |||
size_t idx = 0; | |||
// check if the first dim is group | |||
if (args.filter_layout->ndim > args.grad_layout->ndim) | |||
++idx; | |||
for (; idx < args.filter_layout->ndim; ++idx) { | |||
flt_shape.push_back(args.filter_layout->shape[idx]); | |||
flt_stride.push_back(args.filter_layout->stride[idx]); | |||
} | |||
TensorLayout filter_pg(flt_shape, flt_stride, | |||
args.filter_layout->dtype, | |||
args.filter_layout->format); | |||
TensorLayout diff_pg = *args.diff_layout; | |||
TensorLayout grad_pg = *args.grad_layout; | |||
auto nr_grp = args.filter_meta.group; | |||
args.filter_meta.group = 1; | |||
diff_pg.shape[1] /= nr_grp; | |||
grad_pg.shape[1] /= nr_grp; | |||
args.diff_layout = &diff_pg; | |||
args.grad_layout = &grad_pg; | |||
size_t c_pos = 1; | |||
diff_pg.shape[c_pos] /= nr_grp; | |||
grad_pg.shape[c_pos] /= nr_grp; | |||
megdnn::param::Convolution3D param = args.opr->param(); | |||
param.sparse = megdnn::param::Convolution3D::Sparse::DENSE; | |||
std::pair<TensorLayoutArray, Convolution3DBackwardDataImpl::Param> ret; | |||
ret.first = {filter_pg, diff_pg, grad_pg}; | |||
ret.second = param; | |||
return ret; | |||
} | |||
Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral( | |||
AlgoBase *impl): | |||
m_impl{impl} | |||
{ | |||
m_name = "group_conv3d:"; | |||
m_name += impl->name(); | |||
std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DBackwardData>> | |||
prepare_sub_opr(const Convolution3DBackwardDataImpl::AlgoBase::SizeArgs& args) { | |||
auto conv3d_backdata_opr = | |||
args.handle->create_operator<Convolution3DBackwardData>(); | |||
set_execution_policy<Convolution3DBackwardData, Convolution3DBackwardData*>( | |||
args.opr, conv3d_backdata_opr.get()); | |||
auto&& config = sub_opr_config(args); | |||
conv3d_backdata_opr->param() = config.second; | |||
return {config.first, std::move(conv3d_backdata_opr)}; | |||
} | |||
} // namespace | |||
std::vector<Algorithm::SearchItem> | |||
Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_subopr_list( | |||
const TensorLayoutArray& layouts, const OperatorBase* opr) const { | |||
AlgoBase::SizeArgs args{ | |||
static_cast<const Convolution3DBackwardDataImpl*>(opr), layouts[0], | |||
layouts[1], layouts[2]}; | |||
auto&& config = sub_opr_config(args); | |||
std::string param_str; | |||
Algorithm::serialize_write_pod(config.second, param_str); | |||
return {{Algorithm::OprType::CONVOLUTION3D_BACKWARD_DATA, param_str, | |||
config.first}}; | |||
} | |||
bool Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::is_available( | |||
const SizeArgs &args) const { | |||
if (args.filter_meta.group <= 1) | |||
return false; | |||
auto sub_args = args; | |||
TensorLayout diff_pg, grad_pg; | |||
modify_size_args(sub_args, diff_pg, grad_pg); | |||
return m_impl->is_available(sub_args); | |||
if (args.filter_meta.format != Param::Format::NCDHW) { | |||
return false; | |||
} | |||
auto config = prepare_sub_opr(args); | |||
return get_algorithm( | |||
static_cast<Convolution3DBackwardDataImpl*>(config.second.get()), | |||
config.first[0], config.first[1], config.first[2]); | |||
} | |||
WorkspaceBundle | |||
Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_bundle( | |||
void* ptr, const SizeArgs& args) const { | |||
auto config = prepare_sub_opr(args); | |||
size_t sizes = config.second->get_workspace_in_bytes( | |||
config.first[0], config.first[1], config.first[2]); | |||
return {ptr, {sizes}}; | |||
} | |||
size_t Convolution3DBackwardDataImpl::AlgoGroupConvGeneral:: | |||
get_workspace_in_bytes(const SizeArgs &args) const { | |||
auto sub_args = args; | |||
TensorLayout diff_pg, grad_pg; | |||
modify_size_args(sub_args, diff_pg, grad_pg); | |||
return m_impl->get_workspace_in_bytes(sub_args); | |||
size_t | |||
Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_in_bytes( | |||
const SizeArgs& args) const { | |||
return get_workspace_bundle(nullptr, args).total_size_in_bytes(); | |||
} | |||
void Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::exec( | |||
const ExecArgs &args) const { | |||
auto sub_args = args; | |||
TensorND tflt{*args.filter_tensor}, tdiff{*args.diff_tensor}, | |||
tgrad{*args.grad_tensor}; | |||
modify_size_args(sub_args, tdiff.layout, tgrad.layout); | |||
sub_args.filter_tensor = &tflt; | |||
sub_args.diff_tensor = &tdiff; | |||
sub_args.grad_tensor = &tgrad; | |||
auto grp = args.filter_meta.group; | |||
auto &&fm = args.filter_meta; | |||
auto strd_flt = (fm.icpg * fm.ocpg * | |||
fm.spatial[0] * fm.spatial[1] * fm.spatial[2] * tflt.layout.dtype.size()), | |||
strd_diff = ( | |||
tdiff.layout.stride[1] * fm.ocpg * tdiff.layout.dtype.size()), | |||
strd_grad = ( | |||
tgrad.layout.stride[1] * fm.icpg * tgrad.layout.dtype.size()); | |||
for (uint32_t g = 0; g < grp; ++ g) { | |||
m_impl->exec(sub_args); | |||
incr_voidp(tflt.raw_ptr, strd_flt); | |||
incr_voidp(tdiff.raw_ptr, strd_diff); | |||
incr_voidp(tgrad.raw_ptr, strd_grad); | |||
const ExecArgs& args) const { | |||
auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args); | |||
{ | |||
auto config = prepare_sub_opr(args); | |||
TensorND tfilter{args.filter_tensor->raw_ptr, config.first[0]}; | |||
TensorND tdiff{args.diff_tensor->raw_ptr, config.first[1]}; | |||
TensorND tgrad{args.grad_tensor->raw_ptr, config.first[2]}; | |||
size_t c_pos = 1; | |||
auto grp = args.filter_meta.group; | |||
auto&& fm = args.filter_meta; | |||
auto strd_flt = (fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] * | |||
fm.spatial[2] * tfilter.layout.dtype.size()), | |||
strd_diff = (tdiff.layout.stride[c_pos] * fm.ocpg * | |||
tdiff.layout.dtype.size()), | |||
strd_grad = (tgrad.layout.stride[c_pos] * fm.icpg * | |||
tgrad.layout.dtype.size()); | |||
for (uint32_t g = 0; g < grp; ++g) { | |||
config.second->exec(tfilter, tdiff, tgrad, bundle.get_workspace(0)); | |||
incr_voidp(tfilter.raw_ptr, strd_flt); | |||
incr_voidp(tdiff.raw_ptr, strd_diff); | |||
incr_voidp(tgrad.raw_ptr, strd_grad); | |||
} | |||
} | |||
} | |||
@@ -26,21 +26,7 @@ Convolution3DBackwardFilterImpl::AlgoPack::AlgoPack() { | |||
} | |||
all_algos.push_back(&inplace_matmul); | |||
all_algos.reserve(all_algos.size() * 2); | |||
// add gconv algos by AlgoGroupConvGeneral | |||
auto all_algos_data = all_algos.data(); | |||
for (size_t i = 1; i < all_algos.size(); ++ i) { | |||
gconv.push_back({all_algos[i]}); | |||
} | |||
for (size_t i = 1; i < all_algos.size(); ++ i) { | |||
algo2gconv[all_algos[i]] = &gconv[i - 1]; | |||
} | |||
for (auto &&i: gconv) { | |||
all_algos.push_back(&i); | |||
} | |||
megdnn_assert(all_algos_data == all_algos.data()); | |||
non_cudnn_algos.push_back(all_algos.rbegin()[0]); //group inplace_matmul | |||
all_algos.push_back(&group); | |||
for (auto&& algo : all_algos) { | |||
m_all_algos_map.emplace(algo->info().desc, algo); | |||
@@ -64,27 +50,26 @@ Convolution3DBackwardFilterImpl::AlgoPack | |||
Convolution3DBackwardFilterImpl::sm_algo_pack; | |||
Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs( | |||
Convolution3DBackwardFilterImpl *o, | |||
const Convolution3DBackwardFilterImpl *o, | |||
const TensorLayout &src, const TensorLayout &diff, | |||
const TensorLayout &grad): | |||
SizeArgs(o, src, diff, o->make_canonized_filter_meta(src.ndim, grad)) | |||
SizeArgs(o, src, diff, grad, o->make_canonized_filter_meta(src.ndim, grad)) | |||
{ | |||
} | |||
Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs( | |||
Convolution3DBackwardFilterImpl *o, | |||
const TensorLayout &src, const TensorLayout &diff, | |||
const CanonizedFilterMeta &grad): | |||
handle{concrete_handle(o->handle())}, | |||
src_layout{&src}, | |||
diff_layout{&diff}, | |||
grad_filter_meta{grad}, | |||
opr{o} | |||
{ | |||
} | |||
const Convolution3DBackwardFilterImpl* o, const TensorLayout& src, | |||
const TensorLayout& diff, const TensorLayout& grad, | |||
const CanonizedFilterMeta& grad_meta) | |||
: handle{concrete_handle(o->handle())}, | |||
src_layout{&src}, | |||
diff_layout{&diff}, | |||
grad_layout{&grad}, | |||
grad_filter_meta{grad_meta}, | |||
opr{o} {} | |||
Convolution3DBackwardFilterImpl::AlgoBase::ExecArgs::ExecArgs( | |||
Convolution3DBackwardFilterImpl *opr, | |||
const Convolution3DBackwardFilterImpl *opr, | |||
_megdnn_tensor_in src, | |||
_megdnn_tensor_in diff, | |||
_megdnn_tensor_out grad, | |||
@@ -36,31 +36,34 @@ public: | |||
struct SizeArgs { | |||
HandleImpl* handle; | |||
const TensorLayout *src_layout, *diff_layout; | |||
const TensorLayout *src_layout, *diff_layout, *grad_layout; | |||
CanonizedFilterMeta grad_filter_meta; | |||
Convolution3DBackwardFilterImpl* opr; | |||
const Convolution3DBackwardFilterImpl* opr; | |||
std::string to_string() const; | |||
void init_desc(convolution3d::CUDNNBwdFilterDescs& desc) const { | |||
desc.set(*src_layout, *diff_layout, grad_filter_meta, opr->param()); | |||
} | |||
SizeArgs(Convolution3DBackwardFilterImpl* opr, const TensorLayout& src, | |||
const TensorLayout& diff, const TensorLayout& grad); | |||
SizeArgs(Convolution3DBackwardFilterImpl* opr, const TensorLayout& src, | |||
const TensorLayout& diff, const CanonizedFilterMeta& grad); | |||
SizeArgs(const Convolution3DBackwardFilterImpl* opr, | |||
const TensorLayout& src, const TensorLayout& diff, | |||
const TensorLayout& grad); | |||
SizeArgs(const Convolution3DBackwardFilterImpl* opr, | |||
const TensorLayout& src, const TensorLayout& diff, | |||
const TensorLayout& grad, | |||
const CanonizedFilterMeta& grad_meta); | |||
convolution3d::ForwardSizeArgs as_fwd_args() const { | |||
return {handle, src_layout, grad_filter_meta, diff_layout, | |||
opr->param().data_type}; | |||
return {handle, src_layout, grad_layout, | |||
grad_filter_meta, diff_layout, opr->param().data_type}; | |||
} | |||
}; | |||
struct ExecArgs : public SizeArgs { | |||
const TensorND *src_tensor, *diff_tensor, *grad_tensor; | |||
Workspace workspace; | |||
ExecArgs(Convolution3DBackwardFilterImpl* opr, _megdnn_tensor_in src, | |||
_megdnn_tensor_in diff, _megdnn_tensor_out grad, | |||
_megdnn_workspace workspace); | |||
ExecArgs(const Convolution3DBackwardFilterImpl* opr, | |||
_megdnn_tensor_in src, _megdnn_tensor_in diff, | |||
_megdnn_tensor_out grad, _megdnn_workspace workspace); | |||
}; | |||
virtual bool is_available(const SizeArgs& args) const = 0; | |||
virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0; | |||
@@ -162,30 +165,25 @@ public: | |||
//! implement group conv by another algo | |||
class Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral final | |||
: public AlgoBase { | |||
AlgoBase* m_impl; | |||
std::string m_name; | |||
public: | |||
AlgoGroupConvGeneral(AlgoBase* impl); | |||
bool is_available(const SizeArgs& args) const override; | |||
size_t get_workspace_in_bytes(const SizeArgs& args) const override; | |||
void exec(const ExecArgs& args) const override; | |||
std::vector<SearchItem> get_subopr_list( | |||
const TensorLayoutArray& layouts, | |||
const OperatorBase* opr) const override; | |||
const char* name() const override { return m_name.c_str(); } | |||
const char* name() const override { | |||
return "CUDA:GROUP_CONV3D_BACKWARD_FILTER"; | |||
} | |||
AlgoAttribute attribute() const override { | |||
auto ret = static_cast<AlgoAttribute>(0); | |||
if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) { | |||
ret |= AlgoAttribute::REPRODUCIBLE; | |||
} | |||
return ret; | |||
return AlgoAttribute::REPRODUCIBLE; | |||
} | |||
static void modify_size_args(SizeArgs& args, TensorLayout& src_pg, | |||
TensorLayout& diff_pg); | |||
MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | |||
private: | |||
WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const; | |||
}; | |||
class Convolution3DBackwardFilterImpl::AlgoPack : NonCopyableObj { | |||
@@ -200,8 +198,7 @@ public: | |||
std::vector<AlgoCUDNN> cudnn; | |||
AlgoInplaceMatmul inplace_matmul; | |||
AlgoChanwise chanwise; | |||
std::vector<AlgoGroupConvGeneral> gconv; | |||
std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv; | |||
AlgoGroupConvGeneral group; | |||
std::vector<AlgoBase*> | |||
//! all algorithms | |||
@@ -15,69 +15,123 @@ using namespace megdnn; | |||
using namespace cuda; | |||
using namespace convolution3d; | |||
void Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::modify_size_args( | |||
Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs &args, | |||
TensorLayout &src_pg, TensorLayout &diff_pg) { | |||
src_pg = *args.src_layout; | |||
diff_pg = *args.diff_layout; | |||
namespace { | |||
std::pair<TensorLayoutArray, Convolution3DBackwardFilterImpl::Param> | |||
sub_opr_config( | |||
const Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs& args) { | |||
SmallVector<size_t> flt_shape(0); | |||
std::vector<ptrdiff_t> flt_stride(0); | |||
size_t idx = 0; | |||
// check if the first dim is group | |||
if (args.grad_layout->ndim > args.src_layout->ndim) | |||
++idx; | |||
for (; idx < args.grad_layout->ndim; ++idx) { | |||
flt_shape.push_back(args.grad_layout->shape[idx]); | |||
flt_stride.push_back(args.grad_layout->stride[idx]); | |||
} | |||
TensorLayout grad_pg(flt_shape, flt_stride, args.grad_layout->dtype, | |||
args.grad_layout->format); | |||
TensorLayout src_pg = *args.src_layout; | |||
TensorLayout diff_pg = *args.diff_layout; | |||
auto nr_grp = args.grad_filter_meta.group; | |||
args.grad_filter_meta.group = 1; | |||
src_pg.shape[1] /= nr_grp; | |||
diff_pg.shape[1] /= nr_grp; | |||
args.src_layout = &src_pg; | |||
args.diff_layout = &diff_pg; | |||
size_t c_pos = 1; | |||
src_pg.shape[c_pos] /= nr_grp; | |||
diff_pg.shape[c_pos] /= nr_grp; | |||
megdnn::param::Convolution3D param = args.opr->param(); | |||
param.sparse = megdnn::param::Convolution3D::Sparse::DENSE; | |||
std::pair<TensorLayoutArray, Convolution3DBackwardFilterImpl::Param> ret; | |||
ret.first = {src_pg, diff_pg, grad_pg}; | |||
ret.second = param; | |||
return ret; | |||
} | |||
Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral( | |||
AlgoBase *impl): | |||
m_impl{impl} | |||
{ | |||
m_name = "group_conv3d:"; | |||
m_name += impl->name(); | |||
std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DBackwardFilter>> | |||
prepare_sub_opr( | |||
const Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs& args) { | |||
auto conv3d_backfilter_opr = | |||
args.handle->create_operator<Convolution3DBackwardFilter>(); | |||
set_execution_policy<Convolution3DBackwardFilter, | |||
Convolution3DBackwardFilter*>( | |||
args.opr, conv3d_backfilter_opr.get()); | |||
auto&& config = sub_opr_config(args); | |||
conv3d_backfilter_opr->param() = config.second; | |||
return {config.first, std::move(conv3d_backfilter_opr)}; | |||
} | |||
} // namespace | |||
std::vector<Algorithm::SearchItem> | |||
Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::get_subopr_list( | |||
const TensorLayoutArray& layouts, const OperatorBase* opr) const { | |||
AlgoBase::SizeArgs args{ | |||
static_cast<const Convolution3DBackwardFilterImpl*>(opr), | |||
layouts[0], layouts[1], layouts[2]}; | |||
auto&& config = sub_opr_config(args); | |||
std::string param_str; | |||
Algorithm::serialize_write_pod(config.second, param_str); | |||
return {{Algorithm::OprType::CONVOLUTION3D_BACKWARD_FILTER, param_str, | |||
config.first}}; | |||
} | |||
bool Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::is_available( | |||
const SizeArgs &args) const { | |||
const SizeArgs& args) const { | |||
if (args.grad_filter_meta.group <= 1) | |||
return false; | |||
auto sub_args = args; | |||
TensorLayout src_pg, diff_pg; | |||
modify_size_args(sub_args, src_pg, diff_pg); | |||
return m_impl->is_available(sub_args); | |||
if (args.grad_filter_meta.format != Param::Format::NCDHW) { | |||
return false; | |||
} | |||
auto config = prepare_sub_opr(args); | |||
return get_algorithm( | |||
static_cast<Convolution3DBackwardFilterImpl*>(config.second.get()), | |||
config.first[0], config.first[1], config.first[2]); | |||
} | |||
WorkspaceBundle | |||
Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::get_workspace_bundle( | |||
void* ptr, const SizeArgs& args) const { | |||
auto config = prepare_sub_opr(args); | |||
size_t sizes = config.second->get_workspace_in_bytes( | |||
config.first[0], config.first[1], config.first[2]); | |||
return {ptr, {sizes}}; | |||
} | |||
size_t Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral:: | |||
get_workspace_in_bytes(const SizeArgs &args) const { | |||
auto sub_args = args; | |||
TensorLayout src_pg, diff_pg; | |||
modify_size_args(sub_args, src_pg, diff_pg); | |||
return m_impl->get_workspace_in_bytes(sub_args); | |||
size_t | |||
Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::get_workspace_in_bytes( | |||
const SizeArgs& args) const { | |||
return get_workspace_bundle(nullptr, args).total_size_in_bytes(); | |||
} | |||
void Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::exec( | |||
const ExecArgs &args) const { | |||
auto sub_args = args; | |||
TensorND tsrc{*args.src_tensor}, tdiff{*args.diff_tensor}, | |||
tgrad{*args.grad_tensor}; | |||
modify_size_args(sub_args, tsrc.layout, tdiff.layout); | |||
sub_args.src_tensor = &tsrc; | |||
sub_args.diff_tensor = &tdiff; | |||
sub_args.grad_tensor = &tgrad; | |||
auto &&fm = args.grad_filter_meta; | |||
auto grp = fm.group; | |||
auto strd_src = ( | |||
tsrc.layout.stride[1] * fm.icpg * tsrc.layout.dtype.size()), | |||
strd_diff = ( | |||
tdiff.layout.stride[1] * fm.ocpg * tdiff.layout.dtype.size()), | |||
strd_grad = (fm.icpg * fm.ocpg * | |||
fm.spatial[0] * fm.spatial[1] * fm.spatial[2] * tgrad.layout.dtype.size()); | |||
for (uint32_t g = 0; g < grp; ++ g) { | |||
m_impl->exec(sub_args); | |||
incr_voidp(tsrc.raw_ptr, strd_src); | |||
incr_voidp(tdiff.raw_ptr, strd_diff); | |||
incr_voidp(tgrad.raw_ptr, strd_grad); | |||
const ExecArgs& args) const { | |||
auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args); | |||
{ | |||
auto config = prepare_sub_opr(args); | |||
TensorND tsrc{args.src_tensor->raw_ptr, config.first[0]}; | |||
TensorND tdiff{args.diff_tensor->raw_ptr, config.first[1]}; | |||
TensorND tgrad{args.grad_tensor->raw_ptr, config.first[2]}; | |||
size_t c_pos = 1; | |||
auto grp = args.grad_filter_meta.group; | |||
auto&& fm = args.grad_filter_meta; | |||
auto strd_src = (tsrc.layout.stride[c_pos] * fm.icpg * | |||
tsrc.layout.dtype.size()), | |||
strd_diff = (tdiff.layout.stride[c_pos] * fm.ocpg * | |||
tdiff.layout.dtype.size()), | |||
strd_grad = (fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] * | |||
fm.spatial[2] * tgrad.layout.dtype.size()); | |||
for (uint32_t g = 0; g < grp; ++g) { | |||
config.second->exec(tsrc, tdiff, tgrad, bundle.get_workspace(0)); | |||
incr_voidp(tsrc.raw_ptr, strd_src); | |||
incr_voidp(tdiff.raw_ptr, strd_diff); | |||
incr_voidp(tgrad.raw_ptr, strd_grad); | |||
} | |||
} | |||
} | |||
@@ -28,22 +28,7 @@ Convolution3DForwardImpl::AlgoPack::AlgoPack() { | |||
} | |||
all_algos.push_back(&inplace_matmul); | |||
all_algos.push_back(&a1x1x1); | |||
all_algos.reserve(all_algos.size() * 2); | |||
// add gconv algos by AlgoGroupConvGeneral | |||
auto all_algos_data = all_algos.data(); | |||
for (size_t i = 1; i < all_algos.size(); ++ i) { | |||
gconv.push_back({all_algos[i]}); | |||
} | |||
for (size_t i = 1; i < all_algos.size(); ++ i) { | |||
algo2gconv[all_algos[i]] = &gconv[i - 1]; | |||
} | |||
for (auto &&i: gconv) { | |||
all_algos.push_back(&i); | |||
} | |||
megdnn_assert(all_algos_data == all_algos.data()); | |||
non_cudnn_algos.push_back(all_algos.rbegin()[1]); // group inplace_matmul | |||
non_cudnn_algos.push_back(all_algos.rbegin()[0]); // group 1x1x1 | |||
all_algos.push_back(&group); | |||
for (auto&& algo : all_algos) { | |||
m_all_algos_map.emplace(algo->info().desc, algo); | |||
@@ -66,28 +51,25 @@ Convolution3DForwardImpl::AlgoPack::cudnn_from_enum( | |||
Convolution3DForwardImpl::AlgoPack Convolution3DForwardImpl::sm_algo_pack; | |||
Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs( | |||
Convolution3DForwardImpl *o, | |||
const TensorLayout &src, const TensorLayout &filter, | |||
const TensorLayout &dst): | |||
SizeArgs(o, src, o->make_canonized_filter_meta(src.ndim, filter), dst) | |||
{ | |||
} | |||
const Convolution3DForwardImpl* o, const TensorLayout& src, | |||
const TensorLayout& filter, const TensorLayout& dst) | |||
: SizeArgs(o, src, filter, | |||
o->make_canonized_filter_meta(src.ndim, filter), dst) {} | |||
Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs( | |||
Convolution3DForwardImpl *o, | |||
const TensorLayout &src, const CanonizedFilterMeta &filter, | |||
const TensorLayout &dst): | |||
ForwardSizeArgs{ | |||
concrete_handle(o->handle()), | |||
&src, filter, &dst, | |||
o->param().data_type | |||
}, | |||
opr{o} | |||
{ | |||
} | |||
const Convolution3DForwardImpl* o, const TensorLayout& src, | |||
const TensorLayout& filter, const CanonizedFilterMeta& filter_meta, | |||
const TensorLayout& dst) | |||
: ForwardSizeArgs{concrete_handle(o->handle()), | |||
&src, | |||
&filter, | |||
filter_meta, | |||
&dst, | |||
o->param().data_type}, | |||
opr{o} {} | |||
Convolution3DForwardImpl::AlgoBase::ExecArgs::ExecArgs( | |||
Convolution3DForwardImpl *opr, | |||
const Convolution3DForwardImpl *opr, | |||
_megdnn_tensor_in src, | |||
_megdnn_tensor_in filter, | |||
_megdnn_tensor_out dst, | |||
@@ -48,22 +48,24 @@ public: | |||
AlgoBase() : Algorithm() { m_handle_type = Handle::HandleType::CUDA; } | |||
struct SizeArgs : public convolution3d::ForwardSizeArgs { | |||
Convolution3DForwardImpl* opr; | |||
const Convolution3DForwardImpl* opr; | |||
std::string to_string() const; | |||
void init_desc(convolution3d::CUDNNForwardDescs& desc) const { | |||
desc.set(*src_layout, filter_meta, *dst_layout, opr->param()); | |||
} | |||
SizeArgs(Convolution3DForwardImpl* opr, const TensorLayout& src, | |||
SizeArgs(const Convolution3DForwardImpl* opr, const TensorLayout& src, | |||
const TensorLayout& filter, const TensorLayout& dst); | |||
SizeArgs(Convolution3DForwardImpl* opr, const TensorLayout& src, | |||
const CanonizedFilterMeta& filter, const TensorLayout& dst); | |||
SizeArgs(const Convolution3DForwardImpl* opr, const TensorLayout& src, | |||
const TensorLayout& filter, | |||
const CanonizedFilterMeta& filter_meta, | |||
const TensorLayout& dst); | |||
}; | |||
struct ExecArgs : public SizeArgs { | |||
const TensorND *src_tensor, *filter_tensor, *dst_tensor; | |||
Workspace workspace; | |||
ExecArgs(Convolution3DForwardImpl* opr, _megdnn_tensor_in src, | |||
ExecArgs(const Convolution3DForwardImpl* opr, _megdnn_tensor_in src, | |||
_megdnn_tensor_in filter, _megdnn_tensor_out dst, | |||
_megdnn_workspace workspace); | |||
}; | |||
@@ -114,35 +116,22 @@ public: | |||
//! implement group conv by another algo | |||
class Convolution3DForwardImpl::AlgoGroupConvGeneral final : public AlgoBase { | |||
AlgoBase* m_impl; | |||
std::string m_name; | |||
public: | |||
AlgoGroupConvGeneral(AlgoBase* impl); | |||
bool is_available(const SizeArgs& args) const override; | |||
size_t get_workspace_in_bytes(const SizeArgs& args) const override; | |||
void exec(const ExecArgs& args) const override; | |||
std::vector<SearchItem> get_subopr_list( | |||
const TensorLayoutArray& layouts, | |||
const OperatorBase* opr) const override; | |||
const char* name() const override { return m_name.c_str(); } | |||
const char* name() const override { return "CUDA:GROUP_CONV3D_FORWARD"; } | |||
AlgoAttribute attribute() const override { | |||
auto ret = AlgoAttribute::DEFAULT; | |||
if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) { | |||
ret |= AlgoAttribute::REPRODUCIBLE; | |||
} | |||
#define cb(attr) \ | |||
if (m_impl->contain_attribute_all(attr)) { \ | |||
ret |= attr; \ | |||
} | |||
MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE(cb) | |||
#undef cb | |||
return ret; | |||
return AlgoAttribute::REPRODUCIBLE; | |||
} | |||
static void modify_size_args(SizeArgs& args, TensorLayout& src_pg, | |||
TensorLayout& dst_pg); | |||
MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | |||
private: | |||
WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const; | |||
}; | |||
class Convolution3DForwardImpl::AlgoCUDNN final : public AlgoBase { | |||
@@ -226,8 +215,7 @@ public: | |||
Algo1x1x1 a1x1x1; | |||
AlgoInplaceMatmul inplace_matmul; | |||
AlgoChanwise chanwise; | |||
std::vector<AlgoGroupConvGeneral> gconv; | |||
std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv; | |||
AlgoGroupConvGeneral group; | |||
std::vector<AlgoBase*> | |||
//! all algorithms | |||
@@ -15,84 +15,136 @@ using namespace megdnn; | |||
using namespace cuda; | |||
using namespace convolution3d; | |||
void Convolution3DForwardImpl::AlgoGroupConvGeneral::modify_size_args( | |||
Convolution3DForwardImpl::AlgoBase::SizeArgs &args, | |||
TensorLayout &src_pg, TensorLayout &dst_pg) { | |||
src_pg = *args.src_layout; | |||
dst_pg = *args.dst_layout; | |||
namespace { | |||
std::pair<TensorLayoutArray, Convolution3DForwardImpl::Param> sub_opr_config( | |||
const Convolution3DForwardImpl::AlgoBase::SizeArgs& args) { | |||
TensorLayout src_pg = *args.src_layout; | |||
SmallVector<size_t> flt_shape(0); | |||
std::vector<ptrdiff_t> flt_stride(0); | |||
size_t idx = 0; | |||
// check if the first dim is group | |||
if (args.filter_layout->ndim > args.src_layout->ndim) | |||
++idx; | |||
for (; idx < args.filter_layout->ndim; ++idx) { | |||
flt_shape.push_back(args.filter_layout->shape[idx]); | |||
flt_stride.push_back(args.filter_layout->stride[idx]); | |||
} | |||
TensorLayout filter_pg(flt_shape, flt_stride, | |||
args.filter_layout->dtype, | |||
args.filter_layout->format); | |||
TensorLayout dst_pg = *args.dst_layout; | |||
auto nr_grp = args.filter_meta.group; | |||
args.filter_meta.group = 1; | |||
size_t c_pos; | |||
if (args.filter_meta.format == Param::Format::NCDHW) { | |||
if (args.filter_meta.format == param::Convolution3D::Format::NCDHW) { | |||
c_pos = 1; | |||
} else { | |||
megdnn_assert(args.filter_meta.format == Param::Format::NDHWC, | |||
megdnn_assert( | |||
args.filter_meta.format == param::Convolution3D::Format::NDHWC, | |||
"invalid conv format"); | |||
c_pos = 4; | |||
} | |||
src_pg.shape[c_pos] /= nr_grp; | |||
dst_pg.shape[c_pos] /= nr_grp; | |||
args.src_layout = &src_pg; | |||
args.dst_layout = &dst_pg; | |||
megdnn::param::Convolution3D param = args.opr->param(); | |||
param.sparse = megdnn::param::Convolution3D::Sparse::DENSE; | |||
std::pair<TensorLayoutArray, Convolution3DForwardImpl::Param> ret; | |||
ret.first = {src_pg, filter_pg, dst_pg}; | |||
ret.second = param; | |||
return ret; | |||
} | |||
std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DForward>> | |||
prepare_sub_opr(const Convolution3DForwardImpl::AlgoBase::SizeArgs& args) { | |||
auto conv3d_opr = args.handle->create_operator<Convolution3D>(); | |||
set_execution_policy<Convolution3DForward, Convolution3DForward*>( | |||
args.opr, conv3d_opr.get()); | |||
auto&& config = sub_opr_config(args); | |||
conv3d_opr->param() = config.second; | |||
return {config.first, std::move(conv3d_opr)}; | |||
} | |||
} // namespace | |||
std::vector<Algorithm::SearchItem> | |||
Convolution3DForwardImpl::AlgoGroupConvGeneral::get_subopr_list( | |||
const TensorLayoutArray& layouts, const OperatorBase* opr) const { | |||
AlgoBase::SizeArgs args{static_cast<const Convolution3DForwardImpl*>(opr), | |||
layouts[0], layouts[1], layouts[2]}; | |||
auto&& config = sub_opr_config(args); | |||
Convolution3DForwardImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral( | |||
AlgoBase *impl): | |||
m_impl{impl} { | |||
m_name = "group_conv3d:"; | |||
m_name += impl->name(); | |||
std::string param_str; | |||
Algorithm::serialize_write_pod(config.second, param_str); | |||
return {{Algorithm::OprType::CONVOLUTION3D_FORWARD, param_str, | |||
config.first}}; | |||
} | |||
bool Convolution3DForwardImpl::AlgoGroupConvGeneral::is_available( | |||
const SizeArgs &args) const { | |||
if (args.filter_meta.group <= 1) | |||
return false; | |||
auto sub_args = args; | |||
TensorLayout src_pg, dst_pg; | |||
modify_size_args(sub_args, src_pg, dst_pg); | |||
return m_impl->is_available(sub_args); | |||
if (args.filter_meta.format != Param::Format::NCDHW && | |||
args.filter_meta.format != Param::Format::NDHWC) { | |||
return false; | |||
} | |||
auto config = prepare_sub_opr(args); | |||
return get_algorithm( | |||
static_cast<Convolution3DForwardImpl*>(config.second.get()), | |||
config.first[0], config.first[1], config.first[2]); | |||
} | |||
WorkspaceBundle | |||
Convolution3DForwardImpl::AlgoGroupConvGeneral::get_workspace_bundle( | |||
void* ptr, const SizeArgs& args) const { | |||
auto config = prepare_sub_opr(args); | |||
size_t sizes = config.second->get_workspace_in_bytes( | |||
config.first[0], config.first[1], config.first[2]); | |||
return {ptr, {sizes}}; | |||
} | |||
size_t Convolution3DForwardImpl::AlgoGroupConvGeneral::get_workspace_in_bytes( | |||
const SizeArgs &args) const { | |||
auto sub_args = args; | |||
TensorLayout src_pg, dst_pg; | |||
modify_size_args(sub_args, src_pg, dst_pg); | |||
return m_impl->get_workspace_in_bytes(sub_args); | |||
const SizeArgs& args) const { | |||
return get_workspace_bundle(nullptr, args).total_size_in_bytes(); | |||
} | |||
void Convolution3DForwardImpl::AlgoGroupConvGeneral::exec( | |||
const ExecArgs &args) const { | |||
auto sub_args = args; | |||
TensorND tsrc{*args.src_tensor}, tdst{*args.dst_tensor}, | |||
tflt{*args.filter_tensor}; | |||
modify_size_args(sub_args, tsrc.layout, tdst.layout); | |||
sub_args.src_tensor = &tsrc; | |||
sub_args.dst_tensor = &tdst; | |||
sub_args.filter_tensor = &tflt; | |||
const ExecArgs& args) const { | |||
auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args); | |||
{ | |||
auto config = prepare_sub_opr(args); | |||
TensorND tsrc{args.src_tensor->raw_ptr, config.first[0]}; | |||
TensorND tfilter{args.filter_tensor->raw_ptr, config.first[1]}; | |||
TensorND tdst{args.dst_tensor->raw_ptr, config.first[2]}; | |||
size_t c_pos; | |||
if (args.filter_meta.format == Param::Format::NCDHW) { | |||
c_pos = 1; | |||
} else { | |||
megdnn_assert(args.filter_meta.format == Param::Format::NDHWC, | |||
"invalid conv format"); | |||
c_pos = 4; | |||
} | |||
size_t c_pos; | |||
if (args.filter_meta.format == Param::Format::NCDHW) { | |||
c_pos = 1; | |||
} else { | |||
megdnn_assert(args.filter_meta.format == Param::Format::NDHWC, | |||
"invalid conv format"); | |||
c_pos = 4; | |||
} | |||
auto grp = args.filter_meta.group; | |||
auto&& fm = args.filter_meta; | |||
auto strd_src = tsrc.layout.stride[c_pos] * fm.icpg * | |||
tsrc.layout.dtype.size(), | |||
strd_dst = tdst.layout.stride[c_pos] * fm.ocpg * | |||
tdst.layout.dtype.size(), | |||
strd_flt = fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] * | |||
fm.spatial[2] * tfilter.layout.dtype.size(); | |||
auto grp = args.filter_meta.group; | |||
auto &&fm = args.filter_meta; | |||
auto strd_src = tsrc.layout.stride[c_pos] * fm.icpg * tsrc.layout.dtype.size(), | |||
strd_dst = tdst.layout.stride[c_pos] * fm.ocpg * tdst.layout.dtype.size(), | |||
strd_flt = fm.icpg * fm.ocpg * | |||
fm.spatial[0] * fm.spatial[1] * fm.spatial[2] * | |||
tflt.layout.dtype.size(); | |||
for (uint32_t g = 0; g < grp; ++ g) { | |||
m_impl->exec(sub_args); | |||
incr_voidp(tsrc.raw_ptr, strd_src); | |||
incr_voidp(tdst.raw_ptr, strd_dst); | |||
incr_voidp(tflt.raw_ptr, strd_flt); | |||
for (uint32_t g = 0; g < grp; ++g) { | |||
config.second->exec(tsrc, tfilter, tdst, bundle.get_workspace(0)); | |||
incr_voidp(tsrc.raw_ptr, strd_src); | |||
incr_voidp(tdst.raw_ptr, strd_dst); | |||
incr_voidp(tfilter.raw_ptr, strd_flt); | |||
} | |||
} | |||
} | |||
@@ -26,6 +26,7 @@ namespace convolution3d { | |||
struct ForwardSizeArgs { | |||
HandleImpl *handle; | |||
const TensorLayout *src_layout; | |||
const TensorLayout *filter_layout; | |||
CanonizedFilterMeta filter_meta; | |||
const TensorLayout *dst_layout; | |||
param::Convolution3D::DataType data_type; | |||
@@ -35,16 +35,6 @@ Convolution3DForwardImpl::get_algorithm_heuristic( | |||
const TensorLayout& dst, size_t workspace_limit_in_bytes, | |||
const AlgoAttribute& positive_attr, | |||
const AlgoAttribute& negative_attr) { | |||
auto fm = check_layout_fwd(src, filter, dst); | |||
return get_algorithm_heuristic(src, fm, dst, workspace_limit_in_bytes, | |||
positive_attr, negative_attr); | |||
} | |||
Convolution3DForwardImpl::Algorithm* | |||
Convolution3DForwardImpl::get_algorithm_heuristic( | |||
const TensorLayout& src, const CanonizedFilterMeta& filter, | |||
const TensorLayout& dst, size_t workspace_limit_in_bytes, | |||
const AlgoAttribute& positive_attr, | |||
const AlgoAttribute& negative_attr) { | |||
AlgoBase::SizeArgs args(this, src, filter, dst); | |||
#if CUDNN_MAJOR < 7 || (CUDNN_MAJOR == 7 && CUDNN_MINOR < 5) | |||
@@ -98,18 +88,14 @@ Convolution3DForwardImpl::get_algorithm_heuristic( | |||
if (auto algo = get_cudnn_algo()) | |||
return algo; | |||
} | |||
if (args.filter_meta.group > 1) { | |||
auto orig_args = args; | |||
TensorLayout a, b; | |||
AlgoGroupConvGeneral::modify_size_args(args, a, b); | |||
if (prefer_1x1x1()) { | |||
return sm_algo_pack.algo2gconv.at(&sm_algo_pack.a1x1x1); | |||
} | |||
if (is_cudnn_supported(args)) { | |||
if (auto algo = get_cudnn_algo()) | |||
return sm_algo_pack.algo2gconv.at(algo); | |||
if (auto algo = | |||
megdnn::get_algo_match_attribute<Convolution3DForwardImpl>( | |||
&sm_algo_pack.group, positive_attr, | |||
negative_attr)) { | |||
return algo; | |||
} | |||
args = orig_args; | |||
} | |||
return megdnn::get_algo_match_attribute<Convolution3DForwardImpl>( | |||
@@ -129,7 +115,7 @@ size_t Convolution3DForwardImpl::get_workspace_in_bytes( | |||
const TensorLayout& src, const TensorLayout& filter, | |||
const TensorLayout& dst) { | |||
AlgoBase::SizeArgs args(this, src, filter, dst); | |||
return get_algorithm(this, src, args.filter_meta, dst) | |||
return get_algorithm(this, src, filter, dst) | |||
->get_workspace_in_bytes(args); | |||
} | |||
@@ -138,7 +124,7 @@ void Convolution3DForwardImpl::exec(_megdnn_tensor_in src, | |||
_megdnn_tensor_out dst, | |||
_megdnn_workspace workspace) { | |||
AlgoBase::ExecArgs args(this, src, filter, dst, workspace); | |||
auto algo = get_algorithm(this, src.layout, args.filter_meta, dst.layout); | |||
auto algo = get_algorithm(this, src.layout, filter.layout, dst.layout); | |||
algo->check_workspace(args, workspace).exec(args); | |||
} | |||
@@ -151,7 +137,7 @@ void Convolution3DBackwardDataImpl::exec(_megdnn_tensor_in filter, | |||
_megdnn_tensor_out grad, | |||
_megdnn_workspace workspace) { | |||
AlgoBase::ExecArgs args(this, filter, diff, grad, workspace); | |||
auto algo = get_algorithm(this, args.filter_meta, diff.layout, grad.layout); | |||
auto algo = get_algorithm(this, filter.layout, diff.layout, grad.layout); | |||
algo->check_workspace(args, workspace).exec(args); | |||
} | |||
@@ -169,17 +155,6 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic( | |||
const TensorLayout& grad, size_t workspace_limit_in_bytes, | |||
const AlgoAttribute& positive_attr, | |||
const AlgoAttribute& negative_attr) { | |||
auto fm = check_layout_fwd(grad, filter, diff); | |||
return get_algorithm_heuristic(fm, diff, grad, workspace_limit_in_bytes, | |||
positive_attr, negative_attr); | |||
} | |||
Convolution3DBackwardDataImpl::Algorithm* | |||
Convolution3DBackwardDataImpl::get_algorithm_heuristic( | |||
const CanonizedFilterMeta& filter, const TensorLayout& diff, | |||
const TensorLayout& grad, size_t workspace_limit_in_bytes, | |||
const AlgoAttribute& positive_attr, | |||
const AlgoAttribute& negative_attr) { | |||
AlgoBase::SizeArgs args(this, filter, diff, grad); | |||
if (args.filter_meta.group > 1 && | |||
@@ -215,14 +190,11 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic( | |||
} | |||
if (args.filter_meta.group > 1) { | |||
auto orig_args = args; | |||
TensorLayout a, b; | |||
AlgoGroupConvGeneral::modify_size_args(args, a, b); | |||
if (is_cudnn_supported(args.as_fwd_args())) { | |||
if (auto algo = get_cudnn_algo()) | |||
return sm_algo_pack.algo2gconv.at(algo); | |||
if (auto algo = megdnn::get_algo_match_attribute< | |||
Convolution3DBackwardDataImpl>( | |||
&sm_algo_pack.group, positive_attr, negative_attr)) { | |||
return algo; | |||
} | |||
args = orig_args; | |||
} | |||
return megdnn::get_algo_match_attribute<Convolution3DBackwardDataImpl>( | |||
@@ -234,7 +206,7 @@ size_t Convolution3DBackwardDataImpl::get_workspace_in_bytes( | |||
const TensorLayout& filter, const TensorLayout& diff, | |||
const TensorLayout& grad) { | |||
AlgoBase::SizeArgs args(this, filter, diff, grad); | |||
return get_algorithm(this, args.filter_meta, diff, grad) | |||
return get_algorithm(this, filter, diff, grad) | |||
->get_workspace_in_bytes(args); | |||
} | |||
@@ -248,7 +220,7 @@ void Convolution3DBackwardFilterImpl::exec(_megdnn_tensor_in src, | |||
_megdnn_workspace workspace) { | |||
AlgoBase::ExecArgs args(this, src, diff, grad, workspace); | |||
auto algo = | |||
get_algorithm(this, src.layout, diff.layout, args.grad_filter_meta); | |||
get_algorithm(this, src.layout, diff.layout, grad.layout); | |||
algo->check_workspace(args, workspace).exec(args); | |||
} | |||
@@ -266,17 +238,6 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic( | |||
const TensorLayout& grad, size_t workspace_limit_in_bytes, | |||
const AlgoAttribute& positive_attr, | |||
const AlgoAttribute& negative_attr) { | |||
auto fm = check_layout_fwd(src, grad, diff); | |||
return get_algorithm_heuristic(src, diff, fm, workspace_limit_in_bytes, | |||
positive_attr, negative_attr); | |||
} | |||
Convolution3DBackwardFilterImpl::Algorithm* | |||
Convolution3DBackwardFilterImpl::get_algorithm_heuristic( | |||
const TensorLayout& src, const TensorLayout& diff, | |||
const CanonizedFilterMeta& grad, size_t workspace_limit_in_bytes, | |||
const AlgoAttribute& positive_attr, | |||
const AlgoAttribute& negative_attr) { | |||
AlgoBase::SizeArgs args(this, src, diff, grad); | |||
if (args.grad_filter_meta.group > 1 && | |||
@@ -310,15 +271,13 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic( | |||
if (auto algo = get_cudnn_algo()) | |||
return algo; | |||
} | |||
if (args.grad_filter_meta.group > 1) { | |||
auto orig_args = args; | |||
TensorLayout a, b; | |||
AlgoGroupConvGeneral::modify_size_args(args, a, b); | |||
if (is_cudnn_supported(args.as_fwd_args())) { | |||
if (auto algo = get_cudnn_algo()) | |||
return sm_algo_pack.algo2gconv.at(algo); | |||
if (auto algo = megdnn::get_algo_match_attribute< | |||
Convolution3DBackwardFilterImpl>( | |||
&sm_algo_pack.group, positive_attr, negative_attr)) { | |||
return algo; | |||
} | |||
args = orig_args; | |||
} | |||
return megdnn::get_algo_match_attribute<Convolution3DBackwardFilterImpl>( | |||
@@ -330,7 +289,7 @@ size_t Convolution3DBackwardFilterImpl::get_workspace_in_bytes( | |||
const TensorLayout& src, const TensorLayout& diff, | |||
const TensorLayout& grad) { | |||
AlgoBase::SizeArgs args(this, src, diff, grad); | |||
return get_algorithm(this, src, diff, args.grad_filter_meta) | |||
return get_algorithm(this, src, diff, grad) | |||
->get_workspace_in_bytes(args); | |||
} | |||
@@ -21,17 +21,6 @@ public: | |||
using Convolution3DForward::Convolution3DForward; | |||
void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, | |||
_megdnn_tensor_out dst, _megdnn_workspace workspace) override; | |||
AlgorithmInfo get_algorithm_info_heuristic(const TensorLayout& src, | |||
const CanonizedFilterMeta& filter, | |||
const TensorLayout& dst, | |||
size_t workspace_limit_in_bytes, | |||
const AlgoAttribute& positive_attr, | |||
const AlgoAttribute& negative_attr) { | |||
return get_algorithm_heuristic(src, filter, dst, | |||
workspace_limit_in_bytes, positive_attr, | |||
negative_attr) | |||
->info(); | |||
} | |||
size_t get_workspace_in_bytes(const TensorLayout& src, | |||
const TensorLayout& filter, | |||
const TensorLayout& dst) override; | |||
@@ -57,13 +46,6 @@ protected: | |||
const AlgoAttribute& negative_attr) override; | |||
private: | |||
Algorithm* get_algorithm_heuristic(const TensorLayout& src, | |||
const CanonizedFilterMeta& filter, | |||
const TensorLayout& dst, | |||
size_t workspace_limit_in_bytes, | |||
const AlgoAttribute& positive_attr, | |||
const AlgoAttribute& negative_attr); | |||
static AlgoPack sm_algo_pack; | |||
}; | |||
@@ -72,16 +54,6 @@ public: | |||
using Convolution3DBackwardData::Convolution3DBackwardData; | |||
void exec(_megdnn_tensor_in filter, _megdnn_tensor_in diff, | |||
_megdnn_tensor_out grad, _megdnn_workspace workspace) override; | |||
AlgorithmInfo get_algorithm_info_heuristic( | |||
const CanonizedFilterMeta& filter, const TensorLayout& diff, | |||
const TensorLayout& grad, size_t workspace_limit_in_bytes, | |||
const AlgoAttribute& positive_attr, | |||
const AlgoAttribute& negative_attr) { | |||
return get_algorithm_heuristic(filter, diff, grad, | |||
workspace_limit_in_bytes, positive_attr, | |||
negative_attr) | |||
->info(); | |||
} | |||
size_t get_workspace_in_bytes(const TensorLayout& filter, | |||
const TensorLayout& diff, | |||
const TensorLayout& grad) override; | |||
@@ -109,13 +81,6 @@ protected: | |||
const AlgoAttribute& negative_attr) override; | |||
private: | |||
Algorithm* get_algorithm_heuristic(const CanonizedFilterMeta& filter, | |||
const TensorLayout& diff, | |||
const TensorLayout& grad, | |||
size_t workspace_limit_in_bytes, | |||
const AlgoAttribute& positive_attr, | |||
const AlgoAttribute& negative_attr); | |||
static AlgoPack sm_algo_pack; | |||
}; | |||
@@ -127,17 +92,6 @@ public: | |||
size_t get_workspace_in_bytes(const TensorLayout& src, | |||
const TensorLayout& diff, | |||
const TensorLayout& grad) override; | |||
AlgorithmInfo get_algorithm_info_heuristic( | |||
const TensorLayout& src, const TensorLayout& diff, | |||
const CanonizedFilterMeta& grad, size_t workspace_limit_in_bytes, | |||
const AlgoAttribute& positive_attr, | |||
const AlgoAttribute& negative_attr) { | |||
return get_algorithm_heuristic(src, diff, grad, | |||
workspace_limit_in_bytes, positive_attr, | |||
negative_attr) | |||
->info(); | |||
} | |||
const char* get_algorithm_set_name() const override; | |||
class AlgoBase; | |||
@@ -162,13 +116,6 @@ protected: | |||
const AlgoAttribute& negative_attr) override; | |||
private: | |||
Algorithm* get_algorithm_heuristic(const TensorLayout& src, | |||
const TensorLayout& diff, | |||
const CanonizedFilterMeta& grad, | |||
size_t workspace_limit_in_bytes, | |||
const AlgoAttribute& positive_attr, | |||
const AlgoAttribute& negative_attr); | |||
static AlgoPack sm_algo_pack; | |||
}; | |||
} // namespace cuda | |||
@@ -66,12 +66,10 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_FORWARD_1x1x1) { | |||
auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, | |||
size_t FD, size_t FH, size_t FW, size_t OC, size_t group) { | |||
Checker<Convolution3D> checker(handle_cuda()); | |||
#if CUDNN_MAJOR <= 6 | |||
bool require_algo = true; | |||
checker.set_before_exec_callback( | |||
AlgoChecker<Convolution3DForward>{ | |||
"group_conv3d:1x1x1", &require_algo}); | |||
#endif | |||
checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>( | |||
ExecutionPolicyAlgoName{"CUDA:GROUP_CONV3D_FORWARD", | |||
{{"1x1x1", | |||
{}}}})); | |||
Convolution3D::Param param; | |||
param.sparse = Convolution3D::Param::Sparse::GROUP; | |||
auto ICg = IC / group; | |||
@@ -125,6 +123,45 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_DATA) { | |||
run(2, 32, 64, 64, 64, 3, 3, 3, 32, 62, 62, 62, 0, 0, 0, 1, 1, 1, 4); | |||
} | |||
TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_DATA_CUDNN) { | |||
auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, | |||
size_t FD, size_t FH, size_t FW, size_t OC, size_t OD, | |||
size_t OH, size_t OW, size_t PD, size_t PH, size_t PW, | |||
size_t SD, size_t SH, size_t SW, size_t group) { | |||
Checker<Convolution3DBackwardData> checker(handle_cuda()); | |||
checker.set_before_exec_callback( | |||
AlgoChecker<Convolution3DBackwardData>(ExecutionPolicyAlgoName{ | |||
"CUDA:GROUP_CONV3D_BACKWARD_DATA", {{"CUDNN", {}}}})); | |||
Convolution3DBackwardData::Param param; | |||
param.sparse = Convolution3D::Param::Sparse::GROUP; | |||
param.pad_d = PD; | |||
param.pad_h = PH; | |||
param.pad_w = PW; | |||
param.stride_d = SD; | |||
param.stride_h = SH; | |||
param.stride_w = SW; | |||
auto ICg = IC / group; | |||
auto OCg = OC / group; | |||
checker.set_param(param).exec({{group, OCg, ICg, FD, FH, FW}, | |||
{N, OC, OD, OH, OW}, | |||
{N, IC, ID, IH, IW}}); | |||
}; | |||
// bug case in prev ver | |||
run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 3, 0, 0, 1, 1, 1, 1, 2); | |||
run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 0, 0, 1, 1, 1, 2, 2); | |||
run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 0, 1, 0, 1, 2, 1, 2); | |||
run(1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 0, 0, 2, 1, 1, 2); | |||
// normal case | |||
run(2, 64, 7, 7, 7, 3, 3, 3, 32, 5, 5, 5, 0, 0, 0, 1, 1, 1, 2); | |||
// padded case | |||
run(2, 32, 7, 7, 7, 3, 3, 3, 64, 7, 7, 7, 1, 1, 1, 1, 1, 1, 4); | |||
// strided case | |||
run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8); | |||
// bigger case | |||
run(2, 32, 64, 64, 64, 3, 3, 3, 32, 62, 62, 62, 0, 0, 0, 1, 1, 1, 4); | |||
} | |||
TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER) { | |||
auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, | |||
size_t FD, size_t FH, size_t FW, size_t OC, size_t OD, | |||
@@ -153,6 +190,39 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER) { | |||
run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8); | |||
} | |||
TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER_CUDNN) { | |||
auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, | |||
size_t FD, size_t FH, size_t FW, size_t OC, size_t OD, | |||
size_t OH, size_t OW, size_t PD, size_t PH, size_t PW, | |||
size_t SD, size_t SH, size_t SW, size_t group) { | |||
Checker<Convolution3DBackwardFilter> checker(handle_cuda()); | |||
checker.set_before_exec_callback( | |||
AlgoChecker<Convolution3DBackwardFilter>( | |||
ExecutionPolicyAlgoName{ | |||
"CUDA:GROUP_CONV3D_BACKWARD_FILTER", | |||
{{"CUDNN", {}}}})); | |||
Convolution3DBackwardFilter::Param param; | |||
param.sparse = Convolution3D::Param::Sparse::GROUP; | |||
param.pad_d = PD; | |||
param.pad_h = PH; | |||
param.pad_w = PW; | |||
param.stride_d = SD; | |||
param.stride_h = SH; | |||
param.stride_w = SW; | |||
auto ICg = IC / group; | |||
auto OCg = OC / group; | |||
checker.set_param(param).exec({{N, IC, ID, IH, IW}, | |||
{N, OC, OD, OH, OW}, | |||
{group, OCg, ICg, FD, FH, FW}}); | |||
}; | |||
// normal case | |||
run(2, 64, 7, 7, 7, 3, 3, 3, 32, 5, 5, 5, 0, 0, 0, 1, 1, 1, 2); | |||
// padded case | |||
run(2, 32, 7, 7, 7, 3, 3, 3, 64, 7, 7, 7, 1, 1, 1, 1, 1, 1, 4); | |||
// strided case | |||
run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8); | |||
} | |||
} // namespace test | |||
} // namespace megdnn | |||