diff --git a/dnn/include/megdnn/oprs.h b/dnn/include/megdnn/oprs.h
index 0bdaeaa3..1fc1552d 100644
--- a/dnn/include/megdnn/oprs.h
+++ b/dnn/include/megdnn/oprs.h
@@ -18,4 +18,39 @@
 #include "megdnn/oprs/utils.h"
 #include "megdnn/oprs/linalg.h"
 
+template <typename Opr>
+struct OprArityTrait;
+
+template <typename Opr, int _arity_in, int _arity_out>
+struct OprArityTraitTmpl {
+    static constexpr int arity_in = _arity_in;
+    static constexpr int arity_out = _arity_out;
+    static constexpr int arity = arity_in + arity_out;
+};
+
+#define INST_ARITY(_Opr, _in, _out) \
+    template <>                     \
+    struct OprArityTrait<_Opr> : public OprArityTraitTmpl<_Opr, _in, _out> {};
+
+INST_ARITY(megdnn::ConvolutionBackwardData, 2, 1);
+INST_ARITY(megdnn::ConvolutionBackwardFilter, 2, 1);
+INST_ARITY(megdnn::Convolution3DForward, 2, 1);
+INST_ARITY(megdnn::Convolution3DBackwardData, 2, 1);
+INST_ARITY(megdnn::Convolution3DBackwardFilter, 2, 1);
+INST_ARITY(megdnn::LocalShareForward, 2, 1);
+INST_ARITY(megdnn::LocalShareBackwardData, 2, 1);
+INST_ARITY(megdnn::LocalShareBackwardFilter, 2, 1);
+INST_ARITY(megdnn::Convolution, 2, 1);
+INST_ARITY(megdnn::DeformableConvForward, 4, 1);
+INST_ARITY(megdnn::DeformableConvBackwardFilter, 4, 1);
+INST_ARITY(megdnn::BatchConvBiasForward, 4, 1);
+INST_ARITY(megdnn::ConvBias, 4, 1);
+INST_ARITY(megdnn::DeformableConvBackwardData, 5, 3);
+INST_ARITY(megdnn::MatrixMul, 2, 1);
+INST_ARITY(megdnn::BatchedMatrixMul, 2, 1);
+
+#undef INST_ARITY
+
+
 // vim: syntax=cpp.doxygen
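The block above moves the OprArityTrait family into megdnn/oprs.h (it is deleted from src/opr/include/megbrain/opr/search_policy/profiler.h at the end of this patch) so that the dnn test code can reuse it. Below is a minimal standalone sketch of the same trait technique; the operator types are hypothetical stand-ins, not the megdnn API:

    #include <iostream>

    // Stand-in operator types; in megdnn these are the real operator classes.
    struct FakeConvolution {};
    struct FakeDeformableConvBackwardData {};

    template <typename Opr>
    struct ArityTrait;  // primary template left undefined: unknown oprs fail to compile

    template <typename Opr, int in_, int out_>
    struct ArityTraitTmpl {
        static constexpr int arity_in = in_;
        static constexpr int arity_out = out_;
        static constexpr int arity = arity_in + arity_out;
    };

    #define INST_ARITY(Opr, in_, out_) \
        template <>                    \
        struct ArityTrait<Opr> : public ArityTraitTmpl<Opr, in_, out_> {};

    INST_ARITY(FakeConvolution, 2, 1)  // src, filter -> dst
    INST_ARITY(FakeDeformableConvBackwardData, 5, 3)
    #undef INST_ARITY

    int main() {
        // The trait is usable wherever a compile-time constant is needed,
        // e.g. to size an array of layouts for a given operator.
        static_assert(ArityTrait<FakeConvolution>::arity == 3, "2 inputs + 1 output");
        std::cout << ArityTrait<FakeDeformableConvBackwardData>::arity << '\n';  // 8
    }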
diff --git a/dnn/src/common/algo_base.h b/dnn/src/common/algo_base.h
index 37ec919f..ca24b59f 100644
--- a/dnn/src/common/algo_base.h
+++ b/dnn/src/common/algo_base.h
@@ -47,6 +47,9 @@ namespace megdnn {
         return algo_pack().all_algos_map().at(desc); \
     }
 
+#define MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE(cb) \
+    cb(AlgoAttribute::ACCURACY_DEPEND_ON_BATCH)
+
 /**
  * \brief construct algo from AlgorithmDesc
  */
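MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE is an X-macro: wrapper algorithms later in this patch expand it with a local cb() to copy every "inheritable" attribute bit from the algorithm they delegate to. A self-contained sketch of that expansion pattern, using a simplified attribute enum rather than megdnn's AlgoAttribute:

    #include <cstdint>
    #include <iostream>

    // Simplified stand-in for megdnn's AlgoAttribute bit flags.
    enum class Attr : uint32_t {
        DEFAULT = 0,
        REPRODUCIBLE = 1u << 0,
        ACCURACY_DEPEND_ON_BATCH = 1u << 1,
    };
    constexpr Attr operator|(Attr a, Attr b) {
        return Attr(uint32_t(a) | uint32_t(b));
    }
    constexpr bool contains(Attr set, Attr bit) {
        return (uint32_t(set) & uint32_t(bit)) != 0;
    }

    // X-macro listing every attribute a wrapper algorithm should inherit.
    #define FOREACH_INHERITABLE(cb) cb(Attr::ACCURACY_DEPEND_ON_BATCH)

    Attr inherited(Attr inner) {
        Attr ret = Attr::DEFAULT;
    // Each cb(attr) expands to one copy of this if-statement.
    #define cb(attr)               \
        if (contains(inner, attr)) \
            ret = ret | attr;
        FOREACH_INHERITABLE(cb)
    #undef cb
        return ret;
    }

    int main() {
        Attr inner = Attr::REPRODUCIBLE | Attr::ACCURACY_DEPEND_ON_BATCH;
        // Only the inheritable bit survives; REPRODUCIBLE is handled separately.
        std::cout << contains(inherited(inner), Attr::ACCURACY_DEPEND_ON_BATCH) << '\n';  // 1
        std::cout << contains(inherited(inner), Attr::REPRODUCIBLE) << '\n';              // 0
    }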
diff --git a/dnn/src/common/conv_bias.cpp b/dnn/src/common/conv_bias.cpp
index b23f1781..13c77fe7 100644
--- a/dnn/src/common/conv_bias.cpp
+++ b/dnn/src/common/conv_bias.cpp
@@ -323,6 +323,34 @@ void handle_bias_and_nonlinear(Handle* handle, param::ConvBias args,
     }
 }
 
+bool check_bias_share_in_channel(const TensorLayout& bias,
+                                 const param::ConvBias::Format format) {
+    bool share_in_channel = false;
+    if (format == param::ConvBias::Format::NCHW ||
+        format == param::ConvBias::Format::NCHW4_NCHW) {
+        share_in_channel = (bias.ndim == 4 && bias[0] == 1 && bias[2] == 1 &&
+                            bias[3] == 1);
+    } else if (format == param::ConvBias::Format::NHWC) {
+        share_in_channel = (bias.ndim == 4 && bias[0] == 1 && bias[1] == 1 &&
+                            bias[2] == 1);
+    } else if (format == param::ConvBias::Format::NCHW4 ||
+               format == param::ConvBias::Format::NCHW8 ||
+               format == param::ConvBias::Format::NCHW32 ||
+               format == param::ConvBias::Format::NCHW4_NCHW32 ||
+               format == param::ConvBias::Format::NCHW32_NCHW4) {
+        share_in_channel = (bias.ndim == 5 && bias[0] == 1 && bias[2] == 1 &&
+                            bias[3] == 1);
+    } else if (format == param::ConvBias::Format::NHWCD4) {
+        share_in_channel = (bias.ndim == 5 && bias[0] == 1 && bias[1] == 1 &&
+                            bias[3] == 1);
+    } else {
+        megdnn_assert(format == param::ConvBias::Format::CHWN4);
+        share_in_channel = (bias.ndim == 5 && bias[1] == 1 && bias[2] == 1 &&
+                            bias[3] == 1);
+    }
+    return share_in_channel;
+}
+
 } // namespace megdnn
 
 // vim: syntax=cpp.doxygen
diff --git a/dnn/src/common/conv_bias.h b/dnn/src/common/conv_bias.h
index fc9da4c1..3eeacfc1 100644
--- a/dnn/src/common/conv_bias.h
+++ b/dnn/src/common/conv_bias.h
@@ -21,6 +21,9 @@ void handle_bias_and_nonlinear(Handle* handle, param::ConvBias args,
                                const TensorND* conv_dst_tensor,
                                const TensorND* dst_tensor,
                                const TensorND* bias_tensor);
+
+bool check_bias_share_in_channel(const TensorLayout& bias,
+                                 const param::ConvBias::Format format);
 } // namespace megdnn
 
 // vim: syntax=cpp.doxygen
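check_bias_share_in_channel() (hoisted here from the CUDA-only helper so the new checker can call it) answers one question: does the bias hold a single value per output channel, broadcasting over the batch and spatial dimensions of the given layout format? A standalone sketch of the NCHW/NHWC branches, with a toy layout type standing in for TensorLayout:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Minimal stand-in for TensorLayout: just the shape.
    struct Layout {
        std::vector<size_t> shape;
        size_t ndim() const { return shape.size(); }
        size_t operator[](size_t i) const { return shape[i]; }
    };

    enum class Format { NCHW, NHWC };

    // Mirrors the NCHW/NHWC branches above: the bias is "shared in channel"
    // when every dimension except the channel one is 1, i.e. one scalar per
    // output channel that broadcasts over batch and spatial dims.
    bool check_bias_share_in_channel(const Layout& bias, Format format) {
        if (format == Format::NCHW)
            return bias.ndim() == 4 && bias[0] == 1 && bias[2] == 1 && bias[3] == 1;
        // NHWC: channel is the last dimension.
        return bias.ndim() == 4 && bias[0] == 1 && bias[1] == 1 && bias[2] == 1;
    }

    int main() {
        assert(check_bias_share_in_channel({{1, 64, 1, 1}}, Format::NCHW));     // per-channel bias
        assert(!check_bias_share_in_channel({{64, 64, 30, 30}}, Format::NCHW)); // full tensor, not shared
        assert(check_bias_share_in_channel({{1, 1, 1, 24}}, Format::NHWC));
    }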
diff --git a/dnn/src/cuda/batch_conv_bias/gemm_int8_nchw4_dp4a.cpp b/dnn/src/cuda/batch_conv_bias/gemm_int8_nchw4_dp4a.cpp
index 0f0ab818..433c2c23 100644
--- a/dnn/src/cuda/batch_conv_bias/gemm_int8_nchw4_dp4a.cpp
+++ b/dnn/src/cuda/batch_conv_bias/gemm_int8_nchw4_dp4a.cpp
@@ -9,7 +9,7 @@
  * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  */
-#include "src/common/utils.h"
+#include "src/common/conv_bias.h"
 #include "src/cuda/batch_conv_bias/algo.h"
 #include "src/cuda/batch_conv_bias/batch_conv_bias.cuh"
 #include "src/cuda/batch_conv_bias/opr_impl.h"
@@ -106,7 +106,7 @@ bool BatchConvBiasForwardImpl::AlgoInt8NCHW4DotProdGemm::is_available(
     using Mode = Param::Mode;
     bool available = true;
     auto&& param = args.opr->param();
-    if (!conv_bias::check_bias_share_in_channel(args.bias_layout, param.format))
+    if (!check_bias_share_in_channel(args.bias_layout, param.format))
         return false;
     if (param.format != Format::NCHW4)
         return false;
diff --git a/dnn/src/cuda/batch_conv_bias/implicit_gemm_int8_nchw4_dp4a.cpp b/dnn/src/cuda/batch_conv_bias/implicit_gemm_int8_nchw4_dp4a.cpp
index 6e5c8b3e..ba32390d 100644
--- a/dnn/src/cuda/batch_conv_bias/implicit_gemm_int8_nchw4_dp4a.cpp
+++ b/dnn/src/cuda/batch_conv_bias/implicit_gemm_int8_nchw4_dp4a.cpp
@@ -10,7 +10,7 @@
  */
 #include "megdnn/oprs/general.h"
-#include "src/common/utils.h"
+#include "src/common/conv_bias.h"
 #include "src/cuda/batch_conv_bias/algo.h"
 #include "src/cuda/batch_conv_bias/batch_conv_bias.cuh"
 #include "src/cuda/batch_conv_bias/opr_impl.h"
@@ -86,7 +86,7 @@ bool BatchConvBiasForwardImpl::AlgoInt8NCHW4DotProdImplicitGemmPrecomp::
     using Mode = Param::Mode;
     bool available = true;
     auto&& param = args.opr->param();
-    if (!conv_bias::check_bias_share_in_channel(args.bias_layout, param.format))
+    if (!check_bias_share_in_channel(args.bias_layout, param.format))
         return false;
     if (param.format != Format::NCHW4)
         return false;
diff --git a/dnn/src/cuda/batched_matrix_mul/algo.h b/dnn/src/cuda/batched_matrix_mul/algo.h
index 49979b42..637d3da7 100644
--- a/dnn/src/cuda/batched_matrix_mul/algo.h
+++ b/dnn/src/cuda/batched_matrix_mul/algo.h
@@ -115,7 +115,8 @@ public:
     size_t get_workspace_in_bytes(const SizeArgs& /*args*/) const override;
     void exec(const ExecArgs& args) const final;
     AlgoAttribute attribute() const override {
-        return AlgoAttribute::REPRODUCIBLE;
+        return AlgoAttribute::REPRODUCIBLE |
+               AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
     }
     const char* name() const override { return "CUBLAS"; }
     MEGDNN_DECL_ALGO_TYPE(CUDA_CUBLAS)
@@ -128,7 +129,8 @@ public:
     size_t get_workspace_in_bytes(const SizeArgs& /*args*/) const override;
     void exec(const ExecArgs& args) const final;
     AlgoAttribute attribute() const override {
-        return AlgoAttribute::REPRODUCIBLE;
+        return AlgoAttribute::REPRODUCIBLE |
+               AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
     }
     const char* name() const override { return "CUBLAS_LT"; }
     MEGDNN_DECL_ALGO_TYPE(CUDA_CUBLASLT)
"src/cuda/convolution_helper/layout.cuh" #include "src/cuda/convolution_helper/parameter.cuh" #include "src/cuda/utils.h" +#include "src/common/conv_bias.h" using namespace megdnn; using namespace cuda; @@ -118,7 +119,7 @@ bool ConvBiasForwardImpl::AlgoInt8CHWN4IMMAImplicitGemmUnrollWidth:: bool available = true; auto&& param = args.opr->param(); auto&& fm = args.filter_meta; - if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout), + if (!check_bias_share_in_channel(*(args.bias_layout), param.format)) return false; if (param.format != Format::CHWN4) diff --git a/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw32_imma.cpp b/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw32_imma.cpp index 5f1ef475..3e674080 100644 --- a/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw32_imma.cpp +++ b/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw32_imma.cpp @@ -14,6 +14,7 @@ #include "src/cuda/conv_bias/cutlass_convolution_wrapper.cuh" #include "src/cuda/convolution_helper/parameter.cuh" #include "src/cuda/utils.h" +#include "src/common/conv_bias.h" using namespace megdnn; using namespace cuda; @@ -32,7 +33,7 @@ bool ConvBiasForwardImpl::AlgoInt8NCHW32IMMAImplicitGemm::is_available( bool available = true; auto&& param = args.opr->param(); auto&& fm = args.filter_meta; - if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout), + if (!check_bias_share_in_channel(*(args.bias_layout), param.format)) return false; if (param.format != Format::NCHW32 && param.format != Format::NCHW32_NCHW4) diff --git a/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_dp4a.cpp b/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_dp4a.cpp index ace4620a..b38d607a 100644 --- a/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_dp4a.cpp +++ b/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_dp4a.cpp @@ -13,6 +13,7 @@ #include "src/cuda/utils.h" #include "src/cuda/convolution_helper/parameter.cuh" #include "src/cuda/conv_bias/cutlass_convolution_wrapper.cuh" +#include "src/common/conv_bias.h" using namespace megdnn; using namespace cuda; @@ -29,7 +30,7 @@ bool ConvBiasForwardImpl::AlgoInt8NCHW4DotProdImplicitGemm::is_available( bool available = true; auto&& param = args.opr->param(); auto&& fm = args.filter_meta; - if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout), + if (!check_bias_share_in_channel(*(args.bias_layout), param.format)) return false; if (param.format == Format::NCHW4_NCHW32) { diff --git a/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_imma.cpp b/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_imma.cpp index b268ea3d..d7818c59 100644 --- a/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_imma.cpp +++ b/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_imma.cpp @@ -12,6 +12,7 @@ #include "./algo.h" #include "src/cuda/utils.h" #include "src/cuda/convolution_helper/bias_visitor.cuh" +#include "src/common/conv_bias.h" using namespace megdnn; using namespace cuda; @@ -29,7 +30,7 @@ bool ConvBiasForwardImpl::AlgoInt8NCHW4IMMAImplicitGemm::is_available( bool available = true; auto&& param = args.opr->param(); auto&& fm = args.filter_meta; - if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout), + if (!check_bias_share_in_channel(*(args.bias_layout), param.format)) return false; if (param.format != Format::NCHW4) diff --git a/dnn/src/cuda/convolution/backward_data/algo.h b/dnn/src/cuda/convolution/backward_data/algo.h index 00439d30..a4286c6d 100644 --- a/dnn/src/cuda/convolution/backward_data/algo.h +++ b/dnn/src/cuda/convolution/backward_data/algo.h @@ -127,6 +127,9 @@ public: if 
diff --git a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
index 99fca489..ab0968de 100644
--- a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
+++ b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
@@ -16,6 +16,7 @@
 #include "src/cuda/conv_bias/helper.h"
 #include "src/cuda/cudnn_wrapper.h"
 #include "src/cuda/utils.h"
+#include "src/common/conv_bias.h"
 
 using namespace megdnn;
 using namespace cuda;
@@ -29,7 +30,7 @@ bool ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::is_available(
     }
 
     if (args.bias_layout->ndim == 0 ||
-        !conv_bias::check_bias_share_in_channel(*(args.bias_layout),
+        !check_bias_share_in_channel(*(args.bias_layout),
                                      args.opr->param().format)) {
         return false;
     }
diff --git a/dnn/src/cuda/conv_bias/helper.cpp b/dnn/src/cuda/conv_bias/helper.cpp
index 8bb1d8b3..02955adc 100644
--- a/dnn/src/cuda/conv_bias/helper.cpp
+++ b/dnn/src/cuda/conv_bias/helper.cpp
@@ -168,34 +168,6 @@ bool is_cudnn_supported(const BiasForwardSizeArgs& args) {
     return supported;
 }
 
-bool check_bias_share_in_channel(const TensorLayout& bias,
-                                 const param::ConvBias::Format format) {
-    bool share_in_channel = false;
-    if (format == param::ConvBias::Format::NCHW ||
-        format == param::ConvBias::Format::NCHW4_NCHW) {
-        share_in_channel = (bias.ndim == 4 && bias[0] == 1 && bias[2] == 1 &&
-                            bias[3] == 1);
-    } else if (format == param::ConvBias::Format::NHWC) {
-        share_in_channel = (bias.ndim == 4 && bias[0] == 1 && bias[1] == 1 &&
-                            bias[2] == 1);
-    } else if (format == param::ConvBias::Format::NCHW4 ||
-               format == param::ConvBias::Format::NCHW8 ||
-               format == param::ConvBias::Format::NCHW32 ||
-               format == param::ConvBias::Format::NCHW4_NCHW32 ||
-               format == param::ConvBias::Format::NCHW32_NCHW4) {
-        share_in_channel = (bias.ndim == 5 && bias[0] == 1 && bias[2] == 1 &&
-                            bias[3] == 1);
-    } else if (format == param::ConvBias::Format::NHWCD4) {
-        share_in_channel = (bias.ndim == 5 && bias[0] == 1 && bias[1] == 1 &&
-                            bias[3] == 1);
-    } else {
-        megdnn_assert(format == param::ConvBias::Format::CHWN4);
-        share_in_channel = (bias.ndim == 5 && bias[1] == 1 && bias[2] == 1 &&
-                            bias[3] == 1);
-    }
-    return share_in_channel;
-}
-
 SmallVector<size_t> matmul_get_workspace_bundle(const BiasForwardSizeArgs& args) {
     auto dtype = args.src_layout->dtype;
diff --git a/dnn/src/cuda/conv_bias/helper.h b/dnn/src/cuda/conv_bias/helper.h
index ceade354..0d3687d4 100644
--- a/dnn/src/cuda/conv_bias/helper.h
+++ b/dnn/src/cuda/conv_bias/helper.h
@@ -126,9 +126,6 @@ namespace conv_bias {
         }
     };
 
-    bool check_bias_share_in_channel(const TensorLayout& bias,
-                                     const param::ConvBias::Format format);
-
 } // namespace conv_bias
 } // namespace cuda
 } // namespace megdnn
diff --git a/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_dp4a.cpp b/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_dp4a.cpp
index d7b4dff1..b6c49577 100644
--- a/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_dp4a.cpp
+++ b/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_dp4a.cpp
@@ -15,6 +15,7 @@
 #include "src/cuda/convolution_helper/layout.cuh"
 #include "src/cuda/convolution_helper/parameter.cuh"
 #include "src/cuda/utils.h"
+#include "src/common/conv_bias.h"
 
 using namespace megdnn;
 using namespace cuda;
@@ -83,7 +84,7 @@ bool ConvBiasForwardImpl::AlgoInt8CHWN4DotProdImplicitGemm::is_available(
     bool available = true;
     auto&& param = args.opr->param();
     auto&& fm = args.filter_meta;
-    if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout),
+    if (!check_bias_share_in_channel(*(args.bias_layout),
                                      param.format))
         return false;
     if (param.format != Format::CHWN4)
diff --git a/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_imma.cpp b/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_imma.cpp
index de206a83..f4cc3f9f 100644
--- a/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_imma.cpp
+++ b/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_imma.cpp
@@ -15,6 +15,7 @@
 #include "src/cuda/convolution_helper/layout.cuh"
 #include "src/cuda/convolution_helper/parameter.cuh"
 #include "src/cuda/utils.h"
+#include "src/common/conv_bias.h"
 
 using namespace megdnn;
 using namespace cuda;
@@ -71,7 +72,7 @@ bool ConvBiasForwardImpl::AlgoInt8CHWN4IMMAImplicitGemm::is_available(
     bool available = true;
     auto&& param = args.opr->param();
     auto&& fm = args.filter_meta;
-    if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout),
+    if (!check_bias_share_in_channel(*(args.bias_layout),
                                      param.format))
         return false;
     if (param.format != Format::CHWN4)
diff --git a/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_imma_reorder_filter.cpp b/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_imma_reorder_filter.cpp
index 0671d2a3..240122c1 100644
--- a/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_imma_reorder_filter.cpp
+++ b/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_imma_reorder_filter.cpp
@@ -15,6 +15,7 @@
 #include "src/cuda/convolution_helper/layout.cuh"
 #include "src/cuda/convolution_helper/parameter.cuh"
 #include "src/cuda/utils.h"
+#include "src/common/conv_bias.h"
 
 using namespace megdnn;
 using namespace cuda;
@@ -118,7 +119,7 @@ bool ConvBiasForwardImpl::AlgoInt8CHWN4IMMAImplicitGemmReorderFilter::
     bool available = true;
     auto&& param = args.opr->param();
     auto&& fm = args.filter_meta;
-    if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout),
+    if (!check_bias_share_in_channel(*(args.bias_layout),
                                      param.format))
         return false;
     if (param.format != Format::CHWN4)
diff --git a/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_imma_unroll_width.cpp b/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_imma_unroll_width.cpp
index df732e90..98d00829 100644
--- a/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_imma_unroll_width.cpp
+++ b/dnn/src/cuda/conv_bias/implicit_gemm_int8_chwn4_imma_unroll_width.cpp
@@ -15,6 +15,7 @@
 #include "src/cuda/convolution_helper/layout.cuh"
 #include "src/cuda/convolution_helper/parameter.cuh"
 #include "src/cuda/utils.h"
+#include "src/common/conv_bias.h"
 
 using namespace megdnn;
 using namespace cuda;
@@ -118,7 +119,7 @@ bool ConvBiasForwardImpl::AlgoInt8CHWN4IMMAImplicitGemmUnrollWidth::
     bool available = true;
     auto&& param = args.opr->param();
     auto&& fm = args.filter_meta;
-    if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout),
+    if (!check_bias_share_in_channel(*(args.bias_layout),
                                      param.format))
         return false;
     if (param.format != Format::CHWN4)
diff --git a/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw32_imma.cpp b/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw32_imma.cpp
index 5f1ef475..3e674080 100644
--- a/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw32_imma.cpp
+++ b/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw32_imma.cpp
@@ -14,6 +14,7 @@
 #include "src/cuda/conv_bias/cutlass_convolution_wrapper.cuh"
 #include "src/cuda/convolution_helper/parameter.cuh"
 #include "src/cuda/utils.h"
+#include "src/common/conv_bias.h"
 
 using namespace megdnn;
 using namespace cuda;
@@ -32,7 +33,7 @@ bool ConvBiasForwardImpl::AlgoInt8NCHW32IMMAImplicitGemm::is_available(
     bool available = true;
     auto&& param = args.opr->param();
     auto&& fm = args.filter_meta;
-    if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout),
+    if (!check_bias_share_in_channel(*(args.bias_layout),
                                      param.format))
         return false;
     if (param.format != Format::NCHW32 && param.format != Format::NCHW32_NCHW4)
diff --git a/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_dp4a.cpp b/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_dp4a.cpp
index ace4620a..b38d607a 100644
--- a/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_dp4a.cpp
+++ b/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_dp4a.cpp
@@ -13,6 +13,7 @@
 #include "src/cuda/utils.h"
 #include "src/cuda/convolution_helper/parameter.cuh"
 #include "src/cuda/conv_bias/cutlass_convolution_wrapper.cuh"
+#include "src/common/conv_bias.h"
 
 using namespace megdnn;
 using namespace cuda;
@@ -29,7 +30,7 @@ bool ConvBiasForwardImpl::AlgoInt8NCHW4DotProdImplicitGemm::is_available(
     bool available = true;
     auto&& param = args.opr->param();
     auto&& fm = args.filter_meta;
-    if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout),
+    if (!check_bias_share_in_channel(*(args.bias_layout),
                                      param.format))
         return false;
     if (param.format == Format::NCHW4_NCHW32) {
diff --git a/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_imma.cpp b/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_imma.cpp
index b268ea3d..d7818c59 100644
--- a/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_imma.cpp
+++ b/dnn/src/cuda/conv_bias/implicit_gemm_int8_nchw4_imma.cpp
@@ -12,6 +12,7 @@
 #include "./algo.h"
 #include "src/cuda/utils.h"
 #include "src/cuda/convolution_helper/bias_visitor.cuh"
+#include "src/common/conv_bias.h"
 
 using namespace megdnn;
 using namespace cuda;
@@ -29,7 +30,7 @@ bool ConvBiasForwardImpl::AlgoInt8NCHW4IMMAImplicitGemm::is_available(
     bool available = true;
     auto&& param = args.opr->param();
     auto&& fm = args.filter_meta;
-    if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout),
+    if (!check_bias_share_in_channel(*(args.bias_layout),
                                      param.format))
         return false;
     if (param.format != Format::NCHW4)
diff --git a/dnn/src/cuda/convolution/backward_data/algo.h b/dnn/src/cuda/convolution/backward_data/algo.h
index 00439d30..a4286c6d 100644
--- a/dnn/src/cuda/convolution/backward_data/algo.h
+++ b/dnn/src/cuda/convolution/backward_data/algo.h
@@ -127,6 +127,9 @@ public:
         if (m_attr.is_reproducible) {
             ret |= AlgoAttribute::REPRODUCIBLE;
         }
+        if (m_attr.accuracy_depend_on_batch) {
+            ret |= AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
+        }
         return ret;
     }
     cudnnConvolutionBwdDataAlgo_t cudnn_enum() const { return m_cudnn_enum; }
@@ -158,7 +161,8 @@ public:
     const char* name() const override { return "MATMUL"; }
     MEGDNN_DECL_ALGO_TYPE(CUDA_MATMUL)
     AlgoAttribute attribute() const override {
-        return AlgoAttribute::REPRODUCIBLE;
+        return AlgoAttribute::REPRODUCIBLE |
+               AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
     }
 };
diff --git a/dnn/src/cuda/convolution/backward_filter/algo.h b/dnn/src/cuda/convolution/backward_filter/algo.h
index 73cd48a2..3d38d650 100644
--- a/dnn/src/cuda/convolution/backward_filter/algo.h
+++ b/dnn/src/cuda/convolution/backward_filter/algo.h
@@ -123,6 +123,9 @@ public:
         if (m_attr.is_reproducible) {
             ret |= AlgoAttribute::REPRODUCIBLE;
         }
+        if (m_attr.accuracy_depend_on_batch) {
+            ret |= AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
+        }
         return ret;
     }
@@ -155,7 +158,8 @@ public:
     const char* name() const override { return "MATMUL"; }
     MEGDNN_DECL_ALGO_TYPE(CUDA_MATMUL)
     AlgoAttribute attribute() const override {
-        return AlgoAttribute::REPRODUCIBLE;
+        return AlgoAttribute::REPRODUCIBLE |
+               AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
     }
 };
diff --git a/dnn/src/cuda/convolution3d/backward_data/algo.h b/dnn/src/cuda/convolution3d/backward_data/algo.h
index fdedb99e..2cdcf873 100644
--- a/dnn/src/cuda/convolution3d/backward_data/algo.h
+++ b/dnn/src/cuda/convolution3d/backward_data/algo.h
@@ -119,6 +119,9 @@ public:
         if (m_attr.is_reproducible) {
             ret |= AlgoAttribute::REPRODUCIBLE;
         }
+        if (m_attr.accuracy_depend_on_batch) {
+            ret |= AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
+        }
         return ret;
     }
diff --git a/dnn/src/cuda/convolution3d/backward_filter/algo.h b/dnn/src/cuda/convolution3d/backward_filter/algo.h
index e2bd9d42..d3c5c398 100644
--- a/dnn/src/cuda/convolution3d/backward_filter/algo.h
+++ b/dnn/src/cuda/convolution3d/backward_filter/algo.h
@@ -112,6 +112,9 @@ public:
         if (m_attr.is_reproducible) {
             ret |= AlgoAttribute::REPRODUCIBLE;
         }
+        if (m_attr.accuracy_depend_on_batch) {
+            ret |= AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
+        }
         return ret;
     }
diff --git a/dnn/src/cuda/convolution3d/forward/algo.h b/dnn/src/cuda/convolution3d/forward/algo.h
index 9c096e5a..96048e98 100644
--- a/dnn/src/cuda/convolution3d/forward/algo.h
+++ b/dnn/src/cuda/convolution3d/forward/algo.h
@@ -106,7 +106,8 @@ public:
 
     const char* name() const override { return "1x1x1"; }
     AlgoAttribute attribute() const override {
-        return AlgoAttribute::REPRODUCIBLE;
+        return AlgoAttribute::REPRODUCIBLE |
+               AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
     }
     MEGDNN_DECL_ALGO_TYPE(CUDA_1X1X1)
 };
@@ -126,10 +127,17 @@ public:
     const char* name() const override { return m_name.c_str(); }
     AlgoAttribute attribute() const override {
-        auto ret = static_cast<AlgoAttribute>(0);
+        auto ret = AlgoAttribute::DEFAULT;
         if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) {
             ret |= AlgoAttribute::REPRODUCIBLE;
         }
+#define cb(attr)                               \
+    if (m_impl->contain_attribute_all(attr)) { \
+        ret |= attr;                           \
+    }
+        MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE(cb)
+#undef cb
+
         return ret;
     }
     static void modify_size_args(SizeArgs& args, TensorLayout& src_pg,
@@ -157,6 +165,9 @@ public:
         if (m_attr.is_reproducible) {
             ret |= AlgoAttribute::REPRODUCIBLE;
         }
+        if (m_attr.accuracy_depend_on_batch) {
+            ret |= AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
+        }
         return ret;
     }
diff --git a/dnn/src/cuda/cudnn_wrapper.cpp b/dnn/src/cuda/cudnn_wrapper.cpp
index bd2ec42d..5101d4ad 100644
--- a/dnn/src/cuda/cudnn_wrapper.cpp
+++ b/dnn/src/cuda/cudnn_wrapper.cpp
@@ -470,9 +470,9 @@ void Conv3DDesc::set(const param::Convolution3D& param, const size_t nr_group) {
 #define V(v) V1(v)
 #define DEF_NAME(NAME) \
     #NAME "v" V(CUDNN_MAJOR) "." V(CUDNN_MINOR) "." V(CUDNN_PATCHLEVEL)
-#define DEF_ALGO(NAME, PROD)               \
-    {                                      \
-        NAME, { DEF_NAME(NAME), PROD }     \
+#define DEF_ALGO(NAME, PROD1, PROD2)           \
+    {                                          \
+        NAME, { DEF_NAME(NAME), PROD1, PROD2 } \
     }
 
 #if !(CUDNN_MAJOR >= 6 || CUDNN_MINOR >= 1)
@@ -483,19 +483,18 @@ const std::unordered_map<cudnnConvolutionBwdDataAlgo_t, CudnnAlgoPack::Attr>
 CudnnAlgoPack::conv_bwd_data_algos() {
     static const std::unordered_map<cudnnConvolutionBwdDataAlgo_t,
                                     CudnnAlgoPack::Attr>
-            algos = {
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_0, false),
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_1, true),
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT, true),
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING, true),
+            algos =
+            { DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_0, false, false),
+              DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_1, true, false),
+              DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT, true, true),
+              DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING, true, true),
 #if CUDNN_MAJOR >= 5
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD, true),
+              DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD, true, false),
 #if CUDNN_MAJOR >= 6 || CUDNN_MINOR >= 1
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED,
-                             true),
+              DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED, true, false),
 #endif
 #endif
-            };
+            };
 
     return algos;
 }
@@ -505,15 +504,16 @@ CudnnAlgoPack::conv_bwd_flt_algos() {
     static const std::unordered_map<cudnnConvolutionBwdFilterAlgo_t,
                                     CudnnAlgoPack::Attr>
             algos = {
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0, false),
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1, true),
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT, true),
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3, false),
+                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0, false, false),
+                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1, true, false),
+                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT, true, true),
+                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3, false, false),
 #if CUDNN_MAJOR >= 6 || (CUDNN_MAJOR >= 5 && CUDNN_MINOR >= 1)
                     DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED,
-                             true),
+                             true, false),
 #if CUDNN_MAJOR >= 6
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING, true),
+                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING, true,
+                             true),
 #endif
 #endif
@@ -522,28 +522,30 @@ CudnnAlgoPack::conv_bwd_flt_algos() {
     return algos;
 }
 
-
 const std::unordered_map<cudnnConvolutionFwdAlgo_t, CudnnAlgoPack::Attr>
 CudnnAlgoPack::conv_fwd_algos() {
     static const std::unordered_map<cudnnConvolutionFwdAlgo_t,
                                     CudnnAlgoPack::Attr>
-            algos = {
-                    DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM, true),
-                    DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM,
-                             true),
-                    DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_GEMM, true),
-                    DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_DIRECT, true),
-                    DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_FFT, true),
-                    DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING, true),
+            algos =
+            { DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM, true, false),
+#if CUDNN_VERSION == 8004
+              DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM, true, true),
+#else
+              DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM, true, false),
+#endif
+              DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_GEMM, true, false),
+              DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_DIRECT, true, false),
+              DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_FFT, true, true),
+              DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING, true, true),
 #if CUDNN_MAJOR >= 5
-                    DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD, true),
+              DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD, true, false),
 #if CUDNN_MAJOR >= 6 || CUDNN_MINOR >= 1
-                    DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED, true),
+              DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED, true, false),
 #endif
 #endif
-            };
+            };
 
     return algos;
 }
@@ -553,9 +555,10 @@ CudnnAlgoPack::conv3d_bwd_data_algos() {
     static const std::unordered_map<cudnnConvolutionBwdDataAlgo_t,
                                     CudnnAlgoPack::Attr>
             algos = {
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_0, false),
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_1, true),
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING, true),
+                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_0, false, false),
+                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_1, true, false),
+                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING, true,
+                             true),
             };
 
     return algos;
@@ -568,9 +571,9 @@ CudnnAlgoPack::conv3d_bwd_flt_algos() {
     static const std::unordered_map<cudnnConvolutionBwdFilterAlgo_t,
                                     CudnnAlgoPack::Attr>
             algos = {
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0, false),
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1, true),
-                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3, false),
+                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0, false, false),
+                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1, true, false),
+                    DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3, false, false),
             };
 
     return algos;
@@ -581,10 +584,15 @@ CudnnAlgoPack::conv3d_fwd_algos() {
     static const std::unordered_map<cudnnConvolutionFwdAlgo_t,
                                     CudnnAlgoPack::Attr>
             algos = {
-                    DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM, true),
-                    DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM,
-                             true),
-                    DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING, true),
+                    DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM, true, false),
+#if CUDNN_VERSION == 8004
+                    DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM, true,
+                             true),
+#else
+                    DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM, true,
+                             false),
+#endif
+                    DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING, true, true),
             };
 
     return algos;
diff --git a/dnn/src/cuda/cudnn_wrapper.h b/dnn/src/cuda/cudnn_wrapper.h
index be35b3c1..d07fe2e3 100644
--- a/dnn/src/cuda/cudnn_wrapper.h
+++ b/dnn/src/cuda/cudnn_wrapper.h
@@ -112,6 +112,7 @@ public:
     struct Attr {
         std::string name;
         bool is_reproducible;
+        bool accuracy_depend_on_batch;
     };
 
     static const std::unordered_map<cudnnConvolutionBwdDataAlgo_t, Attr>
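CudnnAlgoPack::Attr gains a third field, and DEF_ALGO now aggregate-initializes {name, is_reproducible, accuracy_depend_on_batch} per cuDNN algorithm; the FFT-based algorithms (and IMPLICIT_PRECOMP_GEMM on cuDNN 8.0.4) are the ones marked batch-dependent. A simplified, compilable sketch of the table pattern, with a toy enum and macro rather than the cuDNN types:

    #include <iostream>
    #include <string>
    #include <unordered_map>

    // Simplified stand-ins for the cuDNN algo enum and CudnnAlgoPack::Attr.
    enum FwdAlgo { ALGO_IMPLICIT_GEMM, ALGO_FFT };

    struct Attr {
        std::string name;
        bool is_reproducible;
        bool accuracy_depend_on_batch;  // the field this patch adds
    };

    // Mirrors DEF_ALGO(NAME, PROD1, PROD2): the macro aggregate-initializes
    // one {enum, {name, reproducible, batch-dependent}} map entry.
    #define DEF_ALGO(NAME, PROD1, PROD2) \
        { NAME, { #NAME, PROD1, PROD2 } }

    const std::unordered_map<int, Attr>& fwd_algos() {
        static const std::unordered_map<int, Attr> algos = {
                DEF_ALGO(ALGO_IMPLICIT_GEMM, true, false),
                DEF_ALGO(ALGO_FFT, true, true),  // FFT result changes with batch
        };
        return algos;
    }

    int main() {
        const Attr& attr = fwd_algos().at(ALGO_FFT);
        std::cout << attr.name << ": accuracy_depend_on_batch="
                  << attr.accuracy_depend_on_batch << '\n';
    }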
<< "Which is: " << v0.layout.dtype.name() << "\n" + << "algo: " << algo.name << "\n"; + } + + switch (dtype.enumv()) { +#define cb(_dt) \ + case DTypeTrait<_dt>::enumv: \ + return assert_tensor_binary_eq::ctype>( \ + expr0, expr1, expr2, v0, v1, algo.name); + MEGDNN_FOREACH_COMPUTING_DTYPE(cb) + MEGDNN_FOREACH_QUANTIZED_DTYPE(cb) +#undef cb + default : megdnn_trap(); + } +} + +// vim: syntax=cpp.doxygen diff --git a/dnn/test/common/accuracy_shake_checker.h b/dnn/test/common/accuracy_shake_checker.h new file mode 100644 index 00000000..efbbe6b9 --- /dev/null +++ b/dnn/test/common/accuracy_shake_checker.h @@ -0,0 +1,396 @@ +/** + * \file dnn/test/common/accuracy_shake_checker.h + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#pragma once + +#include +#include "megdnn/oprs.h" +#include "src/common/conv_bias.h" +#include "src/common/utils.h" +#include "test/common/checker.h" +#include "test/common/index.h" + +namespace megdnn { +namespace test { + +namespace { + +template +struct BatchTrait { + //! index of batch in tensor, 3 for CHWN4 e.g. + static size_t index_of_batch(const typename Opr::Param&) { return 0; } + + //! indices contain batch in inputs and outputs, src(0) dst(2) for conv e.g. + static std::vector indices_contain_batch; + + static std::vector indices_contain_batch_broadcast; +}; + +template +std::vector BatchTrait::indices_contain_batch = {}; +template +std::vector BatchTrait::indices_contain_batch_broadcast = {}; + +#define DEFAULT_INDEX_OF_BATCH(opr) \ + static size_t index_of_batch(const opr::Param&) { return 0; } + +#define CONV_INDEX_OF_BATCH(opr) \ + static size_t index_of_batch(const opr::Param& p) { \ + if (p.format == opr::Param::Format::CHWN4) { \ + return 3; \ + } \ + return 0; \ + } + +#define OPR_WITHOUT_INPUT_BROADCAST(INDEX_OF_BATCH, opr, idxs, idxs_brdcst) \ + template <> \ + struct BatchTrait { \ + INDEX_OF_BATCH(opr) \ + static std::vector indices_contain_batch; \ + static std::vector indices_contain_batch_broadcast; \ + }; \ + std::vector BatchTrait::indices_contain_batch = idxs; \ + std::vector BatchTrait::indices_contain_batch_broadcast = \ + idxs_brdcst; + +OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH, + megdnn::Convolution3DForward, + (std::initializer_list{0, 2}), {}) +OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH, + megdnn::Convolution3DBackwardData, + (std::initializer_list{1, 2}), {}) +OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH, + megdnn::Convolution3DBackwardFilter, + (std::initializer_list{0, 1}), {}) +OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH, megdnn::BatchedMatrixMul, + (std::initializer_list{0, 1, 2}), {}) + +OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::ConvolutionForward, + (std::initializer_list{0, 2}), {}) +OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, + megdnn::ConvolutionBackwardData, + (std::initializer_list{1, 2}), {}) +OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, + megdnn::ConvolutionBackwardFilter, + (std::initializer_list{0, 1}), {}) +OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::LocalShareForward, + (std::initializer_list{0, 2}), {}) +OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::LocalShareBackwardData, + (std::initializer_list{1, 2}), {}) 
diff --git a/dnn/test/common/accuracy_shake_checker.cpp b/dnn/test/common/accuracy_shake_checker.cpp
new file mode 100644
index 00000000..1fe41b80
--- /dev/null
+++ b/dnn/test/common/accuracy_shake_checker.cpp
@@ -0,0 +1,109 @@
+/**
+ * \file dnn/test/common/accuracy_shake_checker.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+
+#include "test/common/accuracy_shake_checker.h"
+
+using namespace megdnn;
+using namespace test;
+
+namespace {
+
+template <typename ctype>
+::testing::AssertionResult assert_tensor_binary_eq(
+        const char* expr0, const char* expr1, const char* /*expr2*/,
+        const TensorND& v0, const TensorND& v1, const std::string& algo_name) {
+    ctype* it0_orig = v0.ptr<ctype>();
+    ctype* it1 = v1.ptr<ctype>();
+    ctype* it0 = it0_orig;
+    auto nr_elem = v1.layout.total_nr_elems();
+    auto nr_elem_single_batch = v0.layout.total_nr_elems();
+    for (size_t i = 0; i < nr_elem; ++i) {
+        if (i % nr_elem_single_batch == 0) {
+            it0 = it0_orig;
+        }
+        ctype iv0 = *it0, iv1 = *it1;
+
+        if (!good_float(iv0) || !good_float(iv1) ||
+            memcmp(it0, it1, sizeof(ctype))) {
+            Index index(v1.layout, i);
+            return ::testing::AssertionFailure()
+                   << "Unequal value\n"
+                   << "Value of: " << expr1 << "\n"
+                   << "  Actual: " << (iv1 + 0) << "\n"
+                   << "Expected: " << expr0 << "\n"
+                   << "Which is: " << (iv0 + 0) << "\n"
+                   << "At index: " << index.to_string() << "/"
+                   << v1.layout.TensorShape::to_string() << "\n"
+                   << "   DType: " << v1.layout.dtype.name() << "\n"
+                   << "algo: " << algo_name;
+        }
+
+        ++it0;
+        ++it1;
+    }
+
+    return ::testing::AssertionSuccess();
+}
+}  // namespace
+
+::testing::AssertionResult test::__assert_tensor_binary_eq(
+        const char* expr0, const char* expr1, const char* expr2,
+        const TensorND& v0, const TensorND& v1,
+        const Algorithm::Info::Desc& algo) {
+    bool shape_match = v0.layout[0] == 1;
+    for (size_t i = 1; i < v0.layout.ndim; ++i) {
+        shape_match &= v0.layout[i] == v1.layout[i];
+    }
+    if (!shape_match) {
+        return ::testing::AssertionFailure()
+               << "Shape mismatch\n"
+               << "Value of: " << expr1 << "\n"
+               << "  Actual: " << v1.layout.TensorShape::to_string() << "\n"
+               << "Expected: " << expr0 << "\n"
+               << "Which is: " << v0.layout.TensorShape::to_string() << "\n"
+               << "algo: " << algo.name << "\n";
+    }
+
+    if (!v0.layout.is_physical_contiguous() ||
+        !v1.layout.is_physical_contiguous()) {
+        return ::testing::AssertionFailure()
+               << "layout should be physical contiguous\n"
+               << "Value of: " << expr1 << "\n"
+               << "  Actual: " << v1.layout.is_physical_contiguous() << "\n"
+               << "Expected: " << expr0 << "\n"
+               << "Which is: " << v0.layout.is_physical_contiguous() << "\n"
+               << "algo: " << algo.name << "\n";
+    }
+    auto dtype = v0.layout.dtype;
+    if (dtype != v1.layout.dtype) {
+        return ::testing::AssertionFailure()
+               << "Data type should match\n"
+               << "Value of: " << expr1 << "\n"
+               << "  Actual: " << v1.layout.dtype.name() << "\n"
+               << "Expected: " << expr0 << "\n"
+               << "Which is: " << v0.layout.dtype.name() << "\n"
+               << "algo: " << algo.name << "\n";
+    }
+
+    switch (dtype.enumv()) {
+#define cb(_dt)                                                           \
+    case DTypeTrait<_dt>::enumv:                                          \
+        return assert_tensor_binary_eq<typename DTypeTrait<_dt>::ctype>(  \
+                expr0, expr1, expr2, v0, v1, algo.name);
+        MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
+        MEGDNN_FOREACH_QUANTIZED_DTYPE(cb)
+#undef cb
+        default:
+            megdnn_trap();
+    }
+}
+
+// vim: syntax=cpp.doxygen
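assert_tensor_binary_eq above demands bitwise equality: the batch-1 reference output (v0) is replayed against every batch of the full output (v1) by resetting the reference iterator at each batch boundary. The same loop on plain vectors, as a runnable sketch:

    #include <cassert>
    #include <cstring>
    #include <vector>

    // Core idea of the checker's comparison: each batch of the large-batch
    // output must be bitwise identical to the single-batch reference, so
    // the reference index wraps around at every batch boundary.
    bool batches_binary_equal(const std::vector<float>& single_batch,
                              const std::vector<float>& full) {
        size_t nr_single = single_batch.size();
        for (size_t i = 0; i < full.size(); ++i) {
            // memcmp, not ==: bitwise equality, so NaN payloads and signed
            // zeros have to match exactly too.
            if (std::memcmp(&single_batch[i % nr_single], &full[i],
                            sizeof(float)) != 0)
                return false;
        }
        return true;
    }

    int main() {
        std::vector<float> one = {1.f, 2.f};
        std::vector<float> four = {1.f, 2.f, 1.f, 2.f, 1.f, 2.f, 1.f, 2.f};
        assert(batches_binary_equal(one, four));
        four[5] = 2.0000002f;  // a last-bit wobble in batch 2 is caught
        assert(!batches_binary_equal(one, four));
    }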
diff --git a/dnn/test/common/accuracy_shake_checker.h b/dnn/test/common/accuracy_shake_checker.h
new file mode 100644
index 00000000..efbbe6b9
--- /dev/null
+++ b/dnn/test/common/accuracy_shake_checker.h
@@ -0,0 +1,396 @@
+/**
+ * \file dnn/test/common/accuracy_shake_checker.h
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+#pragma once
+
+#include <regex>
+#include "megdnn/oprs.h"
+#include "src/common/conv_bias.h"
+#include "src/common/utils.h"
+#include "test/common/checker.h"
+#include "test/common/index.h"
+
+namespace megdnn {
+namespace test {
+
+namespace {
+
+template <typename Opr>
+struct BatchTrait {
+    //! index of batch in tensor, 3 for CHWN4 e.g.
+    static size_t index_of_batch(const typename Opr::Param&) { return 0; }
+
+    //! indices contain batch in inputs and outputs, src(0) dst(2) for conv e.g.
+    static std::vector<size_t> indices_contain_batch;
+
+    static std::vector<size_t> indices_contain_batch_broadcast;
+};
+
+template <typename Opr>
+std::vector<size_t> BatchTrait<Opr>::indices_contain_batch = {};
+template <typename Opr>
+std::vector<size_t> BatchTrait<Opr>::indices_contain_batch_broadcast = {};
+
+#define DEFAULT_INDEX_OF_BATCH(opr) \
+    static size_t index_of_batch(const opr::Param&) { return 0; }
+
+#define CONV_INDEX_OF_BATCH(opr)                        \
+    static size_t index_of_batch(const opr::Param& p) { \
+        if (p.format == opr::Param::Format::CHWN4) {    \
+            return 3;                                   \
+        }                                               \
+        return 0;                                       \
+    }
+
+#define OPR_WITHOUT_INPUT_BROADCAST(INDEX_OF_BATCH, opr, idxs, idxs_brdcst) \
+    template <>                                                             \
+    struct BatchTrait<opr> {                                                \
+        INDEX_OF_BATCH(opr)                                                 \
+        static std::vector<size_t> indices_contain_batch;                   \
+        static std::vector<size_t> indices_contain_batch_broadcast;         \
+    };                                                                      \
+    std::vector<size_t> BatchTrait<opr>::indices_contain_batch = idxs;      \
+    std::vector<size_t> BatchTrait<opr>::indices_contain_batch_broadcast =  \
+            idxs_brdcst;
+
+OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH,
+                            megdnn::Convolution3DForward,
+                            (std::initializer_list<size_t>{0, 2}), {})
+OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH,
+                            megdnn::Convolution3DBackwardData,
+                            (std::initializer_list<size_t>{1, 2}), {})
+OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH,
+                            megdnn::Convolution3DBackwardFilter,
+                            (std::initializer_list<size_t>{0, 1}), {})
+OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH, megdnn::BatchedMatrixMul,
+                            (std::initializer_list<size_t>{0, 1, 2}), {})
+
+OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::ConvolutionForward,
+                            (std::initializer_list<size_t>{0, 2}), {})
+OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH,
+                            megdnn::ConvolutionBackwardData,
+                            (std::initializer_list<size_t>{1, 2}), {})
+OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH,
+                            megdnn::ConvolutionBackwardFilter,
+                            (std::initializer_list<size_t>{0, 1}), {})
+OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::LocalShareForward,
+                            (std::initializer_list<size_t>{0, 2}), {})
+OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::LocalShareBackwardData,
+                            (std::initializer_list<size_t>{1, 2}), {})
+OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH,
+                            megdnn::LocalShareBackwardFilter,
+                            (std::initializer_list<size_t>{0, 1}), {})
+OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::DeformableConvForward,
+                            (std::initializer_list<size_t>{0, 2, 3, 4}), {})
+OPR_WITHOUT_INPUT_BROADCAST(
+        CONV_INDEX_OF_BATCH, megdnn::DeformableConvBackwardData,
+        (std::initializer_list<size_t>{0, 2, 3, 4, 5, 6, 7}), {})
+OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH,
+                            megdnn::DeformableConvBackwardFilter,
+                            (std::initializer_list<size_t>{0, 1, 2, 3}), {})
+OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::BatchConvBiasForward,
+                            (std::initializer_list<size_t>{0, 1, 2, 3, 4}), {})
+OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::ConvBiasForward,
+                            (std::initializer_list<size_t>{0, 3, 4}), {2})
+#undef OPR_WITHOUT_INPUT_BROADCAST
+#undef DEFAULT_INDEX_OF_BATCH
+#undef CONV_INDEX_OF_BATCH
+
+template <typename Opr>
+struct LayoutsModifier {
+    static void on(TensorLayoutArray& layouts, const typename Opr::Param& p,
+                   size_t new_batch_size) {
+        size_t batch_index = BatchTrait<Opr>::index_of_batch(p);
+        for (size_t index : BatchTrait<Opr>::indices_contain_batch) {
+            layouts.at(index)[batch_index] = new_batch_size;
+        }
+
+        for (size_t index : BatchTrait<Opr>::indices_contain_batch_broadcast) {
+            if (!check_bias_share_in_channel(layouts.at(index), p.format)) {
+                layouts.at(index)[batch_index] = new_batch_size;
+            }
+        }
+    }
+};
+
+#define OPR_NO_BIAS(opr)                                                      \
+    template <>                                                               \
+    struct LayoutsModifier<opr> {                                             \
+        static void on(TensorLayoutArray& layouts,                            \
+                       const typename opr::Param& p, size_t new_batch_size) { \
+            size_t batch_index = BatchTrait<opr>::index_of_batch(p);          \
+            for (size_t index : BatchTrait<opr>::indices_contain_batch) {     \
+                layouts.at(index)[batch_index] = new_batch_size;              \
+            }                                                                 \
+        }                                                                     \
+    };
+
+OPR_NO_BIAS(megdnn::Convolution3D)
+OPR_NO_BIAS(megdnn::BatchedMatrixMul)
+#undef OPR_NO_BIAS
+
+template <>
+struct LayoutsModifier<megdnn::MatrixMul> {
+public:
+    static void on(TensorLayoutArray& layouts,
+                   const megdnn::MatrixMul::Param& p,
+                   size_t new_batch_size) {
+        assert(!p.transposeA && !p.transposeB);
+        MEGDNN_MARK_USED_VAR(p);
+        layouts.at(0)[0] = new_batch_size;
+        layouts.at(2)[0] = new_batch_size;
+    }
+};
+
+template <typename Opr, typename Proxy = OprProxy<Opr>>
+class AlgoGenerator {
+public:
+    AlgoGenerator(ExecutionPolicyAlgoName name) : m_policy_name{name} {}
+
+    std::vector<Algorithm::Info::Desc> operator()(
+            Opr* opr, const CheckerHelper::TensorValueArray& arr) {
+        TensorLayoutArray layouts;
+        for (auto&& val : arr) {
+            layouts.push_back(val.layout);
+        }
+        std::vector<Algorithm::Info::Desc> ret;
+        megdnn_assert(layouts.size() == OprTrait<Opr>::arity);
+        for (auto algo_info :
+             AlgoProxy<Opr, OprTrait<Opr>::arity>::get_all_algorithms_info(
+                     opr, layouts)) {
+            if (!(algo_info.attribute &
+                  AlgoAttribute::ACCURACY_DEPEND_ON_BATCH) &&
+                std::regex_match(
+                        algo_info.desc.name,
+                        std::regex("(.*)(" + m_policy_name.name + ")(.*)"))) {
+                ret.push_back(algo_info.desc);
+            } else {
+                continue;
+            }
+        }
+        return ret;
+    }
+
+private:
+    ExecutionPolicyAlgoName m_policy_name;
+};
+
+}  // namespace
+
+::testing::AssertionResult __assert_tensor_binary_eq(
+        const char* expr0, const char* expr1, const char* expr2,
+        const TensorND& v0, const TensorND& v1,
+        const Algorithm::Info::Desc& algo);
+
+template <typename Opr, typename Proxy = OprProxy<Opr>>
+class AccuracyShakeChecker : public CheckerHelper {
+public:
+    static constexpr int arity_in = OprArityTrait<Opr>::arity_in;
+    using Param = typename Opr::Param;
+    using BeforeExecCallback = std::function<std::vector<Algorithm::Info::Desc>(
+            Opr*, const TensorValueArray&)>;
+    AccuracyShakeChecker(Handle* handle, bool check_dispatch = false)
+            : CheckerHelper(handle, check_dispatch),
+              m_before_exec_callback{AlgoGenerator<Opr>("")},
+              m_param(Param()) {}
+
+    TensorLayoutArray make_layouts(const TensorShapeArray& shapes) {
+        TensorLayoutArray layouts(shapes.size());
+        for (size_t i = 0; i < shapes.size(); ++i) {
+            DType dt = (m_dtype.find(i) != m_dtype.end() ? m_dtype[i]
+                                                         : dtype::Float32());
+            TensorFormat fmt =
+                    (m_fmt.find(i) != m_fmt.end() ? m_fmt[i] : TensorFormat{});
+            layouts[i] = TensorLayout(shapes[i], dt, fmt);
+        }
+        return layouts;
+    }
+
+    /*!
+     * \brief execute opr on current param/dtype/rng config
+     * \param shapes input/output shapes, which would be passed as
+     *      arguments to Opr::deduce_layout
+     *
+     * Checker would construct TensorLayout vectors from shapes and dtypes,
+     * and call exec(TensorLayoutArray &).
+     */
+    AccuracyShakeChecker& exec(const TensorShapeArray& shapes) {
+        exec(make_layouts(shapes));
+        return *this;
+    }
+
+    void exec(TensorLayoutArray layouts);
+
+    AccuracyShakeChecker& set_param(Param p) {
+        m_param = p;
+        opr()->param() = p;
+        return *this;
+    }
+    AccuracyShakeChecker& set_dtype(size_t idx, DType dtype) {
+        m_dtype[idx] = dtype;
+        return *this;
+    }
+    AccuracyShakeChecker& set_rng(size_t idx, RNG* rng) {
+        m_rng[idx] = rng;
+        return *this;
+    }
+
+    //! set a callback to be invoked before executing the operator
+    AccuracyShakeChecker& set_before_exec_callback(
+            const BeforeExecCallback& cb) {
+        m_before_exec_callback = cb;
+        return *this;
+    }
+
+    AccuracyShakeChecker& reset_before_exec_callback() {
+        m_before_exec_callback = nullptr;
+        return *this;
+    }
+
+    //! get the opr impl so setting other than param() can be modified
+    Opr* opr() {
+        if (!m_opr_cur) {
+            m_opr_cur = m_handle_cur->create_operator<Opr>();
+        }
+        return m_opr_cur.get();
+    }
+
+private:
+    BeforeExecCallback m_before_exec_callback;
+    Param m_param;
+    Proxy m_proxy;
+    std::unique_ptr<Opr> m_opr_cur;
+    std::shared_ptr<TensorValueArray> m_tensors_cur_host,
+            m_tensors_single_batch_host;
+
+    void init_host_values();
+
+    void check_tensors_ignore_batch(
+            const TensorValueArray& tensors_single_batch,
+            const TensorValueArray& tensors, const Algorithm::Info::Desc& desc);
+};
+
+template <typename Opr, typename Proxy>
+void AccuracyShakeChecker<Opr, Proxy>::exec(TensorLayoutArray layouts) {
+    auto opr_cur = this->opr();
+    opr_cur->param() = m_param;
+
+    m_proxy.deduce_layout(opr_cur, layouts);
+
+    TensorLayoutArray layouts_single_batch = layouts;
+    for (size_t i = 0; i < layouts_single_batch.size(); ++i) {
+        ASSERT_TRUE(layouts_single_batch[i].ndim != 0);
+    }
+    ASSERT_TRUE(0 == BatchTrait<Opr>::index_of_batch(opr_cur->param()))
+            << "index of batch should be 0 ";
+
+    LayoutsModifier<Opr>::on(layouts_single_batch, opr_cur->param(), 1);
+
+    // allocate input
+    auto tensors_single_batch_storage =
+            alloc_tensors(m_handle_cur, layouts_single_batch, 0);
+    m_tensors_single_batch_host =
+            alloc_tensors(m_handle_naive.get(), layouts_single_batch, 0);
+    auto tensors_cur_storage = alloc_tensors(m_handle_cur, layouts, 0);
+    m_tensors_cur_host = alloc_tensors(m_handle_naive.get(), layouts, 0);
+    auto&& tensors_single_batch = *tensors_single_batch_storage;
+    auto&& tensors_single_batch_host = *m_tensors_single_batch_host;
+    auto&& tensors_cur = *tensors_cur_storage;
+    auto&& tensors_cur_host = *m_tensors_cur_host;
+
+    // allocate output
+    auto tensors_single_batch_storage_out =
+            alloc_tensors(m_handle_naive.get(), layouts_single_batch, 0);
+    auto tensors_cur_storage_out =
+            alloc_tensors(m_handle_naive.get(), layouts, 0);
+    auto&& tensors_single_batch_out = *tensors_single_batch_storage_out;
+    auto&& tensors_cur_out = *tensors_cur_storage_out;
+
+    init_host_values();
+
+    copy_tensors_to_device(tensors_cur, tensors_cur_host);
+    copy_tensors_to_device(tensors_single_batch, tensors_single_batch_host);
+
+    std::vector<Algorithm::Info::Desc> algo_desc;
+    if (m_before_exec_callback) {
+        algo_desc = m_before_exec_callback(opr_cur, tensors_cur);
+    } else {
+        algo_desc.push_back({});
+    }
+    for (size_t i = 0; i < algo_desc.size(); ++i) {
+        opr_cur->execution_policy().algo = algo_desc[i];
+        m_proxy.exec(opr_cur, tensors_cur);
+        m_proxy.exec(opr_cur, tensors_single_batch);
+
+        copy_tensors_from_device(tensors_cur_out, tensors_cur);
+        copy_tensors_from_device(tensors_single_batch_out,
+                                 tensors_single_batch);
+
+        check_tensors_ignore_batch(tensors_single_batch_out, tensors_cur_out,
+                                   algo_desc[i]);
+    }
+}
+
+template <typename Opr, typename Proxy>
+void AccuracyShakeChecker<Opr, Proxy>::init_host_values() {
+    size_t index_of_batch = 0;
+    auto&& tensors_single_batch = *m_tensors_single_batch_host;
+    auto&& tensors_cur = *m_tensors_cur_host;
+    for (size_t i = 0; i < arity_in; ++i) {
+        auto&& tensor_single_batch = tensors_single_batch[i];
+        auto&& tensor_cur = tensors_cur[i];
+        auto rng = m_rng[i];
+        if (!rng)
+            rng = m_default_rng.get();
+        rng->gen(tensor_single_batch);
+
+        dt_byte* raw_storage_cur = static_cast<dt_byte*>(tensor_cur.raw_ptr) +
+                                   tensor_cur.layout.span().low_byte;
+        dt_byte* raw_storage_single_batch =
+                static_cast<dt_byte*>(tensor_single_batch.raw_ptr) +
+                tensor_single_batch.layout.span().low_byte;
+        const size_t step = tensor_single_batch.layout.span().dist_byte();
+        if (tensor_cur.layout.eq_shape(tensor_single_batch.layout)) {
+            memcpy(raw_storage_cur, raw_storage_single_batch, step);
+        } else {
+            ASSERT_TRUE(1 == tensor_single_batch.layout[index_of_batch])
+                    << "bad batch size "
+                    << tensor_single_batch.layout[index_of_batch];
+            for (size_t b = 0; b < tensor_cur.layout[index_of_batch]; ++b) {
+                memcpy(raw_storage_cur + b * step, raw_storage_single_batch,
+                       step);
+            }
+        }
+    }
+}
+
+template <typename Opr, typename Proxy>
+void AccuracyShakeChecker<Opr, Proxy>::check_tensors_ignore_batch(
+        const TensorValueArray& tensors_single_batch,
+        const TensorValueArray& tensors, const Algorithm::Info::Desc& algo) {
+    for (size_t i = 0; i < tensors_single_batch.size(); ++i) {
+        if (tensors_single_batch[i].layout.ndim == 0 ||
+            tensors_single_batch[i].layout.eq_shape(tensors[i].layout))
+            continue;
+        ASSERT_PRED_FORMAT3(::megdnn::test::__assert_tensor_binary_eq,
+                            tensors_single_batch[i], tensors[i], algo);
+    }
+}
+
+}  // namespace test
+}  // namespace megdnn
+
+// vim: syntax=cpp.doxygen
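AlgoGenerator is the checker's default before-exec callback: it enumerates the available algorithms for the deduced layouts and keeps those that both lack ACCURACY_DEPEND_ON_BATCH and whose name contains the requested policy string, matched via the "(.*)(name)(.*)" regex. The regex trick in isolation, as a self-contained sketch (hypothetical algorithm names for illustration):

    #include <iostream>
    #include <regex>
    #include <string>

    // Substring matching is what lets a short policy like "X86" select
    // "X86_F32_MKL_PACKA" and friends, while an empty policy matches all.
    bool name_matches(const std::string& algo_name, const std::string& policy) {
        return std::regex_match(algo_name,
                                std::regex("(.*)(" + policy + ")(.*)"));
    }

    int main() {
        std::cout << name_matches("X86_F32_MKL_PACKA", "X86") << '\n';  // 1
        std::cout << name_matches("CUBLAS_LT", "X86") << '\n';          // 0
        std::cout << name_matches("CUBLAS_LT", "") << '\n';             // 1 (checker default)
    }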
diff --git a/dnn/test/common/checker.cpp b/dnn/test/common/checker.cpp
index 7d396ab1..3e2fab9f 100644
--- a/dnn/test/common/checker.cpp
+++ b/dnn/test/common/checker.cpp
@@ -19,50 +19,6 @@ using namespace megdnn;
 using namespace test;
 
 namespace {
-    bool good_float(float val) {
-        return std::isfinite(val);
-    }
-
-    bool good_float(int) {
-        return true;
-    }
-
-    bool good_float(dt_qint8) {
-        return true;
-    }
-
-    bool good_float(dt_qint16) {
-        return true;
-    }
-
-    bool good_float(dt_quint8) {
-        return true;
-    }
-
-    bool good_float(dt_qint32) {
-        return true;
-    }
-
-    // A hack for the (x+0) promote to int trick on dt_quint8.
-    int operator +(dt_quint8 lhs, int rhs) {
-        megdnn_assert(rhs == 0, "unexpected rhs");
-        return lhs.as_uint8();
-    }
-
-    int operator +(dt_qint32 lhs, int rhs) {
-        megdnn_assert(rhs == 0, "unexpected rhs");
-        return lhs.as_int32();
-    }
-
-    int operator +(dt_qint8 lhs, int rhs) {
-        megdnn_assert(rhs == 0, "unexpected rhs");
-        return int8_t(lhs);
-    }
-
-    int operator +(dt_qint16 lhs, int rhs) {
-        megdnn_assert(rhs == 0, "unexpected rhs");
-        return lhs.as_int16();
-    }
 
     template <typename ctype, class Iter>
     ::testing::AssertionResult assert_tensor_eq_with_iter(
diff --git a/dnn/test/common/checker.h b/dnn/test/common/checker.h
index dd749b51..eff092c5 100644
--- a/dnn/test/common/checker.h
+++ b/dnn/test/common/checker.h
@@ -86,6 +86,7 @@ protected:
     size_t m_offset = 0;
 
     CheckerHelper(Handle* handle, bool check_dispatch = true);
+    ~CheckerHelper() noexcept;
 
     using OprExec = std::function<void(const TensorValueArray&)>;
 
@@ -100,14 +101,15 @@ protected:
 
     void enable_contig_naive() { m_enable_contig_naive = true; }
 
-private:
-    std::shared_ptr<TensorValueArray> m_tensors_naive;
-
-    void init_naive_values();
     void copy_tensors_to_device(const TensorValueArray& dest,
                                 const TensorValueArray& src);
     void copy_tensors_from_device(const TensorValueArray& dest,
                                   const TensorValueArray& src);
+
+private:
+    std::shared_ptr<TensorValueArray> m_tensors_naive;
+
+    void init_naive_values();
     void check_tensors(const TensorValueArray& expected,
                        const TensorValueArray& computed);
 };
diff --git a/dnn/test/common/utils.h b/dnn/test/common/utils.h
index c87573d7..5cebb001 100644
--- a/dnn/test/common/utils.h
+++ b/dnn/test/common/utils.h
@@ -311,6 +311,51 @@ public:
 
 size_t get_cpu_count();
 
+static inline bool good_float(float val) {
+    return std::isfinite(val);
+}
+
+static inline bool good_float(int) {
+    return true;
+}
+
+static inline bool good_float(dt_qint8) {
+    return true;
+}
+
+static inline bool good_float(dt_qint16) {
+    return true;
+}
+
+static inline bool good_float(dt_quint8) {
+    return true;
+}
+
+static inline bool good_float(dt_qint32) {
+    return true;
+}
+
+// A hack for the (x+0) promote to int trick on dt_quint8.
+static inline int operator+(dt_quint8 lhs, int rhs) {
+    megdnn_assert(rhs == 0, "unexpected rhs");
+    return lhs.as_uint8();
+}
+
+static inline int operator+(dt_qint32 lhs, int rhs) {
+    megdnn_assert(rhs == 0, "unexpected rhs");
+    return lhs.as_int32();
+}
+
+static inline int operator+(dt_qint8 lhs, int rhs) {
+    megdnn_assert(rhs == 0, "unexpected rhs");
+    return int8_t(lhs);
+}
+
+static inline int operator+(dt_qint16 lhs, int rhs) {
+    megdnn_assert(rhs == 0, "unexpected rhs");
+    return lhs.as_int16();
+}
+
 }  // namespace test
 
 static inline bool operator==(const TensorLayout& a, const TensorLayout& b) {
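The good_float()/operator+ helpers move from checker.cpp into utils.h so that accuracy_shake_checker.cpp can use them too. The (x + 0) idiom exists because streaming an 8-bit value prints a character rather than a number; the overloads above extend the same promotion trick to the dt_q* wrapper types. Plain-int8 illustration:

    #include <cstdint>
    #include <iostream>

    // Why the checkers print `(iv + 0)` instead of `iv`: an int8-like value
    // hits the char overload of operator<<. Adding 0 forces integer promotion.
    int main() {
        int8_t v = 65;
        std::cout << v << '\n';        // prints 'A' (streamed as a char)
        std::cout << (v + 0) << '\n';  // prints 65 (promoted to int)
    }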
diff --git a/dnn/test/cuda/accuracy_shake.cpp b/dnn/test/cuda/accuracy_shake.cpp
new file mode 100644
index 00000000..ed93afe8
--- /dev/null
+++ b/dnn/test/cuda/accuracy_shake.cpp
@@ -0,0 +1,247 @@
+/**
+ * \file dnn/test/cuda/accuracy_shake.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+#include "megdnn/dtype.h"
+#include "megdnn/oprs.h"
+#include "megdnn/opr_param_defs.h"
+#include "test/cuda/fixture.h"
+#include "test/cuda/utils.h"
+#include "test/common/rng.h"
+#include "test/common/accuracy_shake_checker.h"
+
+namespace megdnn {
+namespace test {
+
+TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD) {
+    require_compute_capability(6, 1);
+    AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
+    NormalRNG default_rng;
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_dtype(2, dtype::Float32())
+            .set_rng(0, &default_rng)
+            .set_rng(1, &default_rng);
+    // convolution
+    checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {}, {}, {}});
+    // convbias without z
+    checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {}, {}});
+    // convbias with z
+    checker.exec({{64, 16, 32, 32},
+                  {64, 16, 3, 3},
+                  {1, 64, 1, 1},
+                  {64, 64, 30, 30},
+                  {}});
+    ConvBias::Param param;
+    // group
+    param.sparse = ConvBias::Param::Sparse::GROUP;
+    checker.set_param(param);
+    checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {}, {}, {}});
+    checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {1, 64, 1, 1}, {}, {}});
+    checker.exec({{64, 16, 32, 32},
+                  {2, 32, 8, 3, 3},
+                  {1, 64, 1, 1},
+                  {64, 64, 30, 30},
+                  {}});
+}
+
+TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHW) {
+    require_compute_capability(6, 1);
+    AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
+    UniformIntRNG int_rng{-128, 127};
+
+    checker.set_dtype(0, dtype::QuantizedS8(2.5f))
+            .set_dtype(1, dtype::QuantizedS8(2.5f))
+            .set_dtype(2, dtype::QuantizedS32(6.25f))
+            .set_dtype(3, dtype::QuantizedS8(0.25f))
+            .set_dtype(4, dtype::QuantizedS8(0.25f))
+            .set_rng(0, &int_rng)
+            .set_rng(1, &int_rng)
+            .set_rng(2, &int_rng)
+            .set_rng(3, &int_rng);
+
+    // convolution
+    checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {}, {}, {}});
+    // convbias without z
+    checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {}, {}});
+    // convbias with z
+    checker.exec({{64, 16, 32, 32},
+                  {64, 16, 3, 3},
+                  {1, 64, 1, 1},
+                  {64, 64, 30, 30},
+                  {}});
+    // group
+    ConvBias::Param param;
+    param.sparse = ConvBias::Param::Sparse::GROUP;
+    checker.set_param(param);
+    checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {}, {}, {}});
+    checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {1, 64, 1, 1}, {}, {}});
+    checker.exec({{64, 16, 32, 32},
+                  {2, 32, 8, 3, 3},
+                  {1, 64, 1, 1},
+                  {64, 64, 30, 30},
+                  {}});
+}
+
+TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NHWC) {
+    require_compute_capability(6, 1);
+
+    UniformIntRNG int_rng{-50, 50};
+    AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
+    ConvBias::Param param;
+    param.format = ConvBias::Param::Format::NHWC;
+    checker.set_dtype(0, dtype::QuantizedS8(2.5f))
+            .set_dtype(1, dtype::QuantizedS8(2.5f))
+            .set_dtype(2, dtype::QuantizedS32(6.25f))
+            .set_dtype(4, dtype::QuantizedS8(60.25f))
+            .set_rng(0, &int_rng)
+            .set_rng(1, &int_rng)
+            .set_rng(2, &int_rng)
+            .set_param(param);
+    checker.exec({{20, 32, 32, 4}, {24, 1, 1, 4}, {1, 1, 1, 24}, {}, {}});
+
+    param.sparse = ConvBias::Param::Sparse::GROUP;
+    checker.set_param(param).exec(
+            {{20, 32, 32, 16}, {4, 4, 1, 1, 4}, {1, 1, 1, 16}, {}, {}});
+}
+
+TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHWX) {
+    using Format = ConvBias::Param::Format;
+    require_compute_capability(6, 1);
+    AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
+    UniformIntRNG int_rng{-5, 5};
+    UniformFloatRNG float_rng{-50, 50};
+
+    checker.set_dtype(0, dtype::QuantizedS8(1.2f))
+            .set_dtype(1, dtype::QuantizedS8(1.3f))
+            .set_dtype(2, dtype::QuantizedS32(1.2 * 1.3f))
+            .set_dtype(3, dtype::QuantizedS8(1.3f))
+            .set_dtype(4, dtype::QuantizedS8(1.3f))
+            .set_rng(0, &int_rng)
+            .set_rng(1, &int_rng)
+            .set_rng(2, &int_rng)
+            .set_rng(3, &int_rng);
+
+    auto run = [&](const TensorShapeArray& shapes, const Format& format) {
+        ConvBias::Param param;
+        param.format = format;
+        checker.set_param(param).exec(
+                {shapes[0], shapes[1], shapes[2], {}, {}});
+    };
+
+    run({{20, 2, 24, 24, 4}, {24, 2, 3, 3, 4}, {1, 6, 1, 1, 4}}, Format::NCHW4);
+    run({{20, 1, 24, 24, 32}, {64, 1, 3, 3, 32}, {1, 2, 1, 1, 32}},
+        Format::NCHW32);
+    run({{16, 4, 23, 40, 4}, {32, 4, 3, 3, 4}, {1, 1, 1, 1, 32}},
+        Format::NCHW4_NCHW32);
+
+    checker.set_dtype(0, dtype::QuantizedS8(1.9980618f))
+            .set_dtype(1, dtype::QuantizedS8(1.9980927f))
+            .set_dtype(2, dtype::Float32())
+            .set_dtype(3, dtype::Float32())
+            .set_dtype(4, dtype::Float32())
+            .set_rng(0, &int_rng)
+            .set_rng(1, &int_rng)
+            .set_rng(2, &float_rng)
+            .set_rng(3, &float_rng);
+    run({{16, 4, 92, 160, 4}, {20, 4, 3, 3, 4}, {1, 20, 1, 1}},
+        Format::NCHW4_NCHW);
+}
+
+TEST_F(CUDA, SHAKE_MATRIX_MUL_FORWARD) {
+    AccuracyShakeChecker<MatrixMul> checker(handle_cuda());
+
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_dtype(2, dtype::Float32())
+            .exec({{50, 100}, {100, 60}, {}});
+}
+
+TEST_F(CUDA, SHAKE_BATCH_CONV_BIAS_QS8) {
+    require_compute_capability(6, 1);
+    AccuracyShakeChecker<BatchConvBiasForward> checker(handle_cuda());
+    UniformIntRNG const_rng{1, 1};
+    UniformIntRNG rng{-5, 5};
+    UniformIntRNG bias_rng{-50, 50};
+    checker.set_rng(0, &rng)
+            .set_rng(1, &rng)
+            .set_rng(2, &rng)
+            .set_rng(3, &rng)
+            .set_dtype(0, dtype::QuantizedS8{1.2f})
+            .set_dtype(1, dtype::QuantizedS8{1.3f})
+            .set_dtype(2, dtype::QuantizedS32{1.2f * 1.3f})
+            .set_dtype(3, dtype::QuantizedS8{1.1f})
+            .set_dtype(4, dtype::QuantizedS8{1.1f});
+    param::BatchConvBias param;
+    param.pad_h = 2, param.pad_w = 1;
+    param.stride_h = 1, param.stride_w = 2;
+    param.format = param::BatchConvBias::Format::NCHW4;
+    checker.set_param(param).exec({{32, 4, 24, 24, 4},
+                                   {32, 32, 4, 1, 1, 4},
+                                   {1, 8, 1, 1, 4},
+                                   {},
+                                   {}});
+}
+
+TEST_F(CUDA, SHAKE_BATCHED_MATRIX_MUL) {
+    AccuracyShakeChecker<BatchedMatrixMul> checker(handle_cuda());
+
+    UniformIntRNG int_rng{-127, 127};
+    NormalRNG default_rng;
+    checker.set_dtype(0, dtype::QuantizedS8(1.2f))
+            .set_dtype(1, dtype::QuantizedS8(1.3f))
+            .set_dtype(2, {})
+            .set_rng(0, &int_rng)
+            .set_rng(1, &int_rng);
+
+    checker.exec({{20, 424, 368}, {20, 368, 256}, {20, 424, 256}});
+
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_dtype(2, dtype::Float32())
+            .set_rng(0, &default_rng)
+            .set_rng(1, &default_rng);
+
+    checker.exec({{20, 424, 368}, {20, 368, 256}, {20, 424, 256}});
+}
+
+TEST_F(CUDA, SHAKE_CONVOLUTION3D_FORWARD) {
+    AccuracyShakeChecker<Convolution3DForward> checker(handle_cuda());
+    NormalRNG default_rng;
+    float scale = 1.0f / sqrt(5);
+    UniformFloatRNG rng(scale, 2 * scale);
+    param::Convolution3D param;
+    param.mode = param::Convolution3D::Mode::CROSS_CORRELATION;
+    param.stride_d = param.stride_h = param.stride_w = 2;
+    param.pad_d = param.pad_h = param.pad_w = 0;
+    param.dilate_d = param.dilate_h = param.dilate_w = 1;
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_rng(0, &default_rng)
+            .set_rng(1, &default_rng)
+            .set_param(param)
+            .exec({{20, 5, 12, 12, 16}, {5, 5, 3, 3, 3}, {}});
+}
+
+TEST_F(CUDA, SHAKE_LOCAL_SHARE) {
+    AccuracyShakeChecker<LocalShareForward> checker(handle_cuda());
+    using Param = LocalShare::Param;
+    Param param;
+    param.spatial_groups_h = param.spatial_groups_w = 3;
+    checker.set_param(param);
+    checker.exec({{20, 16, 32, 32}, {3, 3, 16, 3, 3, 64}, {}});
+}
+
+}  // namespace test
+}  // namespace megdnn
+
+// vim: syntax=cpp.doxygen
diff --git a/dnn/test/cuda/convolution.cpp b/dnn/test/cuda/convolution.cpp
index 95f46a6c..7bde5223 100644
--- a/dnn/test/cuda/convolution.cpp
+++ b/dnn/test/cuda/convolution.cpp
@@ -20,6 +20,7 @@
 #include "test/common/rng.h"
 #include "test/cuda/benchmark.h"
 #include "src/cuda/utils.h"
+#include "test/common/accuracy_shake_checker.h"
 
 #define V1(x) #x
 #define V(x) V1(x)
diff --git a/dnn/test/x86/accuracy_shake.cpp b/dnn/test/x86/accuracy_shake.cpp
new file mode 100644
index 00000000..2e43c33e
--- /dev/null
+++ b/dnn/test/x86/accuracy_shake.cpp
@@ -0,0 +1,104 @@
+/**
+ * \file dnn/test/x86/accuracy_shake.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+#include "test/x86/fixture.h"
+
+#include "megdnn/opr_param_defs.h"
+#include "megdnn/oprs.h"
+#include "test/common/accuracy_shake_checker.h"
+#include "test/common/convolution.h"
+#include "test/common/rng.h"
+#include "test/common/tensor.h"
+#include "test/common/workspace_wrapper.h"
+
+namespace megdnn {
+namespace test {
+
+TEST_F(X86, SHAKE_CONV_BIAS_FORWARD) {
+    AccuracyShakeChecker<ConvBiasForward> checker(handle());
+    NormalRNG default_rng;
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_dtype(2, dtype::Float32())
+            .set_rng(0, &default_rng)
+            .set_rng(1, &default_rng);
+    checker.set_before_exec_callback(AlgoGenerator<ConvBiasForward>("X86"));
+    // convolution
+    checker.exec({{6, 16, 32, 32}, {64, 16, 3, 3}, {}, {}, {}});
+    // convbias without z
+    checker.exec({{6, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {}, {}});
+    // convbias with z
+    checker.exec({{6, 16, 32, 32},
+                  {64, 16, 3, 3},
+                  {1, 64, 1, 1},
+                  {6, 64, 30, 30},
+                  {}});
+    // group
+    ConvBias::Param param;
+    param.sparse = ConvBias::Param::Sparse::GROUP;
+    checker.set_param(param);
+    checker.exec({{6, 16, 32, 32}, {2, 32, 8, 3, 3}, {}, {}, {}});
+    checker.exec({{6, 16, 32, 32}, {2, 32, 8, 3, 3}, {1, 64, 1, 1}, {}, {}});
+    checker.exec({{6, 16, 32, 32},
+                  {2, 32, 8, 3, 3},
+                  {1, 64, 1, 1},
+                  {6, 64, 30, 30},
+                  {}});
+}
+
+TEST_F(X86, SHAKE_CONV_BIAS_FORWARD_INT8) {
+    AccuracyShakeChecker<ConvBiasForward> checker(handle());
+    UniformIntRNG rng{-50, 50};
+    checker.set_dtype(0, dtype::QuantizedS8(2.5f))
+            .set_dtype(1, dtype::QuantizedS8(2.5f))
+            .set_dtype(2, dtype::QuantizedS32(6.25f))
+            .set_dtype(3, dtype::QuantizedS32(6.25f))
+            .set_dtype(4, {})
+            .set_rng(0, &rng)
+            .set_rng(1, &rng)
+            .set_rng(2, &rng);
+    checker.set_before_exec_callback(AlgoGenerator<ConvBiasForward>("X86"));
+    // convolution
+    checker.exec({{6, 16, 32, 32}, {64, 16, 3, 3}, {}, {}, {}});
+    // convbias without z
+    checker.exec({{6, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {}, {}});
+    // convbias with z
+    checker.exec({{6, 16, 32, 32},
+                  {64, 16, 3, 3},
+                  {1, 64, 1, 1},
+                  {6, 64, 30, 30},
+                  {}});
+    // group
+    ConvBias::Param param;
+    param.sparse = ConvBias::Param::Sparse::GROUP;
+    checker.set_param(param);
+    checker.exec({{6, 16, 32, 32}, {2, 32, 8, 3, 3}, {}, {}, {}});
+    checker.exec({{6, 16, 32, 32}, {2, 32, 8, 3, 3}, {1, 64, 1, 1}, {}, {}});
+    checker.exec({{6, 16, 32, 32},
+                  {2, 32, 8, 3, 3},
+                  {1, 64, 1, 1},
+                  {6, 64, 30, 30},
+                  {}});
+}
+
+TEST_F(X86, SHAKE_MATRIX_MUL_FORWARD) {
+    AccuracyShakeChecker<MatrixMul> checker(handle());
+
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_dtype(2, dtype::Float32())
+            .exec({{20, 100}, {100, 60}, {}});
+}
+
+}  // namespace test
+}  // namespace megdnn
+
+// vim: syntax=cpp.doxygen
diff --git a/dnn/test/x86/convolution.cpp b/dnn/test/x86/convolution.cpp
index 2f5a0dc8..b8227e66 100644
--- a/dnn/test/x86/convolution.cpp
+++ b/dnn/test/x86/convolution.cpp
@@ -15,6 +15,7 @@
 #include "megdnn/oprs.h"
 #include "test/common/benchmarker.h"
 #include "test/common/checker.h"
+#include "test/common/accuracy_shake_checker.h"
 #include "test/common/convolution.h"
 #include "test/common/rng.h"
 #include "test/common/tensor.h"
diff --git a/src/opr/include/megbrain/opr/search_policy/profiler.h b/src/opr/include/megbrain/opr/search_policy/profiler.h
index 486563cb..489fcad6 100644
--- a/src/opr/include/megbrain/opr/search_policy/profiler.h
+++ b/src/opr/include/megbrain/opr/search_policy/profiler.h
@@ -18,9 +18,7 @@
 #include "megbrain/comp_node.h"
 #include "megdnn/basic_types.h"
-#include "megdnn/oprs/base.h"
-#include "megdnn/oprs/linalg.h"
-#include "megdnn/oprs/nn.h"
+#include "megdnn/oprs.h"
 
 namespace mgb {
 namespace opr {
@@ -46,39 +44,6 @@ namespace opr {
 // clang-format on
 
 template <typename Opr>
-struct OprArityTrait;
-
-template <typename Opr, int _arity_in, int _arity_out>
-struct OprArityTraitTmpl {
-    static constexpr int arity_in = _arity_in;
-    static constexpr int arity_out = _arity_out;
-    static constexpr int arity = arity_in + arity_out;
-};
-
-#define INST_ARITY(_Opr, _in, _out) \
-    template <>                     \
-    struct OprArityTrait<_Opr> : public OprArityTraitTmpl<_Opr, _in, _out> {};
-
-INST_ARITY(megdnn::ConvolutionBackwardData, 2, 1);
-INST_ARITY(megdnn::ConvolutionBackwardFilter, 2, 1);
-INST_ARITY(megdnn::Convolution3DForward, 2, 1);
-INST_ARITY(megdnn::Convolution3DBackwardData, 2, 1);
-INST_ARITY(megdnn::Convolution3DBackwardFilter, 2, 1);
-INST_ARITY(megdnn::LocalShareForward, 2, 1);
-INST_ARITY(megdnn::LocalShareBackwardData, 2, 1);
-INST_ARITY(megdnn::LocalShareBackwardFilter, 2, 1);
-INST_ARITY(megdnn::Convolution, 2, 1);
-INST_ARITY(megdnn::DeformableConvForward, 4, 1);
-INST_ARITY(megdnn::DeformableConvBackwardFilter, 4, 1);
-INST_ARITY(megdnn::BatchConvBiasForward, 4, 1);
-INST_ARITY(megdnn::ConvBias, 4, 1);
-INST_ARITY(megdnn::DeformableConvBackwardData, 5, 3);
-INST_ARITY(megdnn::MatrixMul, 2, 1);
-INST_ARITY(megdnn::BatchedMatrixMul, 2, 1);
-
-#undef INST_ARITY
-
-template <typename Opr>
 constexpr bool opr_supports_preprocess() {
     return std::is_same<Opr, megdnn::ConvolutionForward>::value ||
            std::is_same<Opr, megdnn::ConvBias>::value;