- /**
- * \file dnn/test/cuda/convolution3d.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
- #include "test/common/convolution3d.h"
- #include "megdnn/opr_param_defs.h"
- #include "megdnn/oprs.h"
- #include "src/cuda/utils.h"
- #include "test/common/benchmarker.h"
- #include "test/common/checker.h"
- #include "test/common/rng.h"
- #include "test/common/tensor.h"
- #include "test/common/workspace_wrapper.h"
- #include "test/cuda/fixture.h"
-
- namespace megdnn {
- namespace test {
-
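- // int8x8x32 NDHWC forward test, currently compiled out; it needs
- // dp4a support (compute capability >= 6.1)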
- #if 0
- TEST_F(CUDA, CONVOLUTION3D_8X8X32) {
- if (!cuda::is_compute_capability_required(6, 1)) {
- printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device"
- "doesn't support\n");
- return;
- }
- using namespace convolution3d;
- std::vector<TestArg> args;
- {
- auto v = get_args();
- for (auto&& a : v) {
- args.push_back(std::move(a));
- }
- }
- /*
- {
- auto v = get_dilated_args();
- for (auto &&a: v) {
- args.push_back(std::move(a));
- }
- }
- {
- auto v = get_chanwise_args();
- for (auto &&a: v) {
- args.push_back(std::move(a));
- }
- }
- */
- Checker<Convolution3DForward> checker(handle_cuda());
- UniformIntRNG rng(-4, 4);
- UniformIntRNG rng_same(1, 1);
- for (auto arg : args) {
- arg.param.format = param::Convolution3D::Format::NDHWC;
- arg.param.data_type = param::Convolution3D::DataType::INT8x8x32;
- arg.src = cvt_src_or_dst_ncdhw2ndhwc(arg.src);
- arg.filter = cvt_filter_ncdhw2ndhwc(arg.filter);
- checker.set_dtype(0, dtype::Int8())
- .set_dtype(1, dtype::Int8())
- .set_dtype(2, dtype::Int32())
- .set_param(arg.param)
- .set_rng(0, &rng)
- .set_rng(1, &rng)
- .execs({arg.src, arg.filter, {}});
- }
- }
- #endif
-
- TEST_F(CUDA, CONVOLUTION3D_FORWARD) {
- using namespace convolution3d;
- std::vector<TestArg> args = get_args();
- /*
- {
- auto v = get_chanwise_args();
- for (auto&& a : v) {
- args.push_back(std::move(a));
- }
- }
- {
- auto v = get_dilated_args();
- for (auto&& a : v) {
- args.push_back(std::move(a));
- }
- }
- */
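- // run the fp16 check at most once, and only on a small tensor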
- bool fp16_checked = false;
- Checker<Convolution3DForward> checker(handle_cuda());
- NormalRNG default_rng;
- for (auto&& arg : args) {
- float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
- arg.filter[4]);
- UniformFloatRNG rng(scale, 2 * scale);
- checker.set_dtype(0, dtype::Float32())
- .set_dtype(1, dtype::Float32())
- .set_rng(0, &default_rng)
- .set_rng(1, &default_rng)
- .set_epsilon(1e-3)
- .set_param(arg.param)
- .execs({arg.src, arg.filter, {}});
- if (fp16_checked || arg.src.total_nr_elems() >= 1000)
- continue;
- checker.set_dtype(0, dtype::Float16())
- .set_dtype(1, dtype::Float16())
- .set_rng(0, &rng)
- .set_rng(1, &rng)
- .set_epsilon(1e-1)
- .set_param(arg.param)
- .execs({arg.src, arg.filter, {}});
- fp16_checked = true;
- }
- }
-
- TEST_F(CUDA, CONVOLUTION3D_1X1X1_FORWARD) {
- using namespace convolution3d;
- std::vector<TestArg> args = get_1x1x1_args();
- Checker<Convolution3DForward> checker(handle_cuda());
- NormalRNG default_rng;
- for (auto&& arg : args) {
- float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
- arg.filter[4]);
- UniformFloatRNG rng(scale, 2 * scale);
- checker.set_dtype(0, dtype::Float32())
- .set_dtype(1, dtype::Float32())
- .set_rng(0, &default_rng)
- .set_rng(1, &default_rng)
- .set_epsilon(1e-3)
- .set_param(arg.param)
- .execs({arg.src, arg.filter, {}});
- }
- }
-
- TEST_F(CUDA, CONVOLUTION3D_MATMUL_FORWARD) {
- using namespace convolution3d;
- std::vector<TestArg> args = get_args();
- Checker<Convolution3DForward> checker(handle_cuda());
- NormalRNG default_rng;
- for (auto&& arg : args) {
- float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
- arg.filter[4]);
- UniformFloatRNG rng(scale, 2 * scale);
- checker.set_dtype(0, dtype::Float32())
- .set_dtype(1, dtype::Float32())
- .set_rng(0, &default_rng)
- .set_rng(1, &default_rng)
- .set_param(arg.param)
- .execs({arg.src, arg.filter, {}});
- }
- }
-
- TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_CUDNN) {
- using namespace convolution3d;
- Checker<Convolution3DForward> checker(handle_cuda());
- checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>("CUDNN"));
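- // AlgoChecker pins execution to the algorithm named "CUDNN", so the
- // noncontiguous layouts below exercise exactly that path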
- param::Convolution3D param;
- param.pad_d = param.pad_h = param.pad_w = 1;
- checker.set_dtype(0, dtype::Float32())
- .set_dtype(1, dtype::Float32())
- .set_epsilon(1e-3);
-
- //! noncontiguous case
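- //! batch stride 40960 is twice the contiguous 5*16*16*16 = 20480,
- //! i.e. the tensor skips every other batch in memory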
- {
- checker.set_param(param).execl(TensorLayoutArray{
- {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
- {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
- {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
- }
- }
-
- TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_INPLACE_MATMUL) {
- using namespace convolution3d;
- Checker<Convolution3DForward> checker(handle_cuda());
- checker.set_before_exec_callback(
- AlgoChecker<Convolution3DForward>("INPLACE_MATMUL"));
- param::Convolution3D param;
- param.pad_d = param.pad_h = param.pad_w = 1;
- checker.set_dtype(0, dtype::Float32())
- .set_dtype(1, dtype::Float32())
- .set_epsilon(1e-3);
-
- //! noncontiguous case
- {
- checker.set_param(param).execl(TensorLayoutArray{
- {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
- {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
- {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
- }
- }
-
- TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_1x1x1) {
- using namespace convolution3d;
- Checker<Convolution3DForward> checker(handle_cuda());
- checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>("1x1x1"));
- param::Convolution3D param;
- checker.set_dtype(0, dtype::Float32())
- .set_dtype(1, dtype::Float32())
- .set_epsilon(1e-3);
-
- //! noncontiguous case
- {
- checker.set_param(param).execl(TensorLayoutArray{
- {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
- {{5, 5, 1, 1, 1}, {5, 1, 1, 1, 1}, dtype::Float32()},
- {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
- }
- }
-
- #if MEGDNN_WITH_BENCHMARK
- TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_MATMUL_BACKWARD_FILTER) {
- using namespace convolution3d;
- std::vector<TestArg> args = get_speed_test_args();
- Benchmarker<Convolution3DBackwardFilter> marker(handle_cuda());
- NormalRNG default_rng;
- for (auto&& arg : args) {
- float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
- arg.filter[4]);
- auto src = TensorLayout(arg.src, dtype::Float32());
- auto filter = TensorLayout(arg.filter, dtype::Float32());
- TensorLayout dst;
- auto opr = handle_cuda()->create_operator<Convolution3D>();
- opr->param() = arg.param;
- opr->deduce_layout(src, filter, dst);
- UniformFloatRNG rng(scale, 2 * scale);
- marker.set_dtype(0, dtype::Float32())
- .set_dtype(1, dtype::Float32())
- .set_rng(0, &default_rng)
- .set_rng(1, &default_rng)
- .set_param(arg.param)
- .execs({src, dst, filter});
- }
- }
-
- TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_MATMUL_FORWARD) {
- using namespace convolution3d;
- std::vector<TestArg> args = get_speed_test_args();
- Benchmarker<Convolution3DForward> marker(handle_cuda());
- NormalRNG default_rng;
- for (auto&& arg : args) {
- float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
- arg.filter[4]);
- UniformFloatRNG rng(scale, 2 * scale);
- marker.set_dtype(0, dtype::Float32())
- .set_dtype(1, dtype::Float32())
- .set_rng(0, &default_rng)
- .set_rng(1, &default_rng)
- // set_param(arg.param)
- .execs({arg.src, arg.filter, {}});
- }
- }
-
- TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_1X1X1_FORWARD) {
- using namespace convolution3d;
- std::vector<TestArg> args = get_1x1x1_args();
- Benchmarker<Convolution3DForward> marker(handle_cuda());
- NormalRNG default_rng;
- for (auto&& arg : args) {
- float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
- arg.filter[4]);
- UniformFloatRNG rng(scale, 2 * scale);
- marker.set_dtype(0, dtype::Float32())
- .set_dtype(1, dtype::Float32())
- .set_rng(0, &default_rng)
- .set_rng(1, &default_rng)
- // set_param(arg.param)
- .execs({arg.src, arg.filter, {}});
- }
- }
-
- TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_FORWARD) {
- using namespace convolution3d;
- std::vector<TestArg> args = get_args();
- {
- auto v = get_chanwise_args();
- for (auto&& a : v)
- args.push_back(std::move(a));
- }
- {
- auto v = get_1x1x1_args();
- for (auto&& a : v)
- args.push_back(std::move(a));
- }
- {
- auto v = get_dilated_args();
- for (auto&& a : v)
- args.push_back(std::move(a));
- }
- Benchmarker<Convolution3DForward> marker(handle_cuda());
- NormalRNG default_rng;
- for (auto&& arg : args) {
- float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
- arg.filter[4]);
- UniformFloatRNG rng(scale, 2 * scale);
- marker.set_dtype(0, dtype::Float32())
- .set_dtype(1, dtype::Float32())
- .set_rng(0, &default_rng)
- .set_rng(1, &default_rng)
- .set_param(arg.param)
- .execs({arg.src, arg.filter, {}});
- marker.set_dtype(0, dtype::Float16())
- .set_dtype(1, dtype::Float16())
- .set_rng(0, &rng)
- .set_rng(1, &rng)
- .set_param(arg.param)
- .execs({arg.src, arg.filter, {}});
- }
- }
-
- #endif
-
- TEST_F(CUDA, CONVOLUTION3D_BACKWARD_DATA) {
- using namespace convolution3d;
- std::vector<TestArg> args = get_args();
- Checker<Convolution3DBackwardData> checker(handle_cuda());
- NormalRNG default_rng;
- for (auto&& arg : args) {
- float scale = 1.0f / sqrt(arg.filter[0] * arg.filter[2] * arg.filter[3] *
- arg.filter[4]);
- UniformFloatRNG rng(scale, 2 * scale);
- auto src = TensorLayout(arg.src, dtype::Float32());
- auto filter = TensorLayout(arg.filter, dtype::Float32());
- TensorLayout dst;
- {
- auto opr = handle_cuda()->create_operator<Convolution3D>();
- opr->param() = arg.param;
- opr->deduce_layout(src, filter, dst);
- }
- src.dtype = dst.dtype = filter.dtype = dtype::Float32();
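- // BackwardData::exec takes (filter, diff, grad); the deduced
- // forward output layout `dst` serves as diff here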
- checker.set_rng(0, &default_rng)
- .set_rng(1, &default_rng)
- .set_epsilon(1e-3)
- .set_param(arg.param)
- .exec(TensorLayoutArray{filter, dst, src});
- src.dtype = dst.dtype = filter.dtype = dtype::Float16();
- checker.set_rng(0, &rng)
- .set_rng(1, &rng)
- .set_epsilon(1e-1)
- .set_param(arg.param)
- .exec(TensorLayoutArray{filter, dst, src});
- }
- }
-
- TEST_F(CUDA, CONVOLUTION3D_BACKWARD_FILTER) {
- using namespace convolution3d;
- std::vector<TestArg> args = get_args();
- Checker<Convolution3DBackwardFilter> checker(handle_cuda());
- NormalRNG default_rng;
- for (auto&& arg : args) {
- auto src = TensorLayout(arg.src, dtype::Float32());
- auto filter = TensorLayout(arg.filter, dtype::Float32());
- TensorLayout dst;
- {
- auto opr = handle_cuda()->create_operator<Convolution3D>();
- opr->param() = arg.param;
- opr->deduce_layout(src, filter, dst);
- }
- float scale = 1.0f / sqrt(dst[0] * dst[2] * dst[3] * dst[4]);
- UniformFloatRNG rng(scale, 2 * scale);
- src.dtype = dst.dtype = filter.dtype = dtype::Float32();
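- // BackwardFilter::exec takes (src, diff, grad); scale is based on
- // the filter-gradient reduction size N * OD * OH * OW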
- checker.set_rng(0, &default_rng)
- .set_rng(1, &default_rng)
- .set_epsilon(1e-3)
- .set_param(arg.param)
- .exec(TensorLayoutArray{src, dst, filter});
-
- if (dst.total_nr_elems() >= 1000)
- continue;
- src.dtype = dst.dtype = filter.dtype = dtype::Float16();
- checker.set_rng(0, &rng)
- .set_rng(1, &rng)
- .set_epsilon(1e-1)
- .set_param(arg.param)
- .exec(TensorLayoutArray{src, dst, filter});
- }
- }
-
- TEST_F(CUDA, CONVOLUTION3D_MATMUL_BACKWARD_FILTER) {
- using namespace convolution3d;
- std::vector<TestArg> args = get_args();
- Checker<Convolution3DBackwardFilter> checker(handle_cuda());
- NormalRNG default_rng;
- for (auto&& arg : args) {
- float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3] *
- arg.filter[4]);
- UniformFloatRNG rng(scale, 2 * scale);
- auto src = TensorLayout(arg.src, dtype::Float32());
- auto filter = TensorLayout(arg.filter, dtype::Float32());
- TensorLayout dst;
- auto opr = handle_cuda()->create_operator<Convolution3D>();
- opr->param() = arg.param;
- opr->deduce_layout(src, filter, dst);
- src.dtype = dst.dtype = filter.dtype = dtype::Float32();
- checker.set_rng(0, &default_rng)
- .set_rng(1, &default_rng)
- .set_param(arg.param)
- .exec(TensorLayoutArray{src, dst, filter});
- }
- }
-
- TEST_F(CUDA, CONVOLUTION3D_BACKWARD_DATA_NONCONTIG_CUDNN) {
- using namespace convolution3d;
- Checker<Convolution3DBackwardData> checker(handle_cuda());
- checker.set_before_exec_callback(AlgoChecker<Convolution3DBackwardData>("CUDNN"));
- Convolution3DBackwardData::Param param;
- param.pad_d = param.pad_h = param.pad_w = 1;
- NormalRNG default_rng;
- checker.set_dtype(0, dtype::Float32())
- .set_dtype(1, dtype::Float32())
- .set_rng(0, &default_rng)
- .set_rng(1, &default_rng)
- .set_epsilon(1e-3)
- .set_param(param);
- //! noncontiguous case
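- //! layout order matches BackwardData::exec: (filter, diff, grad)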
- {
- checker.execl(TensorLayoutArray{
- {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
- {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
- {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()}});
- }
- }
-
- TEST_F(CUDA, CONVOLUTION3D_BACKWARD_FILTER_NONCONTIG_CUDNN) {
- using namespace convolution3d;
- Checker<Convolution3DBackwardFilter> checker(handle_cuda());
- checker.set_before_exec_callback(AlgoChecker<Convolution3DBackwardFilter>("CUDNN"));
- Convolution3DBackwardFilter::Param param;
- param.pad_d = param.pad_h = param.pad_w = 1;
- NormalRNG default_rng;
- checker.set_dtype(0, dtype::Float32())
- .set_dtype(1, dtype::Float32())
- .set_rng(0, &default_rng)
- .set_rng(1, &default_rng)
- .set_epsilon(1e-3)
- .set_param(param);
- //! noncontiguous case
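- //! layout order matches BackwardFilter::exec: (src, diff, grad)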
- {
- checker.execl(TensorLayoutArray{
- {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
- {{4, 5, 16, 16, 16}, {40960, 4096, 256, 16, 1}, dtype::Float32()},
- {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()}});
- }
- }
-
- /*
- TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) {
- auto eps_getter = [](bool f16, int stage, const char *name) -> float {
- if (f16) {
- return stage == 2 ? 0.9 : 0.7;
- }
- if (strstr(name, "WINOGRAD_NONFUSED"))
- return 0.3;
- return 1e-3;
- };
- convolution3d::test_conv_config_combinations(handle_cuda(), false, true,
- true, eps_getter);
- }
- */
-
- } // namespace test
- } // namespace megdnn
-
- // vim: syntax=cpp.doxygen