|
- /**
- * \file dnn/test/cuda/dct.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied.
- */
- #include "megdnn/oprs/nn.h"
- #include "test/common/benchmarker.h"
- #include "test/common/checker.h"
- #include "test/common/dct_ref.h"
- #include "test/common/rng.h"
- #include "test/cuda/fixture.h"
-
- namespace megdnn {
- namespace test {
-
- TEST_F(CUDA, DCT) {
- DctChannelSelectForward::Param param;
- Checker<DctChannelSelectForward> checker(handle_cuda());
- for (size_t n : {1, 3}) {
- for (size_t ic : {1, 3}) {
- for (size_t ih : {8, 16, 32, 512, 1024}) {
- for (size_t iw : {8, 16, 32, 64, 128, 256, 512, 1024}) {
- checker.set_param(param)
- .set_dtype(0, dtype::Uint8())
- .set_dtype(1, dtype::Int32())
- .set_dtype(2, dtype::Int32())
- .execs({TensorShape{n, ic, ih, iw}, {}, {}, {}});
- }
- }
- }
- }
- }
-
- TEST_F(CUDA, DCT_QINT8) {
- DctChannelSelectForward::Param param;
- Checker<DctChannelSelectForward> checker(handle_cuda());
- param.format = Param::Format::NCHW4;
- for (size_t n : {1, 3}) {
- for (size_t ic : {1, 3}) {
- for (size_t ih : {8, 16, 32, 512, 1024}) {
- for (size_t iw : {8, 16, 32, 64, 128, 256, 512, 1024}) {
- checker.set_param(param)
- .set_dtype(0, dtype::Uint8())
- .set_dtype(1, dtype::Int32())
- .set_dtype(2, dtype::Int32())
- .set_dtype(3, dtype::QuantizedS8(10.f))
- .set_epsilon(1)
- .execs({TensorShape{n, ic, ih, iw}, {}, {}, {}});
- }
- }
- }
- }
- }
-
- TEST_F(CUDA, DCT_WITH_FIX_32_MASK) {
- using Param = DctChannelSelectForward::Param;
- Param param;
- Checker<DctChannelSelectForward> checker(handle_cuda(), false);
- param.fastImpl = Param::FastImpl::FIX_32_MASK;
- auto test_case = gen_dct_case(3, 3, 1024, 768, 32, param);
- checker.set_param(param).exect(test_case->testcase_in,
- test_case->testcase_out);
- }
-
- TEST_F(CUDA, DCT_WITH_FIX_32_MASK_QINT8) {
- using Param = DctChannelSelectForward::Param;
- Param param;
- Checker<DctChannelSelectForward> checker(handle_cuda(), false);
- param.fastImpl = Param::FastImpl::FIX_32_MASK;
- param.format = Param::Format::NCHW4;
- auto test_case =
- gen_dct_case(3, 3, 1024, 768, 32, param, dtype::QuantizedS8(10.f));
- checker.set_param(param).set_epsilon(1).exect(test_case->testcase_in,
- test_case->testcase_out);
- }
-
- TEST_F(CUDA, DCT_WITH_MASK) {
- Checker<DctChannelSelectForward> checker(handle_cuda(), false);
- DctChannelSelectForward::Param param;
- checker.set_param(param).exect(
- Testcase{TensorValue(
- {1, 3, 8, 16}, dtype::Uint8(),
- {109, 39, 30, 115, 71, 15, 206, 139, 221, 5,
- 18, 16, 93, 185, 99, 102, 205, 172, 191, 29,
- 185, 6, 47, 84, 0, 47, 105, 203, 251, 73,
- 196, 83, 3, 211, 32, 181, 49, 111, 114, 83,
- 148, 232, 77, 17, 35, 2, 154, 100, 41, 135,
- 141, 206, 56, 91, 137, 199, 104, 192, 75, 122,
- 78, 65, 184, 69, 91, 82, 2, 172, 194, 240,
- 49, 145, 87, 210, 97, 190, 179, 93, 125, 105,
- 181, 207, 148, 178, 133, 53, 25, 198, 238, 151,
- 14, 120, 213, 195, 145, 20, 122, 107, 217, 185,
- 65, 5, 115, 110, 82, 206, 163, 86, 2, 2,
- 44, 125, 50, 38, 41, 106, 30, 5, 151, 243,
- 238, 181, 232, 191, 161, 57, 23, 204,
-
- 109, 39, 30, 115, 71, 15, 206, 139, 221, 5,
- 18, 16, 93, 185, 99, 102, 205, 172, 191, 29,
- 185, 6, 47, 84, 0, 47, 105, 203, 251, 73,
- 196, 83, 3, 211, 32, 181, 49, 111, 114, 83,
- 148, 232, 77, 17, 35, 2, 154, 100, 41, 135,
- 141, 206, 56, 91, 137, 199, 104, 192, 75, 122,
- 78, 65, 184, 69, 91, 82, 2, 172, 194, 240,
- 49, 145, 87, 210, 97, 190, 179, 93, 125, 105,
- 181, 207, 148, 178, 133, 53, 25, 198, 238, 151,
- 14, 120, 213, 195, 145, 20, 122, 107, 217, 185,
- 65, 5, 115, 110, 82, 206, 163, 86, 2, 2,
- 44, 125, 50, 38, 41, 106, 30, 5, 151, 243,
- 238, 181, 232, 191, 161, 57, 23, 204,
-
- 109, 39, 30, 115, 71, 15, 206, 139, 221, 5,
- 18, 16, 93, 185, 99, 102, 205, 172, 191, 29,
- 185, 6, 47, 84, 0, 47, 105, 203, 251, 73,
- 196, 83, 3, 211, 32, 181, 49, 111, 114, 83,
- 148, 232, 77, 17, 35, 2, 154, 100, 41, 135,
- 141, 206, 56, 91, 137, 199, 104, 192, 75, 122,
- 78, 65, 184, 69, 91, 82, 2, 172, 194, 240,
- 49, 145, 87, 210, 97, 190, 179, 93, 125, 105,
- 181, 207, 148, 178, 133, 53, 25, 198, 238, 151,
- 14, 120, 213, 195, 145, 20, 122, 107, 217, 185,
- 65, 5, 115, 110, 82, 206, 163, 86, 2, 2,
- 44, 125, 50, 38, 41, 106, 30, 5, 151, 243,
- 238, 181, 232, 191, 161, 57, 23, 204}),
- TensorValue({4}, dtype::Int32(), {0, 14, 22, 30}),
- TensorValue({30}, dtype::Int32(),
- {8, 16, 9, 2, 3, 10, 17, 24, 32, 25,
- 18, 11, 4, 5, 0, 1, 8, 16, 9, 2,
- 3, 10, 0, 1, 8, 16, 9, 2, 3, 10}),
- {}},
- Testcase{{},
- {},
- {},
- TensorValue({1, 30, 1, 2}, dtype::Float32(),
- {-22.850792, -97.862236, -101.043236,
- -4.727012, 28.275675, -157.96654,
- 42.1377, 45.06531, -149.77373,
- 24.487143, -8.054966, -13.990831,
- -6.9395194, -3.9211385, 64.79172,
- -12.363858, -47.875, 59.,
- 56.271786, -62.725567, 120.522675,
- 16.559765, 85.74334, 112.904495,
- 99.375, 29.499973, 2.0220923,
- -19.681704, 890.12494, 941.25,
- -7.0498576, 99.47632, -22.850792,
- -97.862236, -101.043236, -4.727012,
- 28.275675, -157.96654, 42.1377,
- 45.06531, -149.77373, 24.487143,
- -8.054966, -13.990831, 890.12494,
- 941.25, -7.0498576, 99.47632,
- -22.850792, -97.862236, -101.043236,
- -4.727012, 28.275675, -157.96654,
- 42.1377, 45.06531, -149.77373,
- 24.487143, -8.054966, -13.990831})});
- }
-
- TEST_F(CUDA, DCT_WITH_MASK2) {
- Checker<DctChannelSelectForward> checker(handle_cuda(), false);
- DctChannelSelectForward::Param param;
- UniformIntRNG rng_oc(0, 3 * 64);
- for (size_t n : {1, 3}) {
- for (size_t ic : {1, 3}) {
- for (size_t ih : {8, 16, 32, 512, 1024}) {
- for (size_t iw : {8, 16, 32, 64, 128, 256, 512, 1024}) {
- int random_oc = static_cast<int>(rng_oc.gen_single_val());
- int max_oc = ic * 64;
- int mask_oc = (random_oc % max_oc) + 1;
- auto test_case =
- gen_dct_case(n, ic, ih, iw, mask_oc, param);
- checker.set_param(param).exect(test_case->testcase_in,
- test_case->testcase_out);
- }
- }
- }
- }
- }
-
- TEST_F(CUDA, DCT_WITH_MASK2_QINT8) {
- Checker<DctChannelSelectForward> checker(handle_cuda(), false);
- DctChannelSelectForward::Param param;
- param.format = DctChannelSelectForward::Param::Format::NCHW4;
-
- UniformIntRNG rng_oc(0, 3 * 64);
- for (size_t n : {1, 3}) {
- for (size_t ic : {1, 3}) {
- for (size_t ih : {8, 16, 32, 512, 1024}) {
- for (size_t iw : {8, 16, 32, 64, 128, 256, 512, 1024}) {
- int random_oc = static_cast<int>(rng_oc.gen_single_val());
- int max_oc = ic * 64;
- int mask_oc = (random_oc % max_oc) + 1;
- mask_oc = (mask_oc + 3) / 4 * 4;
- auto test_case = gen_dct_case(n, ic, ih, iw, mask_oc, param,
- dtype::QuantizedS8(10.f));
- checker.set_param(param).set_epsilon(1).exect(
- test_case->testcase_in, test_case->testcase_out);
- }
- }
- }
- }
- }
- TEST_F(CUDA, DCT_WITH_MASK2_QINT8_CONSTRAINT) {
- DctChannelSelectForward::Param param;
- param.format = DctChannelSelectForward::Param::Format::NCHW4;
-
- Checker<DctChannelSelectForward> checker(handle_cuda(), false);
- checker.set_param(param)
- .set_dtype(0, dtype::Uint8())
- .set_dtype(1, dtype::Int32())
- .set_dtype(2, dtype::Int32())
- .set_dtype(3, dtype::QuantizedS8(10.f))
- .set_epsilon(1);
-
- UniformIntRNG rng_oc(0, 3 * 64);
- for (size_t n : {1, 3}) {
- for (size_t ic : {1, 3}) {
- for (size_t ih : {8, 16, 32, 512, 1024}) {
- for (size_t iw : {8, 16, 32, 64, 128, 256, 512, 1024}) {
- int random_oc = static_cast<int>(rng_oc.gen_single_val());
- int max_oc = ic * 64;
- int mask_oc = (random_oc % max_oc) + 1;
- mask_oc = (mask_oc + 3) / 4 * 4;
- if (mask_oc < max_oc) {
- checker
- .set_tensors_constraint(gen_dct_constriant(
- n, ic, ih, iw, mask_oc, param))
- .exec({TensorShape{n, ic, ih, iw},
- TensorShape{ic + 1},
- TensorShape{(size_t)mask_oc},
- {}});
- } else {
- checker.set_tensors_constraint({}).exec(
- {TensorShape{n, ic, ih, iw}, {}, {}, {}});
- }
- }
- }
- }
- }
- }
-
- #if MEGDNN_WITH_BENCHMARK
-
- TEST_F(CUDA, BENCHMARK_DCT) {
- using Param = DctChannelSelectForward::Param;
-
- auto run = [&](const TensorShapeArray& shapes, Param param) {
- Benchmarker<DctChannelSelectForward> benchmarker(handle_cuda());
- benchmarker.set_param(param);
- benchmarker.set_dtype(0, dtype::Uint8())
- .set_dtype(1, dtype::Int32())
- .set_dtype(2, dtype::Int32());
- for (auto&& shape : shapes) {
- double computation = double(shape[0]) * shape[1] * shape[2] *
- shape[3] * 32.0 * 1e-6;
- auto time_ms = benchmarker.execs({shape, {}, {}, {}});
- printf("execute %s, %.4f Gops\n", shape.to_string().c_str(),
- computation / time_ms);
- }
- };
-
- auto run_case = [&](const DctTestcase& testcase, Param param,
- std::string comment = "") {
- Benchmarker<DctChannelSelectForward> benchmarker(handle_cuda());
- benchmarker.set_param(param);
- benchmarker.set_dtype(0, dtype::Uint8())
- .set_dtype(1, dtype::Int32())
- .set_dtype(2, dtype::Int32())
- .set_dtype(3, testcase.testcase_out[3].layout.dtype);
-
- auto src_shape = testcase.testcase_in[0].layout;
- double computation = double(src_shape[0]) * src_shape[1] *
- src_shape[2] * src_shape[3] * 32.0 * 1e-6;
- auto time_ms = benchmarker.exect(testcase.testcase_in);
- printf("[%s] execute %s, %.4f Gops\n", comment.c_str(),
- src_shape.to_string().c_str(), computation / time_ms);
- };
-
- auto run_case_constraint =
- [&](const Benchmarker<DctChannelSelectForward>::TensorsConstriant&
- constraint,
- Param param, const TensorShapeArray& shapes,
- std::string comment = "", DType output_dtype) {
- Benchmarker<DctChannelSelectForward> benchmarker(handle_cuda());
- benchmarker.set_param(param)
- .set_dtype(0, dtype::Uint8())
- .set_dtype(1, dtype::Int32())
- .set_dtype(2, dtype::Int32())
- .set_dtype(3, output_dtype)
- .set_tensors_constraint(constraint);
-
- auto src_shape = shapes[0];
- double computation = double(src_shape[0]) * src_shape[1] *
- src_shape[2] * src_shape[3] * 32.0 * 1e-6;
- auto time_ms = benchmarker.exec(shapes);
- printf("[%s] execute %s, %.4f Gops\n", comment.c_str(),
- src_shape.to_string().c_str(), computation / time_ms);
- };
-
- TensorShapeArray shapes = {
- {1, 3, 512, 512},
- {8, 3, 2176, 3840},
- };
- {
- Param param;
- run(shapes, param);
- }
-
- Param fix_32_param;
- fix_32_param.fastImpl = Param::FastImpl::FIX_32_MASK;
- {
- auto test_case = gen_dct_case(8, 3, 2176, 3840, 32, fix_32_param);
- run_case(*test_case, fix_32_param, "FIX_32_MASK");
- }
-
- {
- Param param;
- auto test_case = gen_dct_case(8, 3, 2176, 3840, 32, fix_32_param);
- run_case(*test_case, param, "MASK 32");
- }
-
- {
- Param fix_32_nchw4_param;
- fix_32_nchw4_param.fastImpl = Param::FastImpl::FIX_32_MASK;
- fix_32_nchw4_param.format = Param::Format::NCHW4;
- auto test_case = gen_dct_case(8, 3, 2176, 3840, 32, fix_32_nchw4_param,
- dtype::QuantizedS8(10.f));
- run_case(*test_case, fix_32_nchw4_param, "FIX_32_MASK QINT8");
- }
-
- {
- Param fix_32_nchw4_param;
- fix_32_nchw4_param.fastImpl = Param::FastImpl::FIX_32_MASK;
- fix_32_nchw4_param.format = Param::Format::NCHW4;
- auto test_case = gen_dct_case(8, 3, 2176, 3840, 32, fix_32_nchw4_param,
- dtype::QuantizedS8(10.f));
- fix_32_nchw4_param.fastImpl = Param::FastImpl::NONE;
- run_case(*test_case, fix_32_nchw4_param, "MASK 32 QINT8");
- }
-
- {
- Param fix_32_nchw4_param;
- fix_32_nchw4_param.fastImpl = Param::FastImpl::FIX_32_MASK;
- fix_32_nchw4_param.format = Param::Format::NCHW4;
- TensorShapeArray shapes = {{8, 3, 2176, 3840}, {4}, {32}, {}};
- auto constraint =
- gen_dct_constriant(8, 3, 2176, 3840, 32, fix_32_nchw4_param);
- run_case_constraint(constraint, fix_32_nchw4_param, shapes,
- "FIX_32_MASK QINT8 Constraint",
- dtype::QuantizedS8(10.f));
- }
- }
- #endif
-
- } // namespace test
- } // namespace megdnn
- // vim: syntax=cpp.doxygen
|