|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317 |
- #include "test/common/remap.h"
- #include "test/common/benchmarker.h"
- #include "test/common/checker.h"
- #include "test/common/rng.h"
- #include "test/cuda/benchmark.h"
- #include "test/cuda/fixture.h"
-
- namespace megdnn {
- namespace test {
- namespace remap {
-
- // Forward Remap check on CUDA for floating-point data, driven by the argument
- // sets returned by get_nchw_args(). Operands are passed as {src, map_xy, dst}:
- // operands 0 and 2 use the data type under test, operand 1 is always Float32.
- // Float32 and Float16 run first without touching the checker's tolerance;
- // BFloat16 runs last with the tolerance set to 1e-2.
- TEST_F(CUDA, REMAP_NCHW_FLOAT) {
-     Checker<Remap> checker(handle_cuda());
-     std::vector<TestArg> args = get_nchw_args();
-     UniformFloatRNG float_rng(0, 255);
-
-     // Executes every argument set with `data_type` on operands 0 and 2.
-     // When `relax_epsilon` is true the tolerance is set to 1e-2 before each run.
-     auto run_all = [&](DType data_type, bool relax_epsilon) {
-         for (const auto& arg : args) {
-             // Map values are sampled from -2 up to the larger of map_xy's
-             // dims 1 and 2, plus 2 (presumably so that some coordinates land
-             // outside the source image -- TODO confirm).
-             const auto map_extent =
-                     std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]);
-             UniformFloatRNG map_rng(-2, map_extent + 2);
-             checker.set_dtype(0, data_type)
-                     .set_dtype(1, dtype::Float32())
-                     .set_dtype(2, data_type)
-                     .set_rng(0, &float_rng)
-                     .set_rng(1, &map_rng)
-                     .set_rng(2, &float_rng)
-                     .set_param(arg.param);
-             if (relax_epsilon) {
-                 checker.set_epsilon(1e-2);
-             }
-             checker.execs({arg.src, arg.map_xy, arg.dst});
-         }
-     };
-
-     run_all(dtype::Float32(), false);
-     run_all(dtype::Float16(), false);
-     // BFloat16 goes last: it is the only type that changes the tolerance.
-     run_all(dtype::BFloat16(), true);
- }
-
- // Forward Remap check on CUDA for 8-bit integer data, driven by the argument
- // sets returned by get_nchw_args(). Operands are passed as {src, map_xy, dst};
- // the map operand is always Float32 and the tolerance is set to 1.
- TEST_F(CUDA, REMAP_NCHW_INT) {
-     Checker<Remap> checker(handle_cuda());
-     std::vector<TestArg> args = get_nchw_args();
-     UniformIntRNG uint8_rng(0, 255);
-     UniformIntRNG int8_rng(-128, 127);
-
-     // Executes every argument set with `data_type` on operands 0 and 2, both
-     // filled from `data_rng`.
-     auto run_all = [&](DType data_type, UniformIntRNG& data_rng) {
-         for (const auto& arg : args) {
-             // Map values are sampled from -2 up to the larger of map_xy's
-             // dims 1 and 2, plus 2.
-             const auto map_extent =
-                     std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]);
-             UniformFloatRNG map_rng(-2, map_extent + 2);
-             checker.set_dtype(0, data_type)
-                     .set_dtype(1, dtype::Float32())
-                     .set_dtype(2, data_type)
-                     .set_rng(0, &data_rng)
-                     .set_rng(1, &map_rng)
-                     .set_rng(2, &data_rng)
-                     .set_epsilon(1)
-                     .set_param(arg.param)
-                     .execs({arg.src, arg.map_xy, arg.dst});
-         }
-     };
-
-     run_all(dtype::Int8(), int8_rng);
-     run_all(dtype::Uint8(), uint8_rng);
- }
-
- // Forward Remap check on CUDA for floating-point data, driven by the argument
- // sets returned by get_nhwc_args(). Operands are passed as {src, map_xy, dst}:
- // operands 0 and 2 use the data type under test, operand 1 is always Float32.
- // Float32 and Float16 run first without touching the checker's tolerance;
- // BFloat16 runs last with the tolerance set to 1e-2.
- TEST_F(CUDA, REMAP_NHWC_FLOAT) {
-     Checker<Remap> checker(handle_cuda());
-     std::vector<TestArg> args = get_nhwc_args();
-     UniformFloatRNG float_rng(0, 255);
-
-     // Executes every argument set with `data_type` on operands 0 and 2.
-     // When `relax_epsilon` is true the tolerance is set to 1e-2 before each run.
-     auto run_all = [&](DType data_type, bool relax_epsilon) {
-         for (const auto& arg : args) {
-             // Map values are sampled from -2 up to the larger of map_xy's
-             // dims 1 and 2, plus 2.
-             const auto map_extent =
-                     std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]);
-             UniformFloatRNG map_rng(-2, map_extent + 2);
-             checker.set_dtype(0, data_type)
-                     .set_dtype(1, dtype::Float32())
-                     .set_dtype(2, data_type)
-                     .set_rng(0, &float_rng)
-                     .set_rng(1, &map_rng)
-                     .set_rng(2, &float_rng)
-                     .set_param(arg.param);
-             if (relax_epsilon) {
-                 checker.set_epsilon(1e-2);
-             }
-             checker.execs({arg.src, arg.map_xy, arg.dst});
-         }
-     };
-
-     run_all(dtype::Float32(), false);
-     run_all(dtype::Float16(), false);
-     // BFloat16 goes last: it is the only type that changes the tolerance.
-     run_all(dtype::BFloat16(), true);
- }
-
- // Forward Remap check on CUDA for 8-bit integer data, driven by the argument
- // sets returned by get_nhwc_args(). Operands are passed as {src, map_xy, dst};
- // the map operand is always Float32 and the tolerance is set to 1.
- TEST_F(CUDA, REMAP_NHWC_INT) {
-     Checker<Remap> checker(handle_cuda());
-     std::vector<TestArg> args = get_nhwc_args();
-     UniformIntRNG uint8_rng(0, 255);
-     UniformIntRNG int8_rng(-128, 127);
-
-     // Executes every argument set with `data_type` on operands 0 and 2, both
-     // filled from `data_rng`.
-     auto run_all = [&](DType data_type, UniformIntRNG& data_rng) {
-         for (const auto& arg : args) {
-             // Map values are sampled from -2 up to the larger of map_xy's
-             // dims 1 and 2, plus 2.
-             const auto map_extent =
-                     std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]);
-             UniformFloatRNG map_rng(-2, map_extent + 2);
-             checker.set_dtype(0, data_type)
-                     .set_dtype(1, dtype::Float32())
-                     .set_dtype(2, data_type)
-                     .set_rng(0, &data_rng)
-                     .set_rng(1, &map_rng)
-                     .set_rng(2, &data_rng)
-                     .set_epsilon(1)
-                     .set_param(arg.param)
-                     .execs({arg.src, arg.map_xy, arg.dst});
-         }
-     };
-
-     run_all(dtype::Int8(), int8_rng);
-     run_all(dtype::Uint8(), uint8_rng);
- }
-
- // RemapBackwardData check on CUDA, driven by the argument sets returned by
- // get_nchw_args(). Operands are passed as {map_xy, dst, src}: operand 0 (the
- // map) is always Float32, operands 1 and 2 use the data type under test.
- // Float32 runs first without touching the checker's tolerance; BFloat16 and
- // Float16 follow with the tolerance set to 1e-1.
- TEST_F(CUDA, REMAP_BACKWARD_DATA) {
-     Checker<RemapBackwardData> checker(handle_cuda());
-     std::vector<TestArg> args = get_nchw_args();
-     UniformFloatRNG float_rng(0, 255);
-
-     // Executes every argument set with `data_type` on operands 1 and 2.
-     // When `relax_epsilon` is true the tolerance is set to 1e-1 before each run.
-     auto run_all = [&](DType data_type, bool relax_epsilon) {
-         for (const auto& arg : args) {
-             // Map values are sampled from -2 up to the larger of map_xy's
-             // dims 1 and 2, plus 2.
-             const auto map_extent =
-                     std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]);
-             UniformFloatRNG map_rng(-2, map_extent + 2);
-             checker.set_dtype(1, data_type)
-                     .set_dtype(0, dtype::Float32())
-                     .set_dtype(2, data_type)
-                     .set_rng(1, &float_rng)
-                     .set_rng(0, &map_rng)
-                     .set_rng(2, &float_rng)
-                     .set_param(arg.param);
-             if (relax_epsilon) {
-                 checker.set_epsilon(1e-1);
-             }
-             checker.execs({arg.map_xy, arg.dst, arg.src});
-         }
-     };
-
-     // Float32 must stay first: the later runs change the tolerance.
-     run_all(dtype::Float32(), false);
-     run_all(dtype::BFloat16(), true);
-     run_all(dtype::Float16(), true);
- }
-
- // RemapBackwardMat check on CUDA, driven by the argument sets returned by
- // get_nchw_args(). Operands are passed as {src, map_xy, dst, map_xy}:
- // operands 0 and 2 use the data type under test, operands 1 and 3 are always
- // Float32 and share the map RNG. Float32 runs with tolerance 2e-2; BFloat16
- // and Float16 run with tolerance 1e-1.
- TEST_F(CUDA, REMAP_BACKWARD_MAT) {
-     Checker<RemapBackwardMat> checker(handle_cuda());
-     std::vector<TestArg> args = get_nchw_args();
-     UniformFloatRNG float_rng(0, 255);
-
-     // Executes every argument set with `data_type` on operands 0 and 2 and
-     // with the checker tolerance set to `epsilon`.
-     auto run_all = [&](DType data_type, double epsilon) {
-         for (const auto& arg : args) {
-             // Map values are sampled from -2 up to the larger of map_xy's
-             // dims 1 and 2, plus 2.
-             const auto map_extent =
-                     std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]);
-             UniformFloatRNG map_rng(-2, map_extent + 2);
-             checker.set_dtype(0, data_type)
-                     .set_dtype(1, dtype::Float32())
-                     .set_dtype(2, data_type)
-                     .set_dtype(3, dtype::Float32())
-                     .set_rng(0, &float_rng)
-                     .set_rng(1, &map_rng)
-                     .set_rng(2, &float_rng)
-                     .set_rng(3, &map_rng)
-                     .set_param(arg.param)
-                     .set_epsilon(epsilon)
-                     .execs({arg.src, arg.map_xy, arg.dst, arg.map_xy});
-         }
-     };
-
-     run_all(dtype::Float32(), 2e-2);
-     run_all(dtype::BFloat16(), 1e-1);
-     run_all(dtype::Float16(), 1e-1);
- }
-
- #if MEGDNN_WITH_BENCHMARK
-
- // Benchmarks forward Remap with LINEAR interpolation on a CPU handle and on
- // the CUDA handle, and prints time and throughput for each. Covers NHWC and
- // NCHW formats, CONSTANT and REPLICATE border modes, and the Float32, Float16,
- // Uint8 and Int8 data types.
- TEST_F(CUDA, BENCHMARK_REMAP) {
-     using Param = param::Remap;
-
-     // Times one configuration on both backends and prints a single result line.
-     // `shapes` is {src, map_xy, dst}; `data_type` is applied to operands 0 and 2.
-     auto run = [&](const TensorShapeArray& shapes, Param param, DType data_type) {
-         auto handle_cpu = create_cpu_handle(2);
-         Benchmarker<Remap> benchmarker_naive(handle_cpu.get());
-         CUBenchmarker<Remap> benchmarker_cuda(handle_cuda());
-         UniformIntRNG rng(0, 0xff);
-         // Map values are sampled from -2 up to the larger of the map's dims 1
-         // and 2, plus 2.
-         UniformFloatRNG map_rng(
-                 -2, std::max(shapes[1].shape[1], shapes[1].shape[2]) + 2);
-
-         // Both benchmarkers get the same RNGs and dtypes.
-         benchmarker_naive.set_rng(0, &rng);
-         benchmarker_naive.set_rng(1, &map_rng);
-         benchmarker_naive.set_rng(2, &rng);
-         benchmarker_cuda.set_rng(0, &rng);
-         benchmarker_cuda.set_rng(1, &map_rng);
-         benchmarker_cuda.set_rng(2, &rng);
-
-         benchmarker_naive.set_dtype(1, dtype::Float32());
-         benchmarker_naive.set_dtype(0, data_type).set_dtype(2, data_type);
-         benchmarker_cuda.set_dtype(1, dtype::Float32());
-         benchmarker_cuda.set_dtype(0, data_type).set_dtype(2, data_type);
-
-         // Only the CPU benchmarker is given an explicit repetition count; its
-         // result is divided by that count below, the CUDA result is used as is.
-         const size_t naive_runs = 10;
-         auto t1 = benchmarker_naive.set_display(false)
-                           .set_times(naive_runs)
-                           .set_param(param)
-                           .execs(shapes);
-         auto t2 = benchmarker_cuda.set_display(false).set_param(param).execs(shapes);
-
-         // Element size in bytes and the printed label; both stay empty/zero
-         // for any data type not listed here.
-         int elem_size = 0;
-         const char* label = "";
-         if (data_type == dtype::Float32{}) {
-             elem_size = sizeof(float);
-             label = "float32: ";
-         } else if (data_type == dtype::Float16{}) {
-             elem_size = sizeof(dt_float16);
-             label = "float16: ";
-         } else if (data_type == dtype::Int8{}) {
-             elem_size = sizeof(dt_int8);
-             label = "int8: ";
-         } else if (data_type == dtype::Uint8{}) {
-             elem_size = sizeof(dt_uint8);
-             label = "uint8: ";
-         }
-         printf("%s", label);
-
-         const TensorShape& map_shape = shapes[1];
-         const TensorShape& dst_shape = shapes[2];
-
-         // Estimated traffic in GiB: (4 + 1) elements per output element
-         // (presumably four source reads plus one write for linear
-         // interpolation -- TODO confirm) plus the Float32 map itself.
-         float calc_amount =
-                 (dst_shape.total_nr_elems() * (4.f + 1.f) * elem_size +
-                  map_shape.total_nr_elems() * sizeof(float)) /
-                 (1024 * 1024 * 1024);
-         const auto naive_ms = t1 / naive_runs;
-         printf("naive={%.3fms, %.3fGBPS}, "
-                "cuda={%.3fms, %.3fGBPS}\n",
-                naive_ms, calc_amount / naive_ms * 1e3, t2, calc_amount / t2 * 1e3);
-     };
-
-     Param param;
-     param.imode = param::Remap::InterpolationMode::LINEAR;
-
-     // Runs the four benchmarked data types with the current `param`.
-     auto run_all_dtypes = [&](const TensorShapeArray& shapes) {
-         run(shapes, param, dtype::Float32{});
-         run(shapes, param, dtype::Float16{});
-         run(shapes, param, dtype::Uint8{});
-         run(shapes, param, dtype::Int8{});
-     };
-
-     // {src, map_xy, dst} for each layout.
-     const TensorShapeArray nhwc_shapes = {
-             {4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}};
-     const TensorShapeArray nchw_shapes = {
-             {4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}};
-
-     param.format = param::Remap::Format::NHWC;
-     param.border_type = param::Remap::BorderMode::CONSTANT;
-     run_all_dtypes(nhwc_shapes);
-     param.border_type = param::Remap::BorderMode::REPLICATE;
-     run_all_dtypes(nhwc_shapes);
-
-     param.format = param::Remap::Format::NCHW;
-     param.border_type = param::Remap::BorderMode::CONSTANT;
-     run_all_dtypes(nchw_shapes);
-     param.border_type = param::Remap::BorderMode::REPLICATE;
-     run_all_dtypes(nchw_shapes);
- }
-
- #endif
-
- } // namespace remap
- } // namespace test
- } // namespace megdnn
-
- // vim: syntax=cpp.doxygen
|