- #include "test/arm_common/fixture.h"
-
- #include "megdnn/oprs.h"
- #include "test/common/benchmarker.h"
- #include "test/common/checker.h"
- #include "test/common/task_record_check.h"
-
- using namespace megdnn;
- using namespace test;
-
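// Correctness test for the ARM reduce kernels: MEAN/MAX/MIN over float, fp16
// and quantized dtypes, then SUM/PRODUCT/SUM_SQR over float dtypes, across
// axes 0-2 and a sweep of (A, B, C) shapes.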
TEST_F(ARM_COMMON, REDUCE) {
    using Param = Reduce::Param;
    using Mode = Param::Mode;
    Checker<Reduce> checker(handle());
    UniformIntRNG rng{INT8_MIN >> 1, INT8_MAX >> 1};
    checker.set_rng(0, &rng);
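    // A Config bundles one (mode, axis) Param with an input dtype and shape.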
    struct Config {
        Param param;
        DType dtype;
        TensorShape shape;
        Config(Param param, DType dtype, TensorShape shape)
                : param(param), dtype(dtype), shape(shape) {}
    };
    std::vector<Config> configs;
    for (auto mode : {Mode::MEAN, Mode::MAX, Mode::MIN})
        for (auto dtype : std::vector<DType>{
                     dtype::Float32(), dtype::Float16(), dtype::QuantizedS8(1.3f),
                     dtype::Quantized8Asymm(1.3f, static_cast<uint8_t>(3))})
            for (int32_t axis : {0, 1, 2}) {
                for (size_t A : {1, 3, 5}) {
                    for (size_t B : {4, 6, 9, 16, 33, 45}) {
                        for (size_t C : {2, 3, 4, 6, 9, 16, 33, 45}) {
                            TensorShape shape{A, B, C};
                            Param param(mode, axis);
                            Config config(param, dtype, shape);
                            configs.push_back(config);
                        }
                    }
                }
            }
    for (auto&& config : configs) {
        auto&& dtype = config.dtype;
        auto&& param = config.param;
        auto&& shape = config.shape;

        checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
    }
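    // Second pass: SUM/PRODUCT/SUM_SQR are exercised on float dtypes only.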
    configs.clear();
    for (auto mode : {Mode::SUM, Mode::PRODUCT, Mode::SUM_SQR})
        for (auto dtype : std::vector<DType>{dtype::Float32(), dtype::Float16()})
            for (int32_t axis : {0, 1, 2}) {
                for (size_t A : {1, 3, 5}) {
                    for (size_t B : {4, 6, 9, 16, 33, 45}) {
                        for (size_t C : {2, 3, 4, 6, 9, 16, 33, 45}) {
                            TensorShape shape{A, B, C};
                            Param param(mode, axis);
                            Config config(param, dtype, shape);
                            configs.push_back(config);
                        }
                    }
                }
            }

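    // Switch to small-range float inputs; fp16 gets a looser tolerance than
    // fp32 below.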
    UniformFloatRNG rng_float(-2, 2);
    checker.set_rng(0, &rng_float);
    checker.set_epsilon(1e-1);
    for (auto&& config : configs) {
        auto&& dtype = config.dtype;
        auto&& param = config.param;
        auto&& shape = config.shape;
        if (dtype == dtype::Float16())
            checker.set_epsilon(1e-1);
        else
            checker.set_epsilon(1e-3);

        checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
    }
}

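// Same coverage as REDUCE, but driven through TaskRecordChecker to verify the
// task-record execution path of the operator.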
TEST_F(ARM_COMMON, REDUCE_RECORD) {
    using Param = Reduce::Param;
    using Mode = Param::Mode;
    TaskRecordChecker<Reduce> checker(0);
    UniformIntRNG rng{INT8_MIN >> 1, INT8_MAX >> 1};
    checker.set_rng(0, &rng);
    struct Config {
        Param param;
        DType dtype;
        TensorShape shape;
        Config(Param param, DType dtype, TensorShape shape)
                : param(param), dtype(dtype), shape(shape) {}
    };
    std::vector<Config> configs;
    for (auto mode : {Mode::MEAN, Mode::MAX, Mode::MIN})
        for (auto dtype : std::vector<DType>{
                     dtype::Float32(), dtype::Float16(), dtype::QuantizedS8(1.3f),
                     dtype::Quantized8Asymm(1.3f, static_cast<uint8_t>(3))})
            for (int32_t axis : {0, 1, 2}) {
                for (size_t A : {1, 3, 5}) {
                    for (size_t B : {4, 6, 9, 16, 33, 45}) {
                        for (size_t C : {4, 6, 9, 16, 33, 45}) {
                            TensorShape shape{A, B, C};
                            Param param(mode, axis);
                            Config config(param, dtype, shape);
                            configs.push_back(config);
                        }
                    }
                }
            }
    for (auto&& config : configs) {
        auto&& dtype = config.dtype;
        auto&& param = config.param;
        auto&& shape = config.shape;

        checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
    }
    configs.clear();
    for (auto mode : {Mode::SUM, Mode::PRODUCT, Mode::SUM_SQR})
        for (auto dtype : std::vector<DType>{dtype::Float32(), dtype::Float16()})
            for (int32_t axis : {0, 1, 2}) {
                for (size_t A : {1, 3, 5}) {
                    for (size_t B : {4, 6, 9, 16, 33, 45}) {
                        for (size_t C : {4, 6, 9, 16, 33, 45}) {
                            TensorShape shape{A, B, C};
                            Param param(mode, axis);
                            Config config(param, dtype, shape);
                            configs.push_back(config);
                        }
                    }
                }
            }

    UniformFloatRNG rng_float(-2, 2);
    checker.set_rng(0, &rng_float);
    checker.set_epsilon(1e-1);
    for (auto&& config : configs) {
        auto&& dtype = config.dtype;
        auto&& param = config.param;
        auto&& shape = config.shape;
        if (dtype == dtype::Float16())
            checker.set_epsilon(1e-1);
        else
            checker.set_epsilon(1e-3);

        checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
    }
}

#if MEGDNN_WITH_BENCHMARK
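// Benchmark: time the reduce operator on the current handle against a fallback
// CPU handle and print per-run latency, throughput and speedup.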
TEST_F(ARM_COMMON, BENCHMARK_REDUCE) {
    auto run = [&](size_t A, size_t B, size_t C, size_t axis,
                   megdnn::param::Reduce::Mode mode, megdnn::DType& dtype) {
        auto handle_fallback = create_cpu_handle(1);
        Benchmarker<Reduce> benchmarker(handle());
        Benchmarker<Reduce> benchmarker_fallback(handle_fallback.get());
        benchmarker_fallback.set_display(false);
        benchmarker.set_display(false);
        constexpr size_t RUNS = 50;
        benchmarker_fallback.set_times(RUNS);
        benchmarker.set_times(RUNS);
        param::Reduce param;
        param.axis = axis;
        param.mode = mode;
        benchmarker.set_param(param);
        benchmarker_fallback.set_param(param);

        TensorLayout src({A, B, C}, dtype), dst;
        auto opr = handle()->create_operator<Reduce>();
        opr->param() = param;
        opr->deduce_layout(src, dst);

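        // Report the average time per run; "Gflops" counts one operation per
        // input element (total_nr_elems / 2^30), scaled by 1e3 because the
        // measured times are in milliseconds.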
        auto bench = [&](const char* msg) {
            auto cur = benchmarker.execs({src, dst}) / RUNS;
            auto fallback = benchmarker_fallback.execs({src, dst}) / RUNS;
            float computation = src.total_nr_elems() / 1024.0 / 1024.0 / 1024.0 * 1e3;
            printf("run %s->%s %s: fallback: %fms %fGflops "
                   "cur: %fms %fGflops speedup=%f\n",
                   src.to_string().c_str(), dst.to_string().c_str(), msg, fallback,
                   computation / fallback, cur, computation / cur, fallback / cur);
        };

        benchmarker_fallback.set_dtype(0, dtype);
        benchmarker.set_dtype(0, dtype);
        bench(dtype.name());
    };

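    // Sweep MEAN/MAX/MIN over float and quantized dtypes, then
    // SUM/PRODUCT/SUM_SQR over float dtypes, reducing along axis 1 or 2.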
    for (auto mode :
         {param::Reduce::Mode::MEAN, param::Reduce::Mode::MAX,
          param::Reduce::Mode::MIN})
        for (int32_t axis : {1, 2}) {
            if (mode == param::Reduce::Mode::MEAN)
                printf("testcase mean %s\n", axis == 2 ? "c == 1" : "c > 1");
            else if (mode == param::Reduce::Mode::MAX)
                printf("testcase max %s\n", axis == 2 ? "c == 1" : "c > 1");
            else if (mode == param::Reduce::Mode::MIN)
                printf("testcase min %s\n", axis == 2 ? "c == 1" : "c > 1");
            for (auto dtype : std::vector<megdnn::DType>{
                         dtype::Float16(), dtype::Float32(), dtype::QuantizedS8(4.2f),
                         dtype::Quantized8Asymm(3.2f, static_cast<uint8_t>(10))}) {
                run(1, 1024, 49, axis, mode, dtype);
                run(2, 10, 10000, axis, mode, dtype);
                run(2, 100, 10000, axis, mode, dtype);
                run(2, 10, 100000, axis, mode, dtype);
            }
        }
    for (auto mode :
         {param::Reduce::Mode::SUM, param::Reduce::Mode::PRODUCT,
          param::Reduce::Mode::SUM_SQR})
        for (int32_t axis : {1, 2}) {
            if (mode == param::Reduce::Mode::SUM)
                printf("testcase sum %s\n", axis == 2 ? "c == 1" : "c > 1");
            else if (mode == param::Reduce::Mode::PRODUCT)
                printf("testcase product %s\n", axis == 2 ? "c == 1" : "c > 1");
            else if (mode == param::Reduce::Mode::SUM_SQR)
- printf("testcase sum SumSqr %s\n", axis == 2 ? "c == 1" : "c > 1");
            for (auto dtype :
                 std::vector<megdnn::DType>{dtype::Float16(), dtype::Float32()}) {
                run(1, 1024, 49, axis, mode, dtype);
                run(2, 10, 10000, axis, mode, dtype);
                run(2, 100, 10000, axis, mode, dtype);
                run(2, 10, 100000, axis, mode, dtype);
            }
        }
}
#endif
// vim: syntax=cpp.doxygen