|
- #include "test/common/norm.h"
- #include "megdnn/dtype.h"
- #include "megdnn/oprs.h"
- #include "test/common/checker.h"
- // #include "test/naive/fixture.h"
- // #include "test/common/benchmarker.h"
- #include <iostream>
- #include "test/cuda/benchmark.h"
- #include "test/cuda/fixture.h"
- #include "test/cuda/utils.h"
-
- namespace megdnn {
- namespace test {
- // CORRECT
- // L2, fp32, dim
- TEST_F(CUDA, L2NORM_FP32_DIM0) {
- Checker<Norm> checker(handle_cuda());
- Norm::Param param;
- param.p = 2;
- param.dim = 0;
- checker.set_param(param);
- checker.exect(
- Testcase{
- TensorValue(
- {1, 2, 3, 4}, dtype::Float32(),
- {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
- {}},
- Testcase{
- {},
- TensorValue(
- {1, 2, 3, 4}, dtype::Float32(),
- {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
- });
- }
- TEST_F(CUDA, L2NORM_FP32_DIM1) {
- Checker<Norm> checker(handle_cuda());
- Norm::Param param;
- param.p = 2;
- param.dim = 1;
- checker.set_param(param);
- checker.exect(
- Testcase{
- TensorValue(
- {1, 2, 3, 4}, dtype::Float32(),
- {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
- {}},
- Testcase{
- {},
- TensorValue(
- {1, 1, 3, 4}, dtype::Float32(),
- {12.000, 13.0384, 14.1421, 15.2971, 16.4924, 17.7200,
- 18.9737, 20.2485, 21.5407, 22.8473, 24.1661, 25.4951}),
- });
- }
- TEST_F(CUDA, L2NORM_FP32_DIM3) {
- Checker<Norm> checker(handle_cuda());
- Norm::Param param;
- param.p = 2;
- param.dim = 3;
- checker.set_param(param).exect(
- Testcase{
- TensorValue(
- {1, 2, 3, 4}, dtype::Float32(),
- {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
- {}},
- Testcase{
- {},
- TensorValue(
- {1, 2, 3, 1}, dtype::Float32(),
- {3.7417, 11.2250, 19.1311, 27.0924, 35.0714, 43.0581})});
- }
- // TODO: support -1 dim param, or test for assert
- // l2, fp16
- TEST_F(CUDA, L2NORM_FP16_DIM3) {
- Checker<Norm> checker(handle_cuda());
- Norm::Param param;
- param.p = 2;
- param.dim = 3;
- checker.set_param(param).exect(
- Testcase{
- TensorValue(
- {1, 2, 3, 4}, dtype::Float16(),
- {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
- {}},
- Testcase{
- {},
- TensorValue(
- {1, 2, 3, 1}, dtype::Float16(),
- {3.7422, 11.2266, 19.1250, 27.0938, 35.0625, 43.0625})});
- }
- // l1, fp32,fp16
- TEST_F(CUDA, L1NORM_FP32_DIM3) {
- Checker<Norm> checker(handle_cuda());
- Norm::Param param;
- param.p = 1;
- param.dim = 3;
- checker.set_param(param).exect(
- Testcase{
- TensorValue(
- {1, 2, 3, 4}, dtype::Float32(),
- {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
- {}},
- Testcase{
- {},
- TensorValue(
- {1, 2, 3, 1}, dtype::Float32(), {6, 22, 38, 54, 70, 86}),
- });
- }
- TEST_F(CUDA, L1NORM_FP16_DIM3) {
- Checker<Norm> checker(handle_cuda());
- Norm::Param param;
- param.p = 1;
- param.dim = 3;
- checker.set_param(param).exect(
- Testcase{
- TensorValue(
- {1, 2, 3, 4}, dtype::Float16(),
- {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
- {}},
- Testcase{
- {},
- TensorValue(
- {1, 2, 3, 1}, dtype::Float16(), {6, 22, 38, 54, 70, 86}),
- });
- }
- // l0, fp32,fp16
- TEST_F(CUDA, L0NORM_FP32_DIM3) {
- Checker<Norm> checker(handle_cuda());
- Norm::Param param;
- param.p = 0;
- param.dim = 3;
- checker.set_param(param).exect(
- Testcase{
- TensorValue(
- {1, 2, 3, 4}, dtype::Float32(),
- {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
- {}},
- Testcase{
- {},
- TensorValue({1, 2, 3, 1}, dtype::Float32(), {3, 4, 4, 4, 4, 4}),
- });
- }
- TEST_F(CUDA, L0NORM_FP16_DIM3) {
- Checker<Norm> checker(handle_cuda());
- Norm::Param param;
- param.p = 0;
- param.dim = 3;
- checker.set_param(param).exect(
- Testcase{
- TensorValue(
- {1, 2, 3, 4}, dtype::Float16(),
- {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
- {}},
- Testcase{
- {},
- TensorValue({1, 2, 3, 1}, dtype::Float16(), {3, 4, 4, 4, 4, 4}),
- });
- }
- // inf
- TEST_F(CUDA, INF_NORM_FP32_DIM3) {
- Checker<Norm> checker(handle_cuda());
- Norm::Param param;
- using Mode = Norm::Param::Mode;
-
- param.dim = 3;
- param.mode = Mode::INF_NORM;
- checker.set_param(param).exect(
- Testcase{
- TensorValue(
- {1, 2, 3, 4}, dtype::Float32(),
- {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
- {}},
- Testcase{
- {},
- TensorValue({1, 2, 3, 1}, dtype::Float32(), {3, 7, 11, 15, 19, 23}),
- });
- }
- TEST_F(CUDA, INF_NORM_FP16_DIM3) {
- Checker<Norm> checker(handle_cuda());
- Norm::Param param;
- using Mode = Norm::Param::Mode;
-
- param.dim = 3;
- param.mode = Mode::INF_NORM;
- checker.set_param(param).exect(
- Testcase{
- TensorValue(
- {1, 2, 3, 4}, dtype::Float16(),
- {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
- {}},
- Testcase{
- {},
- TensorValue({1, 2, 3, 1}, dtype::Float16(), {3, 7, 11, 15, 19, 23}),
- });
- }
- // -inf
- TEST_F(CUDA, NEG_INF_NORM_FP32_DIM3) {
- Checker<Norm> checker(handle_cuda());
- Norm::Param param;
- param.mode = Norm::Param::Mode::NEG_INF_NORM;
- param.dim = 3;
- checker.set_param(param).exect(
- Testcase{
- TensorValue(
- {1, 2, 3, 4}, dtype::Float32(),
- {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
- {}},
- Testcase{
- {},
- TensorValue({1, 2, 3, 1}, dtype::Float32(), {0, 4, 8, 12, 16, 20}),
- });
- }
- TEST_F(CUDA, NEG_INF_NORM_FP16_DIM3) {
- Checker<Norm> checker(handle_cuda());
- Norm::Param param;
- param.mode = Norm::Param::Mode::NEG_INF_NORM;
- param.dim = 3;
- checker.set_param(param).exect(
- Testcase{
- TensorValue(
- {1, 2, 3, 4}, dtype::Float16(),
- {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
- {}},
- Testcase{
- {},
- TensorValue({1, 2, 3, 1}, dtype::Float16(), {0, 4, 8, 12, 16, 20}),
- });
- }
-
- // PERF
- TEST_F(CUDA, L2NORM_SPEED_FP32) {
- auto benchmarker = Benchmarker<Norm>(handle_cuda());
- benchmarker.set_dtype(0, dtype::Float32());
- benchmarker.set_dtype(1, dtype::Float32());
- Norm::Param param;
- param.mode = Norm::Param::Mode::P_NORM;
- param.dim = 0;
- param.p = 2;
- SmallVector<TensorShape> shapes{{4194304}, {}};
- NormalRNG rng(0, 1);
- float eachTime;
- float totalTime = 0.f;
- #define ITER 10
- for (auto i = 0; i < ITER; i++) {
- eachTime = benchmarker.set_param(param).set_rng(0, &rng).exec(shapes);
- // printf("PNORM_SPEED_FP32 cuda time: %.6fms\n", eachTime);
- totalTime += eachTime;
- }
- totalTime /= ITER;
- printf("PNORM_SPEED_FP32 AVG TIME: %.6fms\n", totalTime);
- #undef ITER
- }
- TEST_F(CUDA, INFNORM_SPEED_FP32) {
- auto benchmarker = Benchmarker<Norm>(handle_cuda());
- benchmarker.set_dtype(0, dtype::Float32());
- benchmarker.set_dtype(1, dtype::Float32());
- Norm::Param param;
- param.mode = Norm::Param::Mode::INF_NORM;
- param.dim = 0;
- SmallVector<TensorShape> shapes{{4194304}, {}};
- NormalRNG rng(0, 1);
- float time_fp32 = benchmarker.set_param(param).set_rng(0, &rng).exec(shapes);
- printf("INF_SPEED_FP32 cuda time: float=%.6fms\n", time_fp32);
- }
- TEST_F(CUDA, NEG_INFNORM_SPEED_FP32) {
- auto benchmarker = Benchmarker<Norm>(handle_cuda());
- benchmarker.set_dtype(0, dtype::Float32());
- benchmarker.set_dtype(1, dtype::Float32());
- Norm::Param param;
- param.mode = Norm::Param::Mode::NEG_INF_NORM;
- param.dim = 0;
- SmallVector<TensorShape> shapes{{4194304}, {}};
- NormalRNG rng(0, 1);
- float time_fp32 = benchmarker.set_param(param).set_rng(0, &rng).exec(shapes);
- printf("NEG_INF_SPEED_FP32 cuda time: float=%.6fms\n", time_fp32);
- }
- } // namespace test
- } // namespace megdnn
|