feat(fallback): add FB_GI_F32_4x12 benchmark

GitOrigin-RevId: cfacf31b28
Branch: release-1.10
Author: Megvii Engine Team
Commit: 5f0e7ffb64

5 changed files with 91 additions and 9 deletions:
  1. dnn/src/fallback/matrix_mul/gi/fp32/strategy_4x12.cpp (+9, -9)
  2. dnn/test/armv7/matrix_mul.cpp (+6, -0)
  3. dnn/test/common/matrix_mul.cpp (+62, -0)
  4. dnn/test/common/matrix_mul.h (+4, -0)
  5. dnn/test/fallback/matrix_mul.cpp (+10, -0)

dnn/src/fallback/matrix_mul/gi/fp32/strategy_4x12.cpp (+9, -9)

@@ -1,11 +1,3 @@
-#include "src/fallback/matrix_mul/generic_strategy.h"
-#include "src/fallback/matrix_mul/gi/fp32/common.h"
-
-using namespace megdnn;
-using namespace matmul::fallback;
-
-namespace {
-
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wuninitialized"
 
@@ -18,6 +10,15 @@ namespace {
 #endif
 #endif
 #endif
+
+#include "src/fallback/matrix_mul/generic_strategy.h"
+#include "src/fallback/matrix_mul/gi/fp32/common.h"
+
+using namespace megdnn;
+using namespace matmul::fallback;
+
+namespace {
+
 void kern_4x12(
         const float* packA, const float* packB, int K, float* output, int LDC,
         bool is_first_k, int m_remain) {
@@ -615,7 +616,6 @@ void kern_4x4(
         }
     }
 }
-#pragma GCC diagnostic pop
 
 void gi_sgemm_4x12_pack_A_n(
         float* outptr, const float* inptr, int ldin, int y0, int ymax, int k0,
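
The first two hunks move the #include directives, the using-declarations, and the anonymous-namespace opener below the compiler-version #if/#endif block, so the translation unit now opens with the diagnostic pragmas; the third hunk drops the matching #pragma GCC diagnostic pop, leaving -Wuninitialized suppressed through the rest of the file. A minimal standalone sketch of this suppression pattern (illustrative only, not MegDNN code):

    #pragma GCC diagnostic push
    #pragma GCC diagnostic ignored "-Wuninitialized"

    // GEMM-style kernels often declare accumulators that are only written on
    // the first iteration; GCC can flag these as maybe-uninitialized even
    // though every read is preceded by a write.
    static float sum_positive(const float* p, int n) {
        // Precondition: n >= 1, so acc is always written before it is read.
        float acc;  // deliberately left uninitialized
        for (int i = 0; i < n; ++i)
            acc = (i == 0) ? p[0] : acc + p[i];
        return acc;
    }

    #pragma GCC diagnostic pop  // restore the previous warning state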


dnn/test/armv7/matrix_mul.cpp (+6, -0)

@@ -571,6 +571,12 @@ TEST_F(ARMV7, BENCHMARK_MATRIX_MUL_INT32_MK_4X2X16) {
     }
 }
 
+TEST_F(ARMV7, BENCHMARK_MATRIX_MUL_ARMV7_F32) {
+    auto args = matrix_mul::get_benchmark_matmul_args();
+    matrix_mul::benchmark_single_algo(
+            handle(), args, dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
+            "ARMV7_F32", param::MatrixMul::Format::DEFAULT);
+}
 #endif
 
 // vim: syntax=cpp.doxygen

dnn/test/common/matrix_mul.cpp (+62, -0)

@@ -429,6 +429,68 @@ void matrix_mul::benchmark_with_contrast(
     }
 }
 
+void matrix_mul::benchmark_single_algo(
+        Handle* handle, const std::vector<TestArg>& args, DType A_dtype, DType B_dtype,
+        DType C_dtype, const char* algo, param::MatrixMul::Format format) {
+    using Param = MatrixMul::Param;
+
+    megdnn_assert(A_dtype.enumv() == B_dtype.enumv());
+    Benchmarker<MatrixMul> benchmark(handle);
+    constexpr size_t RUNS = 50;
+    if (algo) {
+        benchmark.set_before_exec_callback(AlgoChecker<MatrixMul>(algo));
+    }
+    benchmark.set_dtype(0, A_dtype).set_dtype(1, B_dtype).set_dtype(2, C_dtype);
+    benchmark.set_times(RUNS);
+
+    auto bench = [](Benchmarker<MatrixMul>& benchmark, Param param,
+                    param::MatrixMul::Format format, size_t m, size_t n, size_t k,
+                    size_t pack_size) -> float {
+        param.format = format;
+        benchmark.set_param(param);
+        float used_algo = 1.0;
+        if (format == param::MatrixMul::Format::DEFAULT) {
+            size_t A0 = m * pack_size, A1 = k * pack_size, B0 = k * pack_size, B1 = n;
+            TensorShape A, B;
+            if (param.transposeA) {
+                std::swap(A0, A1);
+            }
+            if (param.transposeB) {
+                std::swap(B0, B1);
+            }
+            used_algo = benchmark.execs({{A0, A1}, {B0, B1}, {}}) / RUNS;
+        } else {
+            size_t A0 = m, A1 = k, B0 = k, B1 = n;
+            if (param.transposeA) {
+                std::swap(A0, A1);
+            }
+            if (param.transposeB) {
+                std::swap(B0, B1);
+            }
+
+            used_algo =
+                    benchmark.execs(
+                            {{A0, A1, pack_size, pack_size}, {B0, B1, pack_size}, {}}) /
+                    RUNS;
+        }
+        return used_algo;
+    };
+
+    size_t pack_size = MatrixMulForward::pack_size(format);
+    for (auto& arg : args) {
+        Param param;
+        param.transposeA = arg.mask & 0x1;
+        param.transposeB = arg.mask & 0x2;
+
+        auto used_algo =
+                bench(benchmark, param, format, arg.m, arg.n, arg.k, pack_size);
+
+        float computations = 2.f * arg.m * pack_size * arg.k * pack_size * arg.n * 1e-6;
+        printf("run: {(%zu, %zu) x (%zu, %zu)} %f ms %f Gflops\n", arg.m * pack_size,
+               arg.k * pack_size, arg.k * pack_size, arg.n, used_algo,
+               computations / used_algo);
+    }
+}
 #endif
 
 // vim: syntax=cpp.doxygen
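
benchmark_single_algo averages each case over RUNS = 50 executions and prints milliseconds plus derived Gflops. For Format::DEFAULT (pack_size 1) the operands are plain {m, k} and {k, n} matrices; for packed formats the shapes become {m, k, pack_size, pack_size} and {k, n, pack_size}. A minimal standalone sketch of the throughput arithmetic, with made-up sizes and timing (not MegDNN output):

    #include <cstddef>
    #include <cstdio>

    int main() {
        // Hypothetical case: a 256x256x256 matmul in DEFAULT format.
        const std::size_t m = 256, n = 256, k = 256, pack_size = 1;
        const float avg_ms = 2.0f;  // assumed average time per run, in ms
        // 2*M*K*N flops; the 1e-6 factor makes flops/ms come out as Gflops.
        const float computations =
                2.f * m * pack_size * k * pack_size * n * 1e-6f;
        std::printf("run: {(%zu, %zu) x (%zu, %zu)} %f ms %f Gflops\n",
                    m * pack_size, k * pack_size, k * pack_size, n, avg_ms,
                    computations / avg_ms);
        return 0;
    }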

dnn/test/common/matrix_mul.h (+4, -0)

@@ -85,6 +85,10 @@ void benchmark_with_contrast(
         DType contrast_B_dtype = dtype::Float32{},
         DType contrast_C_dtype = dtype::Float32{}, const char* contrast_algo = nullptr,
         param::MatrixMul::Format contrast_format = param::MatrixMul::Format::DEFAULT);
+void benchmark_single_algo(
+        Handle* handle, const std::vector<TestArg>& args, DType A_dtype, DType B_dtype,
+        DType C_dtype, const char* algo = nullptr,
+        param::MatrixMul::Format format = param::MatrixMul::Format::DEFAULT);
 #endif
 
 } // namespace matrix_mul


dnn/test/fallback/matrix_mul.cpp (+10, -0)

@@ -154,6 +154,16 @@ TEST_F(FALLBACK, BATCHED_MATRIX_MUL) {
         checker.execs({AL, BL, {}});
     }
 }
+
+#if MEGDNN_WITH_BENCHMARK
+TEST_F(FALLBACK, BENCHMARK_MATRIX_MUL_FB_GI_F32_4x12) {
+    auto args = matrix_mul::get_benchmark_matmul_args();
+    matrix_mul::benchmark_single_algo(
+            handle(), args, dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
+            "FB_GI_F32_4x12", param::MatrixMul::Format::DEFAULT);
+}
+
+#endif
 } // namespace test
 } // namespace megdnn
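
Assuming the standard MegDNN test binary name (not part of this diff), the new benchmark can be run from a build with MEGDNN_WITH_BENCHMARK enabled using a gtest filter, e.g. ./megdnn_test --gtest_filter=FALLBACK.BENCHMARK_MATRIX_MUL_FB_GI_F32_4x12; the filter follows gtest's Suite.TestName convention and so matches TEST_F(FALLBACK, BENCHMARK_MATRIX_MUL_FB_GI_F32_4x12) exactly.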


