You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

matrix_mul.cpp 2.4 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. #include "test/cpu/fixture.h"
  2. #include <chrono>
  3. #include "test/common/benchmarker.h"
  4. #include "test/common/checker.h"
  5. #include "test/common/matrix_mul.h"
  6. using namespace megdnn;
  7. using namespace test;
  8. #if MEGDNN_WITH_BENCHMARK
  9. namespace {
  /*!
   * \brief Naive strided matrix product: C = A * B.
   *
   * Each output row is cleared and then accumulated one row of B at a time,
   * scaled by the matching element of A, so the innermost loop walks B and C
   * contiguously.
   *
   * \param A input, read as A[m * Astride + k] for m < M, k < K
   * \param B input, read as B[k * Bstride + n] for k < K, n < N
   * \param C output, written as C[m * Cstride + n]; only the first N
   *          elements of each row are touched
   * \param M number of rows of A and C
   * \param N number of columns of B and C
   * \param K number of columns of A / rows of B
   * \param Astride distance in elements between consecutive rows of A
   * \param Bstride distance in elements between consecutive rows of B
   * \param Cstride distance in elements between consecutive rows of C
   *
   * The pointers are declared __restrict, so the buffers must not overlap.
   */
  void sgemm_sgemv_like(
          const float* __restrict A, const float* __restrict B, float* __restrict C,
          size_t M, size_t N, size_t K, size_t Astride, size_t Bstride, size_t Cstride) {
      for (size_t row = 0; row < M; ++row) {
          float* out_row = C + row * Cstride;
          const size_t a_base = row * Astride;

          // Start every output row from zero before accumulating into it.
          memset(out_row, 0, N * sizeof(float));

          for (size_t depth = 0; depth < K; ++depth) {
              const size_t b_base = depth * Bstride;
              for (size_t col = 0; col < N; ++col) {
                  out_row[col] += A[a_base + depth] * B[b_base + col];
              }
          }
      }
  }
  21. float benchmark_sgemm_sgemv_like(size_t M, size_t N, size_t K) {
  22. float *A = (float*)malloc(sizeof(float) * M * K),
  23. *B = (float*)malloc(sizeof(float) * K * N),
  24. *C = (float*)malloc(sizeof(float) * M * N);
  25. for (size_t i = 0; i < M * K; ++i)
  26. A[i] = (float)rand() / RAND_MAX;
  27. for (size_t i = 0; i < K * N; ++i)
  28. B[i] = (float)rand() / RAND_MAX;
  29. sgemm_sgemv_like(A, B, C, M, N, K, K, N, N);
  30. auto start = std::chrono::high_resolution_clock::now();
  31. for (size_t i = 0; i < 100; ++i) {
  32. sgemm_sgemv_like(A, B, C, M, N, K, K, N, N);
  33. }
  34. auto stop = std::chrono::high_resolution_clock::now();
  35. free(A);
  36. free(B);
  37. free(C);
  38. return std::chrono::duration_cast<std::chrono::milliseconds>(stop - start).count();
  39. }
  40. } // namespace
  41. TEST_F(CPU, BENCHMARK_MATRIX_MUL) {
  42. Benchmarker<MatrixMul> benchmarker(handle());
  43. benchmarker.set_times(100);
  44. benchmarker.set_display(false);
  45. auto run = [&](size_t M, size_t N, size_t K) {
  46. std::cout << M << "x" << N << "x" << K << " ";
  47. auto time_in_ms_megdnn = benchmarker.exec({{M, K}, {K, N}, {}});
  48. auto time_in_ms_our = benchmark_sgemm_sgemv_like(M, N, K);
  49. std::cout << "megdnn=" << (int)time_in_ms_megdnn
  50. << " sgemv_like=" << time_in_ms_our << std::endl;
  51. };
  52. for (size_t m = 1; m <= 8; m *= 2)
  53. for (size_t nk = 128; nk <= 1024; nk *= 2) {
  54. run(m, nk, nk);
  55. }
  56. }
  57. #endif
  58. TEST_F(CPU, MATRIX_MUL) {
  59. matrix_mul::check_matrix_mul(
  60. dtype::Float32{}, dtype::Float32{}, dtype::Float32{}, handle());
  61. }
  62. // vim: syntax=cpp.doxygen