You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

matrix_mul.cpp 2.8 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. /**
  2. * \file dnn/test/cpu/matrix_mul.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  #include "test/cpu/fixture.h"

  #include <chrono>
  #include <cstdlib>
  #include <cstring>
  #include <vector>

  #include "test/common/benchmarker.h"
  #include "test/common/checker.h"
  #include "test/common/matrix_mul.h"
  16. using namespace megdnn;
  17. using namespace test;
  18. #if MEGDNN_WITH_BENCHMARK
  namespace {

  /**
   * \brief Naive single-precision matrix multiply, C = A * B.
   *
   * Every row of C is first zeroed and then accumulated one row of B at a
   * time, so the innermost loop walks B and C with unit stride.
   *
   * \param A        left operand; element (m, k) is read at A[m * Astride + k]
   * \param B        right operand; element (k, n) is read at B[k * Bstride + n]
   * \param C        output; element (m, n) is written at C[m * Cstride + n].
   *                 Only the first N entries of each row are touched.
   * \param M        number of rows of A and C
   * \param N        number of columns of B and C
   * \param K        number of columns of A / rows of B
   * \param Astride  distance, in elements, between consecutive rows of A
   * \param Bstride  distance, in elements, between consecutive rows of B
   * \param Cstride  distance, in elements, between consecutive rows of C
   *
   * The pointers are declared __restrict, so the three buffers must not
   * overlap.
   */
  void sgemm_sgemv_like(
          const float* __restrict A, const float* __restrict B, float* __restrict C,
          size_t M, size_t N, size_t K, size_t Astride, size_t Bstride,
          size_t Cstride) {
      for (size_t m = 0; m < M; ++m) {
          const float* a_row = A + m * Astride;
          float* c_row = C + m * Cstride;
          std::memset(c_row, 0, sizeof(float) * N);
          for (size_t k = 0; k < K; ++k) {
              const float a = a_row[k];
              const float* b_row = B + k * Bstride;
              for (size_t n = 0; n < N; ++n) {
                  c_row[n] += a * b_row[n];
              }
          }
      }
  }

  /**
   * \brief Times sgemm_sgemv_like on densely packed random M x K and K x N
   *        inputs.
   *
   * The inputs are filled with values in [0, 1] drawn from std::rand(). One
   * untimed call is made first, then 100 calls are timed as a whole.
   *
   * \return total wall time of the 100 timed calls, in milliseconds, with
   *         sub-millisecond resolution
   */
  float benchmark_sgemm_sgemv_like(size_t M, size_t N, size_t K) {
      constexpr size_t kTimedRuns = 100;
      // steady_clock is monotonic, so the measured interval cannot be distorted
      // by wall-clock adjustments.
      using Clock = std::chrono::steady_clock;

      // Vectors release the buffers on every exit path and report allocation
      // failure as std::bad_alloc instead of handing back a null pointer.
      std::vector<float> A(M * K), B(K * N), C(M * N);
      const auto random_unit = [] {
          return static_cast<float>(std::rand()) / RAND_MAX;
      };
      for (float& value : A) {
          value = random_unit();
      }
      for (float& value : B) {
          value = random_unit();
      }

      // Untimed warm-up call.
      sgemm_sgemv_like(A.data(), B.data(), C.data(), M, N, K, K, N, N);

      const auto start = Clock::now();
      for (size_t run = 0; run < kTimedRuns; ++run) {
          sgemm_sgemv_like(A.data(), B.data(), C.data(), M, N, K, K, N, N);
      }
      const auto stop = Clock::now();

      // A floating-point duration keeps the fractional milliseconds that
      // duration_cast<milliseconds> would truncate away (small shapes finish in
      // well under a millisecond per call).
      return std::chrono::duration<float, std::milli>(stop - start).count();
  }

  }  // namespace
  51. TEST_F(CPU, BENCHMARK_MATRIX_MUL) {
  52. Benchmarker<MatrixMul> benchmarker(handle());
  53. benchmarker.set_times(100);
  54. benchmarker.set_display(false);
  55. auto run = [&](size_t M, size_t N, size_t K) {
  56. std::cout << M << "x" << N << "x" << K << " ";
  57. auto time_in_ms_megdnn = benchmarker.exec({{M, K}, {K, N}, {}});
  58. auto time_in_ms_our = benchmark_sgemm_sgemv_like(M, N, K);
  59. std::cout << "megdnn=" << (int)time_in_ms_megdnn
  60. << " sgemv_like=" << time_in_ms_our << std::endl;
  61. };
  62. for (size_t m = 1; m <= 8; m *= 2)
  63. for (size_t nk = 128; nk <= 1024; nk *= 2) {
  64. run(m, nk, nk);
  65. }
  66. }
  67. #endif
  68. TEST_F(CPU, MATRIX_MUL) {
  69. matrix_mul::check_matrix_mul(
  70. dtype::Float32{}, dtype::Float32{}, dtype::Float32{}, handle());
  71. }
  72. // vim: syntax=cpp.doxygen