You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

matrix_mul_int_8x8x16.cpp 4.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. #include "test/cpu/fixture.h"
  2. #include "test/common/benchmarker.h"
  3. #include "test/common/checker.h"
  4. #include "test/common/convolution.h"
  5. namespace megdnn {
  6. namespace test {
  7. TEST_F(CPU, MATRIX_MUL_INT_8_8_16) {
  8. Checker<MatrixMul> checker(handle());
  9. param::MatrixMul param;
  10. checker.set_dtype(0, dtype::Int8());
  11. checker.set_dtype(1, dtype::Int8());
  12. checker.set_dtype(2, dtype::Int16());
  13. checker.set_param(param);
  14. for (size_t b : {1, 2, 3})
  15. for (size_t i : {10, 20})
  16. for (size_t o : {11, 22}) {
  17. checker.exec({{b, i}, {i, o}, {}});
  18. }
  19. for (size_t m = 16; m <= 512; m *= 4)
  20. for (size_t n = 16; n <= 512; n *= 4)
  21. for (size_t k = 16; k <= 512; k *= 4) {
  22. checker.exec({{m, k}, {k, n}, {}});
  23. checker.exec({{m + 1, k}, {k, n}, {}});
  24. checker.exec({{m + 5, k}, {k, n}, {}});
  25. checker.exec({{m + 7, k}, {k, n}, {}});
  26. checker.exec({{m, k}, {k, n + 15}, {}});
  27. checker.exec({{m, k}, {k, n + 9}, {}});
  28. checker.exec({{m, k}, {k, n + 8}, {}});
  29. checker.exec({{m, k}, {k, n + 7}, {}});
  30. checker.exec({{m, k}, {k, n + 1}, {}});
  31. checker.exec({{m + 1, k}, {k, n + 9}, {}});
  32. checker.exec({{m + 7, k}, {k, n + 15}, {}});
  33. checker.exec({{m + 7, k}, {k, n + 7}, {}});
  34. }
  35. // test transpose scenerio
  36. {
  37. for (int mask = 0; mask < 4; ++mask) {
  38. param::MatrixMul param;
  39. param.transposeA = (mask & 1);
  40. param.transposeB = (mask & 2);
  41. checker.set_param(param);
  42. size_t m = 100, n = 101, k = 102;
  43. TensorShape A =
  44. param.transposeA ? TensorShape({k, m}) : TensorShape({m, k});
  45. TensorShape B =
  46. param.transposeB ? TensorShape({n, k}) : TensorShape({k, n});
  47. checker.exec({A, B, {}});
  48. }
  49. }
  50. }
  51. #if MEGDNN_WITH_BENCHMARK
  52. TEST_F(CPU, BENCHMARK_MATRIX_MUL_INT8_INT8_INT16) {
  53. bool verbose = getenv("MEGDNN_BENCH_VERBOSE");
  54. using Param = param::MatrixMul;
  55. double speedup_sum = 0, speedup_wsum = 0;
  56. auto run = [&](const TensorShapeArray& shapes, const Param& param) {
  57. TensorLayoutArray layouts;
  58. layouts.emplace_back(shapes[0], dtype::Int8());
  59. layouts.emplace_back(shapes[1], dtype::Int8());
  60. layouts.emplace_back(shapes[2], dtype::Int16());
  61. Benchmarker<MatrixMul> benchmarker_cpu(handle());
  62. param::MatrixMul param_int(param);
  63. benchmarker_cpu.set_param(param_int);
  64. Benchmarker<MatrixMul> benchmarker_float(handle());
  65. benchmarker_float.set_param(param);
  66. auto t2 = benchmarker_cpu.set_display(false).set_adaptive_benchmark(0.01).execl(
  67. layouts);
  68. auto t4 =
  69. benchmarker_float.set_display(false).set_adaptive_benchmark(0.01).exec(
  70. shapes);
  71. if (t2 > t4 || verbose) {
  72. std::cout << "MatA=" << shapes[0].to_string()
  73. << " MatB=" << shapes[1].to_string() << " float=" << t4 << "ms"
  74. << " int=" << t2 << "ms"
  75. << " speedup=" << t4 / t2 << std::endl;
  76. }
  77. speedup_sum += t4 / t2;
  78. speedup_wsum += 1;
  79. };
  80. for (size_t m = 16; m <= 256; m *= 4)
  81. for (size_t k = 16; k <= 256; k *= 4)
  82. for (size_t n = 16; n <= 1024; n *= 4) {
  83. Param param;
  84. run({{m, k}, {k, n}, {}}, param);
  85. run({{m, k}, {k, n + 8}, {}}, param);
  86. run({{m, k}, {k, n + 15}, {}}, param);
  87. run({{m + 5, k}, {k, n}, {}}, param);
  88. run({{m + 7, k}, {k, n}, {}}, param);
  89. }
  90. printf("average speedup: %.3f\n", speedup_sum / speedup_wsum);
  91. }
  92. #endif
  93. } // namespace test
  94. } // namespace megdnn
  95. // vim: syntax=cpp.doxygen