
matrix_mul_int_8x8x16.cpp 4.1 kB

/**
 * \file dnn/test/cpu/matrix_mul_int_8x8x16.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "test/cpu/fixture.h"

#include "test/common/convolution.h"
#include "test/common/checker.h"
#include "test/common/benchmarker.h"

namespace megdnn {
namespace test {

TEST_F(CPU, MATRIX_MUL_INT_8_8_16) {
    Checker<MatrixMul> checker(handle());
    param::MatrixMul param;
    checker.set_dtype(0, dtype::Int8());
    checker.set_dtype(1, dtype::Int8());
    checker.set_dtype(2, dtype::Int16());
    checker.set_param(param);
    for (size_t b : {1, 2, 3})
        for (size_t i : {10, 20})
            for (size_t o : {11, 22}) {
                checker.exec({{b, i}, {i, o}, {}});
            }
    for (size_t m = 16; m <= 512; m *= 4)
        for (size_t n = 16; n <= 512; n *= 4)
            for (size_t k = 16; k <= 512; k *= 4) {
                checker.exec({{m, k}, {k, n}, {}});
                checker.exec({{m + 1, k}, {k, n}, {}});
                checker.exec({{m + 5, k}, {k, n}, {}});
                checker.exec({{m + 7, k}, {k, n}, {}});
                checker.exec({{m, k}, {k, n + 15}, {}});
                checker.exec({{m, k}, {k, n + 9}, {}});
                checker.exec({{m, k}, {k, n + 8}, {}});
                checker.exec({{m, k}, {k, n + 7}, {}});
                checker.exec({{m, k}, {k, n + 1}, {}});
                checker.exec({{m + 1, k}, {k, n + 9}, {}});
                checker.exec({{m + 7, k}, {k, n + 15}, {}});
                checker.exec({{m + 7, k}, {k, n + 7}, {}});
            }
    // test transpose scenario
    {
        for (int mask = 0; mask < 4; ++mask) {
            param::MatrixMul param;
            param.transposeA = (mask & 1);
            param.transposeB = (mask & 2);
            checker.set_param(param);
            size_t m = 100, n = 101, k = 102;
            TensorShape A = param.transposeA ? TensorShape({k, m}) : TensorShape({m, k});
            TensorShape B = param.transposeB ? TensorShape({n, k}) : TensorShape({k, n});
            checker.exec({A, B, {}});
        }
    }
}
#if MEGDNN_WITH_BENCHMARK
TEST_F(CPU, BENCHMARK_MATRIX_MUL_INT8_INT8_INT16) {
    bool verbose = getenv("MEGDNN_BENCH_VERBOSE");
    using Param = param::MatrixMul;
    double speedup_sum = 0, speedup_wsum = 0;
    auto run = [&](const TensorShapeArray& shapes, const Param& param) {
        TensorLayoutArray layouts;
        layouts.emplace_back(shapes[0], dtype::Int8());
        layouts.emplace_back(shapes[1], dtype::Int8());
        layouts.emplace_back(shapes[2], dtype::Int16());
        Benchmarker<MatrixMul> benchmarker_cpu(handle());
        param::MatrixMul param_int(param);
        benchmarker_cpu.set_param(param_int);
        Benchmarker<MatrixMul> benchmarker_float(handle());
        benchmarker_float.set_param(param);
        // t2: int8x8x16 kernel on the int8/int16 layouts; t4: float baseline on the same shapes
        auto t2 = benchmarker_cpu.set_display(false).set_adaptive_benchmark(0.01).execl(layouts);
        auto t4 = benchmarker_float.set_display(false).set_adaptive_benchmark(0.01).exec(shapes);
        if (t2 > t4 || verbose) {
            std::cout << "MatA=" << shapes[0].to_string()
                      << " MatB=" << shapes[1].to_string()
                      << " float=" << t4 << "ms"
                      << " int=" << t2 << "ms"
                      << " speedup=" << t4 / t2 << std::endl;
        }
        speedup_sum += t4 / t2;
        speedup_wsum += 1;
    };
    for (size_t m = 16; m <= 256; m *= 4)
        for (size_t k = 16; k <= 256; k *= 4)
            for (size_t n = 16; n <= 1024; n *= 4) {
                Param param;
                run({{m, k}, {k, n}, {}}, param);
                run({{m, k}, {k, n + 8}, {}}, param);
                run({{m, k}, {k, n + 15}, {}}, param);
                run({{m + 5, k}, {k, n}, {}}, param);
                run({{m + 7, k}, {k, n}, {}}, param);
            }
    printf("average speedup: %.3f\n", speedup_sum / speedup_wsum);
}
#endif

}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen
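
The Checker-based test above runs the optimized int8x8x16 kernel and compares its output against a reference result for each shape. For orientation, here is a minimal standalone sketch (hypothetical, not megdnn code) of the computation being verified: an int8 x int8 matrix product written to an int16 result. It assumes row-major layout and plain int16 accumulation, and does not model the real kernel's blocking or overflow handling.

// Hypothetical standalone sketch (not part of megdnn): naive reference for an
// int8 x int8 -> int16 matrix multiplication, C[m x n] = A[m x k] * B[k x n].
#include <cstdint>
#include <cstdio>
#include <vector>

void naive_matmul_8x8x16(const int8_t* A, const int8_t* B, int16_t* C,
                         size_t m, size_t k, size_t n) {
    for (size_t i = 0; i < m; ++i) {
        for (size_t j = 0; j < n; ++j) {
            int16_t acc = 0;  // accumulate in int16, mirroring the output dtype
            for (size_t p = 0; p < k; ++p) {
                acc = static_cast<int16_t>(acc + A[i * k + p] * B[p * n + j]);
            }
            C[i * n + j] = acc;
        }
    }
}

int main() {
    const size_t m = 2, k = 3, n = 2;
    std::vector<int8_t> A = {1, 2, 3, 4, 5, 6};     // 2x3
    std::vector<int8_t> B = {7, 8, 9, 10, 11, 12};  // 3x2
    std::vector<int16_t> C(m * n);
    naive_matmul_8x8x16(A.data(), B.data(), C.data(), m, k, n);
    for (size_t i = 0; i < m; ++i) {
        for (size_t j = 0; j < n; ++j)
            std::printf("%d ", C[i * n + j]);
        std::printf("\n");
    }
    // Expected output:
    // 58 64
    // 139 154
    return 0;
}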

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. If you want to run GPU programs, make sure the machine has a GPU and that the driver is installed. If you would like to try deep learning development on a cloud GPU platform, you are welcome to visit the MegStudio platform.
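
As a quick sanity check that a GPU and its driver are visible before running GPU programs, a generic probe of the CUDA runtime API can be used. The sketch below is only an assumption-level example using plain CUDA runtime calls, not a MegEngine API; compile it with nvcc or link against libcudart.

// Generic sketch: verify that a CUDA-capable GPU and driver are visible.
#include <cuda_runtime.h>
#include <cstdio>

int main() {
    int count = 0;
    cudaError_t err = cudaGetDeviceCount(&count);
    if (err != cudaSuccess || count == 0) {
        std::printf("no usable GPU: %s\n", cudaGetErrorString(err));
        return 1;
    }
    std::printf("found %d CUDA device(s)\n", count);
    return 0;
}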