
convolution.cpp 5.0 kB

/**
 * \file dnn/test/armv7/convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "test/armv7/fixture.h"
#include "test/common/convolution.h"
#include "test/common/checker.h"
#include "test/common/benchmarker.h"
#include "test/common/rng.h"

using namespace megdnn;
using namespace test;

#if MEGDNN_WITH_BENCHMARK
TEST_F(ARMV7, BENCHMARK_CONVOLUTION_STRIDE2)
{
    using Param = param::Convolution;
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<Convolution> benchmarker_float(handle());
        size_t RUN = 100;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);
        size_t IC = shapes[1][1];
        size_t FH = shapes[1][2];
        size_t FW = shapes[1][3];
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        opr->deduce_layout({shapes[0], dtype::Float32()},
                           {shapes[1], dtype::Float32()}, dst_layout);
        printf("flops: %.3f mflops\n",
               (IC * dst_layout.total_nr_elems() * FH * FW * 2) /
                       (tfloat / RUN * 1000));
    };
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = kernel / 2;
        param.pad_w = kernel / 2;
        printf("oc: %zd ic: %zd w: %zd h: %zd stride: %zd kernel_size: %zd\n",
               oc, ic, w, h, stride, kernel);
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };
    for (size_t kernel : {2, 3, 5, 7}) {
        for (size_t ic : {3, 6, 12, 24}) {
            for (size_t oc : {3, 6, 12, 24}) {
                for (size_t size : {4, 7, 8, 14, 16, 17, 28, 32, 34, 64, 112}) {
                    profile(oc, ic, size, size, kernel, 2);
                }
            }
        }
    }
}
#endif

TEST_F(ARMV7, BENCHMARK_CONVOLUTION_1X1)
{
    int exec_times = 50;
    Benchmarker<MatrixMul> benchmarker_gemm(handle());
    benchmarker_gemm.set_times(exec_times);
    Benchmarker<Convolution> benchmarker(handle());
    benchmarker.set_times(exec_times);
    float mod = 1000 * exec_times / 1e9;
    auto run = [&](size_t IC, size_t OC, size_t H, size_t W) {
        float time = 1.f, perf = 1.f;
        std::cout << std::endl;
        std::cout << "CONV: IC " << IC << ", OC " << OC << ", H " << H
                  << ", W " << W << std::endl;
        time = benchmarker.exec({{1, IC, H, W}, {OC, IC, 1, 1}, {1, OC, H, W}});
        perf = OC * (2 * H * W - 1) * IC / time * mod;
        std::cout << "Performance is " << perf << " Gflops" << std::endl;
        std::cout << "GEMM: (" << OC << ", " << H * W << ", " << IC << ")"
                  << std::endl;
        // time = benchmarker_gemm.exec({{OC, H*W}, {H*W, IC}, {}});
        // perf = OC * (2 * H * W - 1) * IC / time * mod;
        time = benchmarker_gemm.exec({{OC, IC}, {IC, H * W}, {}});
        perf = OC * (2 * IC - 1) * H * W / time * mod;
        std::cout << "Performance is " << perf << " Gflops" << std::endl;
    };
    // run(32, 32, 64, 64);
    // run(8, 8, 32, 32);
    // run(32, 32, 128, 128);
    // run(32, 32, 512, 512);
    // run(10, 10, 2, 5);
    // run(100, 100, 2, 50);
    run(16, 4, 240, 135);
    run(8, 32, 120, 67);
    run(16, 64, 60, 33);
    run(1, 1, 28, 28);
    run(8, 1, 28, 28);
    run(2, 2, 28, 28);
    run(8, 2, 28, 28);
    run(4, 4, 28, 28);
    run(16, 4, 28, 28);
}

TEST_F(ARMV7, BENCHMARK_GROUP_CONVOLUTION_1X1) {
    int exec_times = 50;
    Benchmarker<Convolution> benchmarker_gconv1x1(handle());
    benchmarker_gconv1x1.set_times(exec_times);
    float mod = 1000 * exec_times / 1e9;
    auto run = [&](size_t IC, size_t OC, size_t H, size_t W, size_t group) {
        float time = 1.f, perf = 1.f;
        std::cout << std::endl;
        std::cout << "GCONV: IC " << IC << ", OC " << OC << ", H " << H
                  << ", W " << W << ", GROUP " << group << std::endl;
        auto ICg = IC / group;
        auto OCg = OC / group;
        param::Convolution param;
        param.sparse = param::Convolution::Sparse::GROUP;
        time = benchmarker_gconv1x1.set_param(param).exec(
                {{1, IC, H, W}, {group, OCg, ICg, 1, 1}, {}});
        perf = group * OCg * ICg * H * W / time * mod;
        std::cout << "Performance is " << perf << " Gflops" << std::endl;
    };
    run(8 * 4, 1 * 4, 28, 28, 4);
    run(2 * 4, 2 * 4, 28, 28, 4);
    run(8 * 4, 2 * 4, 28, 28, 4);
    run(4 * 4, 4 * 4, 28, 28, 4);
    run(16 * 4, 4 * 4, 28, 28, 4);
}

// vim: syntax=cpp.doxygen
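
A note on the figures these benchmarks print: the stride-2 benchmark counts 2 * IC * FH * FW floating-point operations per output element (dst_layout.total_nr_elems() outputs in total) and divides by the per-run time in milliseconds times 1000, which yields MFLOPS; the 1x1 benchmarks fold the same unit conversion into the mod factor (1000 * exec_times / 1e9) to report GFLOPS. The standalone sketch below reproduces that arithmetic so the printed numbers can be sanity-checked by hand; the helper conv_mflops, its signature, and the example timing are invented for illustration and are not part of MegDNN.

#include <cstddef>
#include <cstdio>

// Hypothetical helper (illustration only): theoretical MFLOPS of a dense NCHW
// convolution, mirroring the formula used in BENCHMARK_CONVOLUTION_STRIDE2:
//   flops  = 2 * IC * FH * FW * dst_elems        (dst_elems = N * OC * OH * OW)
//   MFLOPS = flops / (time_per_run_ms * 1000)
static double conv_mflops(size_t ic, size_t fh, size_t fw, size_t dst_elems,
                          double time_per_run_ms) {
    double flops = 2.0 * ic * fh * fw * dst_elems;
    return flops / (time_per_run_ms * 1000.0);
}

int main() {
    // Example from the profiled range above: oc = ic = 24, 112x112 input,
    // 3x3 kernel, stride 2, pad 1 -> output is 1 x 24 x 56 x 56.
    size_t dst_elems = 24 * 56 * 56;
    // Suppose the benchmarker reported about 2.0 ms per run (made-up number).
    printf("flops: %.3f mflops\n", conv_mflops(24, 3, 3, dst_elems, 2.0));
    return 0;
}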
