You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

local.cpp 4.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. /**
  2. * \file dnn/test/arm_common/local.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/arm_common/fixture.h"
  12. #include "test/common/benchmarker.h"
  13. #include "test/common/checker.h"
  14. #include "test/common/local.h"
  15. #include "test/common/timer.h"
  16. namespace megdnn {
  17. namespace test {
  18. using Param = param::Convolution;
  19. TEST_F(ARM_COMMON, LOCAL_FORWARD) {
  20. auto args = local::get_args();
  21. Checker<LocalForward> checker(handle());
  22. for (auto&& arg : args) {
  23. checker.set_param(arg.param).execs({arg.sshape(), arg.fshape(), arg.dshape()});
  24. }
  25. NormalRNG rng(10.f);
  26. checker.set_rng(0, &rng).set_rng(1, &rng);
  27. args = local::get_args_for_fp16();
  28. #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
  29. for (auto&& arg : args) {
  30. checker.set_dtype(0, dtype::Float16())
  31. .set_dtype(1, dtype::Float16())
  32. .set_dtype(2, dtype::Float16());
  33. checker.set_epsilon(1e-2);
  34. checker.set_param(arg.param).execs({arg.sshape(), arg.fshape(), arg.dshape()});
  35. }
  36. #endif
  37. }
  38. #if MEGDNN_WITH_BENCHMARK
/**
 * Bandwidth benchmark for LocalForward on ARM.
 *
 * For each shape/param combination, times RUN executions in fp32 (and fp16
 * when the toolchain supports fp16 vector arithmetic) and prints an
 * effective memory bandwidth figure derived from the filter and src sizes.
 */
TEST_F(ARM_COMMON, BENCHMARK_LOCAL_FORWARD) {
    // shapes: {src = {N, IC, IH, IW}, filter = {OH, OW, IC, FH, FW, OC}, dst = {}}
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        Benchmarker<LocalForward> benchmarker(handle());
        size_t RUN = 50;
        benchmarker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .set_dtype(2, dtype::Float32());
        // tfloat32: total wall time (ms) for RUN fp32 executions.
        auto tfloat32 =
                benchmarker.set_display(true).set_times(RUN).set_param(param).exec(
                        shapes);
        // Unpack src layout {N, IC, IH, IW}.
        int N = shapes[0][0];
        int IC = shapes[0][1];
        int IH = shapes[0][2];
        int IW = shapes[0][3];
        // Unpack filter layout {OH, OW, IC, FH, FW, OC}; index 2 (IC) is
        // skipped because it was already read from the src shape.
        int OH = shapes[1][0];
        int OW = shapes[1][1];
        int FH = shapes[1][3];
        int FW = shapes[1][4];
        int OC = shapes[1][5];
        std::cout << "LOCAL FORWARD, src: {" << N << ", " << IC << ", " << IH << ", "
                  << IW << "}" << std::endl;
        std::cout << "LOCAL FORWARD, filter: {" << OH << ", " << OW << ", " << IC
                  << ", " << FH << ", " << FW << ", " << OC << "}" << std::endl;
        // Bandwidth model: bytes moved = filter elements (N*OC*OH*OW*FH*FW*IC)
        // plus src elements (N*IC*IH*IW), converted to GB; time per run is
        // tfloat32/RUN ms, converted to seconds via the 1e-3 factor.
        std::cout << "LOCAL FORWARD (f32), bandwidth: "
                  << (1.f * N * OC * OH * OW * FH * FW * IC + 1.f * N * IC * IH * IW) *
                             sizeof(float) * 1e-9 / (tfloat32 / RUN * 1e-3)
                  << "GBPS" << std::endl;
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
        // Same measurement repeated in fp16; only the element size changes
        // in the bandwidth formula.
        benchmarker.set_dtype(0, dtype::Float16())
                .set_dtype(1, dtype::Float16())
                .set_dtype(2, dtype::Float16());
        auto tfloat16 =
                benchmarker.set_display(true).set_times(RUN).set_param(param).exec(
                        shapes);
        std::cout << "LOCAL FORWARD (f16), bandwidth: "
                  << (1.f * N * OC * OH * OW * FH * FW * IC + 1.f * N * IC * IH * IW) *
                             sizeof(dt_float16) * 1e-9 / (tfloat16 / RUN * 1e-3)
                  << "GBPS" << std::endl;
#endif
    };
    // Sweep filter sizes 3x3/5x5/7x7 at strides 1 and 2, with padding
    // matched to keep the output aligned with the declared filter OH/OW.
    Param param;
    param.mode = param::Convolution::Mode::CONVOLUTION;
    param.pad_h = param.pad_w = 1;
    param.stride_h = param.stride_w = 1;
    run({{1, 4, 320, 256}, {320, 256, 4, 3, 3, 24}, {}}, param);
    param.stride_h = param.stride_w = 2;
    run({{1, 4, 320, 256}, {160, 128, 4, 3, 3, 24}, {}}, param);
    param.pad_h = param.pad_w = 2;
    param.stride_h = param.stride_w = 1;
    run({{1, 4, 64, 64}, {64, 64, 4, 5, 5, 24}, {}}, param);
    param.stride_h = param.stride_w = 2;
    run({{1, 4, 64, 64}, {32, 32, 4, 5, 5, 24}, {}}, param);
    param.pad_h = param.pad_w = 3;
    param.stride_h = param.stride_w = 1;
    run({{1, 4, 64, 64}, {64, 64, 4, 7, 7, 24}, {}}, param);
    param.stride_h = param.stride_w = 2;
    run({{1, 4, 64, 64}, {32, 32, 4, 7, 7, 24}, {}}, param);
    param.pad_h = param.pad_w = 1;
    param.stride_h = param.stride_w = 1;
    run({{2, 128, 8, 8}, {8, 8, 128, 3, 3, 128}, {}}, param);
    run({{1, 16, 64, 64}, {64, 64, 16, 3, 3, 16}, {}}, param);
}
  101. #endif
  102. } // namespace test
  103. } // namespace megdnn
  104. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台