You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

warp_affine.cpp 4.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. /**
  2. * \file dnn/test/cuda/warp_affine.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "test/cuda/fixture.h"
  12. #include "test/common/warp_affine.h"
  13. #include "test/common/checker.h"
  14. #include "test/common/benchmarker.h"
  15. #include "include/megdnn/thin/small_vector.h"
  16. namespace megdnn {
  17. namespace test {
  18. // FIXME test WARP_PERSPECTIVE_CV failed here
  19. #if 0
  20. TEST_F(CUDA, WARP_AFFINE_CV)
  21. {
  22. using namespace warp_affine;
  23. std::vector<TestArg> args = get_cv_args();
  24. Checker<WarpAffine> checker(handle_cuda());
  25. for (auto &&arg: args) {
  26. if (arg.src[3] == 2) continue;
  27. checker.set_param(arg.param)
  28. .set_epsilon(1 + 1e-3)
  29. .set_dtype(0, dtype::Uint8())
  30. .set_dtype(1, dtype::Float32())
  31. .set_dtype(2, dtype::Uint8())
  32. .execs({arg.src, arg.trans, arg.dst});
  33. }
  34. for (auto &&arg: args) {
  35. if (arg.src[3] == 2) continue;
  36. checker.set_param(arg.param)
  37. .set_dtype(0, dtype::Float32())
  38. .set_dtype(1, dtype::Float32())
  39. .set_dtype(2, dtype::Float32())
  40. .execs({arg.src, arg.trans, arg.dst});
  41. }
  42. }
  43. #endif
  44. TEST_F(CUDA, WARP_AFFINE) {
  45. //! NCHW
  46. for (auto dtype : std::vector<DType>{dtype::Float32()}) {
  47. for (auto bmode :
  48. {WarpAffine::BorderMode::WRAP, WarpAffine::BorderMode::REFLECT,
  49. WarpAffine::BorderMode::CONSTANT,
  50. WarpAffine::BorderMode::REPLICATE,
  51. WarpAffine::BorderMode::CONSTANT}) {
  52. Checker<WarpAffine> checker(handle_cuda());
  53. NormalRNG rng;
  54. checker.set_rng(1, &rng);
  55. WarpAffine::Param param;
  56. param.border_val = 0.3f;
  57. param.border_mode = bmode;
  58. param.imode = param::WarpAffine::InterpolationMode::LINEAR;
  59. param.format = param::WarpAffine::Format::NCHW;
  60. checker.set_param(param);
  61. checker.set_dtype(0, dtype);
  62. checker.set_dtype(1, dtype);
  63. checker.set_dtype(2, dtype);
  64. checker.execs({{2, 3, 10, 11}, {2, 2, 3}, {2, 3, 11, 12}});
  65. checker.execs({{22, 3, 10, 11}, {22, 2, 3}, {22, 3, 11, 12}});
  66. }
  67. }
  68. //! NHWC
  69. for (auto dtype : std::vector<DType>{dtype::Float32()}) {
  70. for (auto bmode :
  71. {WarpAffine::BorderMode::WRAP, WarpAffine::BorderMode::REFLECT,
  72. WarpAffine::BorderMode::CONSTANT,
  73. WarpAffine::BorderMode::REPLICATE,
  74. WarpAffine::BorderMode::CONSTANT}) {
  75. Checker<WarpAffine> checker(handle_cuda());
  76. NormalRNG rng;
  77. checker.set_rng(1, &rng);
  78. WarpAffine::Param param;
  79. param.format = param::WarpAffine::Format::NHWC;
  80. param.border_val = 0.3f;
  81. param.border_mode = bmode;
  82. param.imode = param::WarpAffine::InterpolationMode::LINEAR;
  83. checker.set_param(param);
  84. checker.set_dtype(0, dtype);
  85. checker.set_dtype(1, dtype);
  86. checker.set_dtype(2, dtype);
  87. checker.execs({{2, 3, 10, 11}, {2, 2, 3}, {2, 12, 11, 11}});
  88. checker.execs({{22, 3, 10, 12}, {22, 2, 3}, {22, 3, 11, 12}});
  89. }
  90. }
  91. }
  92. #if MEGDNN_WITH_BENCHMARK
  93. TEST_F(CUDA, WARP_AFFINE_BENCHMARK) {
  94. const size_t RUNS = 50;
  95. Benchmarker<WarpAffine> benchmark(handle_cuda());
  96. benchmark.set_display(false);
  97. benchmark.set_times(RUNS);
  98. using Param = param::WarpAffine;
  99. Param param;
  100. auto run = [&benchmark, &param](TensorShape src, TensorShape mat,
  101. TensorShape dst) {
  102. benchmark.set_param(param);
  103. auto used = benchmark.execs({src, mat, dst});
  104. printf("src: %s dst: %s used: %.2f ms %.2f Gflops\n",
  105. src.to_string().c_str(), dst.to_string().c_str(), used,
  106. //! 8 mul + 3 add
  107. 11 * dst.total_nr_elems() / (used / RUNS) / 1e6);
  108. };
  109. run({1, 100, 100, 1}, {1, 2, 3}, {1, 112, 112, 1});
  110. run({512, 100, 100, 1}, {512, 2, 3}, {512, 112, 112, 1});
  111. }
  112. #endif
  113. } // namespace test
  114. } // namespace megdnn
  115. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台