
dnn/test/cuda/remap.cpp

/**
 * \file dnn/test/cuda/remap.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "test/common/remap.h"
#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/rng.h"
#include "test/cuda/benchmark.h"
#include "test/cuda/fixture.h"

namespace megdnn {
namespace test {
namespace remap {
TEST_F(CUDA, REMAP_NCHW_FLOAT) {
    Checker<Remap> checker(handle_cuda());
    std::vector<TestArg> args = get_nchw_args();
    UniformFloatRNG float_rng(0, 255);

// map_xy coordinates are sampled slightly outside the source image
// ([-2, max(H, W) + 2]) so that border handling is exercised as well
#define cb(data_type, data_rng) \
    for (auto arg : args) { \
        UniformFloatRNG map_rng( \
                -2, std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]) + 2); \
        checker.set_dtype(0, data_type) \
                .set_dtype(1, dtype::Float32()) \
                .set_dtype(2, data_type) \
                .set_rng(0, &data_rng) \
                .set_rng(1, &map_rng) \
                .set_rng(2, &data_rng) \
                .set_param(arg.param) \
                .execs({arg.src, arg.map_xy, arg.dst}); \
    }

    cb(dtype::Float32(), float_rng);
    cb(dtype::Float16(), float_rng);
#undef cb
}
TEST_F(CUDA, REMAP_NCHW_INT) {
    Checker<Remap> checker(handle_cuda());
    std::vector<TestArg> args = get_nchw_args();
    UniformIntRNG uint8_rng(0, 255);
    UniformIntRNG int8_rng(-128, 127);

// set_epsilon(1) tolerates the off-by-one differences that rounding can
// introduce between the naive and CUDA integer outputs
#define cb(data_type, data_rng) \
    for (auto arg : args) { \
        UniformFloatRNG map_rng( \
                -2, std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]) + 2); \
        checker.set_dtype(0, data_type) \
                .set_dtype(1, dtype::Float32()) \
                .set_dtype(2, data_type) \
                .set_rng(0, &data_rng) \
                .set_rng(1, &map_rng) \
                .set_rng(2, &data_rng) \
                .set_epsilon(1) \
                .set_param(arg.param) \
                .execs({arg.src, arg.map_xy, arg.dst}); \
    }

    cb(dtype::Int8(), int8_rng);
    cb(dtype::Uint8(), uint8_rng);
#undef cb
}
TEST_F(CUDA, REMAP_NHWC_FLOAT) {
    Checker<Remap> checker(handle_cuda());
    std::vector<TestArg> args = get_nhwc_args();
    UniformFloatRNG float_rng(0, 255);

#define cb(data_type, data_rng) \
    for (auto arg : args) { \
        UniformFloatRNG map_rng( \
                -2, std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]) + 2); \
        checker.set_dtype(0, data_type) \
                .set_dtype(1, dtype::Float32()) \
                .set_dtype(2, data_type) \
                .set_rng(0, &data_rng) \
                .set_rng(1, &map_rng) \
                .set_rng(2, &data_rng) \
                .set_param(arg.param) \
                .execs({arg.src, arg.map_xy, arg.dst}); \
    }

    cb(dtype::Float32(), float_rng);
    cb(dtype::Float16(), float_rng);
#undef cb
}
TEST_F(CUDA, REMAP_NHWC_INT) {
    Checker<Remap> checker(handle_cuda());
    std::vector<TestArg> args = get_nhwc_args();
    UniformIntRNG uint8_rng(0, 255);
    UniformIntRNG int8_rng(-128, 127);

#define cb(data_type, data_rng) \
    for (auto arg : args) { \
        UniformFloatRNG map_rng( \
                -2, std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]) + 2); \
        checker.set_dtype(0, data_type) \
                .set_dtype(1, dtype::Float32()) \
                .set_dtype(2, data_type) \
                .set_rng(0, &data_rng) \
                .set_rng(1, &map_rng) \
                .set_rng(2, &data_rng) \
                .set_epsilon(1) \
                .set_param(arg.param) \
                .execs({arg.src, arg.map_xy, arg.dst}); \
    }

    cb(dtype::Int8(), int8_rng);
    cb(dtype::Uint8(), uint8_rng);
#undef cb
}
#if MEGDNN_WITH_BENCHMARK
TEST_F(CUDA, BENCHMARK_REMAP) {
    using Param = param::Remap;
    auto run = [&](const TensorShapeArray& shapes, Param param, DType dtype) {
        auto handle_cpu = create_cpu_handle(2);
        Benchmarker<Remap> benchmarker_naive(handle_cpu.get());
        CUBenchmarker<Remap> benchmarker_cuda(handle_cuda());
        UniformIntRNG rng(0, 0xff);
        UniformFloatRNG map_rng(
                -2, std::max(shapes[1].shape[1], shapes[1].shape[2]) + 2);
        benchmarker_naive.set_rng(0, &rng);
        benchmarker_cuda.set_rng(0, &rng);
        benchmarker_naive.set_rng(1, &map_rng);
        benchmarker_cuda.set_rng(1, &map_rng);
        benchmarker_naive.set_rng(2, &rng);
        benchmarker_cuda.set_rng(2, &rng);
        benchmarker_naive.set_dtype(1, dtype::Float32());
        benchmarker_cuda.set_dtype(1, dtype::Float32());
        benchmarker_naive.set_dtype(0, dtype).set_dtype(2, dtype);
        benchmarker_cuda.set_dtype(0, dtype).set_dtype(2, dtype);
        size_t RUN = 10;
        auto t1 = benchmarker_naive.set_display(false)
                          .set_times(RUN)
                          .set_param(param)
                          .execs(shapes);
        auto t2 = benchmarker_cuda.set_display(false).set_param(param).execs(
                shapes);
        const TensorShape dst_layout = shapes[2];
        // calc_amount counts output elements, so the "Mflops" figure below
        // is really throughput in mega-elements per second
        float calc_amount = dst_layout.total_nr_elems();
        printf("naive={%.3fms, %.3fMflops}, "
               "cuda={%.3fms, %.3fMflops}\n",
               t1 / RUN, calc_amount / (t1 / RUN * 1000), t2,
               calc_amount / (t2 * 1000));
    };

    Param param;
    param.imode = param::Remap::InterpolationMode::LINEAR;
    param.format = param::Remap::Format::NHWC;
    param.border_type = param::Remap::BorderMode::CONSTANT;
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Float32{});
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Float16{});
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Uint8{});
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Int8{});
    param.border_type = param::Remap::BorderMode::REPLICATE;
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Float32{});
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Float16{});
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Uint8{});
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Int8{});
    param.format = param::Remap::Format::NCHW;
    param.border_type = param::Remap::BorderMode::CONSTANT;
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Float32{});
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Float16{});
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Uint8{});
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Int8{});
    param.border_type = param::Remap::BorderMode::REPLICATE;
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Float32{});
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Float16{});
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Uint8{});
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Int8{});
}
#endif

}  // namespace remap
}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen

The MegEngine installation package already bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine actually has a GPU and that its driver is installed. If you would like to try deep learning development on a cloud GPU platform, you are welcome to visit the MegStudio platform.
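To confirm that a GPU and a working driver are visible before running the CUDA tests above, a small standalone probe can help. The sketch below uses only the plain CUDA runtime API (cudaGetDeviceCount / cudaGetDeviceProperties); it is an illustrative check, not part of the MegEngine test suite, and the file and binary names are made up for the example.

// Minimal probe: confirm a usable CUDA device and driver are present.
// Build with, e.g.: nvcc gpu_probe.cu -o gpu_probe   (names are illustrative)
#include <cstdio>
#include <cuda_runtime.h>

int main() {
    int count = 0;
    cudaError_t err = cudaGetDeviceCount(&count);
    if (err != cudaSuccess || count == 0) {
        // cudaErrorNoDevice and cudaErrorInsufficientDriver both land here
        std::printf("no usable CUDA device: %s\n", cudaGetErrorString(err));
        return 1;
    }
    for (int i = 0; i < count; ++i) {
        cudaDeviceProp prop;
        cudaGetDeviceProperties(&prop, i);
        std::printf("device %d: %s (compute capability %d.%d)\n", i,
                    prop.name, prop.major, prop.minor);
    }
    return 0;
}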