/**
 * \file dnn/test/cuda/remap.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "test/common/remap.h"

#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/rng.h"
#include "test/cuda/benchmark.h"
#include "test/cuda/fixture.h"

namespace megdnn {
namespace test {
namespace remap {
TEST_F(CUDA, REMAP_NCHW_FLOAT) {
    Checker<Remap> checker(handle_cuda());
    std::vector<TestArg> args = get_nchw_args();
    UniformFloatRNG float_rng(0, 255);

#define cb(data_type, data_rng) \
    for (auto arg : args) { \
        UniformFloatRNG map_rng( \
                -2, std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]) + 2); \
        checker.set_dtype(0, data_type) \
                .set_dtype(1, dtype::Float32()) \
                .set_dtype(2, data_type) \
                .set_rng(0, &data_rng) \
                .set_rng(1, &map_rng) \
                .set_rng(2, &data_rng) \
                .set_param(arg.param) \
                .execs({arg.src, arg.map_xy, arg.dst}); \
    }

    cb(dtype::Float32(), float_rng);
    cb(dtype::Float16(), float_rng);
#undef cb

    // BFloat16 carries less precision, so compare with a relaxed epsilon.
#define cb(data_type, data_rng) \
    for (auto arg : args) { \
        UniformFloatRNG map_rng( \
                -2, std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]) + 2); \
        checker.set_dtype(0, data_type) \
                .set_dtype(1, dtype::Float32()) \
                .set_dtype(2, data_type) \
                .set_rng(0, &data_rng) \
                .set_rng(1, &map_rng) \
                .set_rng(2, &data_rng) \
                .set_param(arg.param) \
                .set_epsilon(1e-2) \
                .execs({arg.src, arg.map_xy, arg.dst}); \
    }

    cb(dtype::BFloat16(), float_rng);
#undef cb
}
TEST_F(CUDA, REMAP_NCHW_INT) {
    Checker<Remap> checker(handle_cuda());
    std::vector<TestArg> args = get_nchw_args();
    UniformIntRNG uint8_rng(0, 255);
    UniformIntRNG int8_rng(-128, 127);

    // integer results are allowed to differ from the reference by at most 1.
#define cb(data_type, data_rng) \
    for (auto arg : args) { \
        UniformFloatRNG map_rng( \
                -2, std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]) + 2); \
        checker.set_dtype(0, data_type) \
                .set_dtype(1, dtype::Float32()) \
                .set_dtype(2, data_type) \
                .set_rng(0, &data_rng) \
                .set_rng(1, &map_rng) \
                .set_rng(2, &data_rng) \
                .set_epsilon(1) \
                .set_param(arg.param) \
                .execs({arg.src, arg.map_xy, arg.dst}); \
    }

    cb(dtype::Int8(), int8_rng);
    cb(dtype::Uint8(), uint8_rng);
#undef cb
}
TEST_F(CUDA, REMAP_NHWC_FLOAT) {
    Checker<Remap> checker(handle_cuda());
    std::vector<TestArg> args = get_nhwc_args();
    UniformFloatRNG float_rng(0, 255);

#define cb(data_type, data_rng) \
    for (auto arg : args) { \
        UniformFloatRNG map_rng( \
                -2, std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]) + 2); \
        checker.set_dtype(0, data_type) \
                .set_dtype(1, dtype::Float32()) \
                .set_dtype(2, data_type) \
                .set_rng(0, &data_rng) \
                .set_rng(1, &map_rng) \
                .set_rng(2, &data_rng) \
                .set_param(arg.param) \
                .execs({arg.src, arg.map_xy, arg.dst}); \
    }

    cb(dtype::Float32(), float_rng);
    cb(dtype::Float16(), float_rng);
#undef cb

#define cb(data_type, data_rng) \
    for (auto arg : args) { \
        UniformFloatRNG map_rng( \
                -2, std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]) + 2); \
        checker.set_dtype(0, data_type) \
                .set_dtype(1, dtype::Float32()) \
                .set_dtype(2, data_type) \
                .set_rng(0, &data_rng) \
                .set_rng(1, &map_rng) \
                .set_rng(2, &data_rng) \
                .set_param(arg.param) \
                .set_epsilon(1e-2) \
                .execs({arg.src, arg.map_xy, arg.dst}); \
    }

    cb(dtype::BFloat16(), float_rng);
#undef cb
}
TEST_F(CUDA, REMAP_NHWC_INT) {
    Checker<Remap> checker(handle_cuda());
    std::vector<TestArg> args = get_nhwc_args();
    UniformIntRNG uint8_rng(0, 255);
    UniformIntRNG int8_rng(-128, 127);

#define cb(data_type, data_rng) \
    for (auto arg : args) { \
        UniformFloatRNG map_rng( \
                -2, std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]) + 2); \
        checker.set_dtype(0, data_type) \
                .set_dtype(1, dtype::Float32()) \
                .set_dtype(2, data_type) \
                .set_rng(0, &data_rng) \
                .set_rng(1, &map_rng) \
                .set_rng(2, &data_rng) \
                .set_epsilon(1) \
                .set_param(arg.param) \
                .execs({arg.src, arg.map_xy, arg.dst}); \
    }

    cb(dtype::Int8(), int8_rng);
    cb(dtype::Uint8(), uint8_rng);
#undef cb
}
TEST_F(CUDA, REMAP_BACKWARD_DATA) {
    Checker<RemapBackwardData> checker(handle_cuda());
    std::vector<TestArg> args = get_nchw_args();
    UniformFloatRNG float_rng(0, 255);

    // note the operand order: the Float32 map_xy tensor is operand 0,
    // followed by the dst-shaped and src-shaped tensors.
#define cb(data_type, data_rng) \
    for (auto arg : args) { \
        UniformFloatRNG map_rng( \
                -2, std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]) + 2); \
        checker.set_dtype(1, data_type) \
                .set_dtype(0, dtype::Float32()) \
                .set_dtype(2, data_type) \
                .set_rng(1, &data_rng) \
                .set_rng(0, &map_rng) \
                .set_rng(2, &data_rng) \
                .set_param(arg.param) \
                .execs({arg.map_xy, arg.dst, arg.src}); \
    }

    cb(dtype::Float32(), float_rng);
#undef cb

    // low-precision float types get a relaxed epsilon.
#define cb(data_type, data_rng) \
    for (auto arg : args) { \
        UniformFloatRNG map_rng( \
                -2, std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]) + 2); \
        checker.set_dtype(1, data_type) \
                .set_dtype(0, dtype::Float32()) \
                .set_dtype(2, data_type) \
                .set_rng(1, &data_rng) \
                .set_rng(0, &map_rng) \
                .set_rng(2, &data_rng) \
                .set_param(arg.param) \
                .set_epsilon(1e-1) \
                .execs({arg.map_xy, arg.dst, arg.src}); \
    }

    cb(dtype::BFloat16(), float_rng);
    cb(dtype::Float16(), float_rng);
#undef cb
}
TEST_F(CUDA, REMAP_BACKWARD_MAT) {
    Checker<RemapBackwardMat> checker(handle_cuda());
    std::vector<TestArg> args = get_nchw_args();
    UniformFloatRNG float_rng(0, 255);

#define cb(data_type, data_rng) \
    for (auto arg : args) { \
        UniformFloatRNG map_rng( \
                -2, std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]) + 2); \
        checker.set_dtype(0, data_type) \
                .set_dtype(1, dtype::Float32()) \
                .set_dtype(2, data_type) \
                .set_dtype(3, dtype::Float32()) \
                .set_rng(0, &data_rng) \
                .set_rng(1, &map_rng) \
                .set_rng(2, &data_rng) \
                .set_rng(3, &map_rng) \
                .set_param(arg.param) \
                .set_epsilon(2e-2) \
                .execs({arg.src, arg.map_xy, arg.dst, arg.map_xy}); \
    }

    cb(dtype::Float32(), float_rng);
#undef cb

#define cb(data_type, data_rng) \
    for (auto arg : args) { \
        UniformFloatRNG map_rng( \
                -2, std::max(arg.map_xy.shape[2], arg.map_xy.shape[1]) + 2); \
        checker.set_dtype(0, data_type) \
                .set_dtype(1, dtype::Float32()) \
                .set_dtype(2, data_type) \
                .set_dtype(3, dtype::Float32()) \
                .set_rng(0, &data_rng) \
                .set_rng(1, &map_rng) \
                .set_rng(2, &data_rng) \
                .set_rng(3, &map_rng) \
                .set_param(arg.param) \
                .set_epsilon(1e-1) \
                .execs({arg.src, arg.map_xy, arg.dst, arg.map_xy}); \
    }

    cb(dtype::BFloat16(), float_rng);
    cb(dtype::Float16(), float_rng);
#undef cb
}
#if MEGDNN_WITH_BENCHMARK
TEST_F(CUDA, BENCHMARK_REMAP) {
    using Param = param::Remap;
    auto run = [&](const TensorShapeArray& shapes, Param param, DType dtype) {
        auto handle_cpu = create_cpu_handle(2);
        Benchmarker<Remap> benchmarker_naive(handle_cpu.get());
        CUBenchmarker<Remap> benchmarker_cuda(handle_cuda());
        UniformIntRNG rng(0, 0xff);
        UniformFloatRNG map_rng(
                -2, std::max(shapes[1].shape[1], shapes[1].shape[2]) + 2);
        benchmarker_naive.set_rng(0, &rng);
        benchmarker_cuda.set_rng(0, &rng);
        benchmarker_naive.set_rng(1, &map_rng);
        benchmarker_cuda.set_rng(1, &map_rng);
        benchmarker_naive.set_rng(2, &rng);
        benchmarker_cuda.set_rng(2, &rng);
        benchmarker_naive.set_dtype(1, dtype::Float32());
        benchmarker_cuda.set_dtype(1, dtype::Float32());
        benchmarker_naive.set_dtype(0, dtype).set_dtype(2, dtype);
        benchmarker_cuda.set_dtype(0, dtype).set_dtype(2, dtype);
        size_t RUN = 10;
        auto t1 = benchmarker_naive.set_display(false)
                          .set_times(RUN)
                          .set_param(param)
                          .execs(shapes);
        auto t2 = benchmarker_cuda.set_display(false).set_param(param).execs(shapes);
        int size = 0;
        if (dtype == dtype::Float32{}) {
            size = sizeof(float);
            printf("float32: ");
        } else if (dtype == dtype::Float16{}) {
            size = sizeof(dt_float16);
            printf("float16: ");
        } else if (dtype == dtype::Int8{}) {
            size = sizeof(dt_int8);
            printf("int8: ");
        } else if (dtype == dtype::Uint8{}) {
            size = sizeof(dt_uint8);
            printf("uint8: ");
        }
        const TensorShape map_xy = shapes[1];
        const TensorShape dst_layout = shapes[2];
        // estimated memory traffic in GiB: each dst element reads 4 source
        // values and writes 1 result, plus one float32 read per map entry.
        float calc_amount =
                (dst_layout.total_nr_elems() * (4.f + 1.f) * size +
                 map_xy.total_nr_elems() * sizeof(float)) /
                (1024 * 1024 * 1024);
        printf("naive={%.3fms, %.3fGBPS}, cuda={%.3fms, %.3fGBPS}\n", t1 / RUN,
               calc_amount / (t1 / RUN) * 1e3, t2, calc_amount / t2 * 1e3);
    };

    Param param;
    param.imode = param::Remap::InterpolationMode::LINEAR;
    param.format = param::Remap::Format::NHWC;
    param.border_type = param::Remap::BorderMode::CONSTANT;
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Float32{});
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Float16{});
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Uint8{});
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Int8{});

    param.border_type = param::Remap::BorderMode::REPLICATE;
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Float32{});
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Float16{});
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Uint8{});
    run({{4, 200, 300, 10}, {4, 200, 300, 2}, {4, 200, 300, 10}}, param,
        dtype::Int8{});

    param.format = param::Remap::Format::NCHW;
    param.border_type = param::Remap::BorderMode::CONSTANT;
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Float32{});
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Float16{});
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Uint8{});
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Int8{});

    param.border_type = param::Remap::BorderMode::REPLICATE;
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Float32{});
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Float16{});
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Uint8{});
    run({{4, 10, 200, 300}, {4, 200, 300, 2}, {4, 10, 200, 300}}, param,
        dtype::Int8{});
}
#endif
}  // namespace remap
}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen