You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

norm.cpp 10 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. #include "test/common/norm.h"
  2. #include "megdnn/dtype.h"
  3. #include "megdnn/oprs.h"
  4. #include "test/common/checker.h"
  5. // #include "test/naive/fixture.h"
  6. // #include "test/common/benchmarker.h"
  7. #include <iostream>
  8. #include "test/cuda/benchmark.h"
  9. #include "test/cuda/fixture.h"
  10. #include "test/cuda/utils.h"
  11. namespace megdnn {
  12. namespace test {
  13. // CORRECT
  14. // L2, fp32, dim
  15. TEST_F(CUDA, L2NORM_FP32_DIM0) {
  16. Checker<Norm> checker(handle_cuda());
  17. Norm::Param param;
  18. param.p = 2;
  19. param.dim = 0;
  20. checker.set_param(param);
  21. checker.exect(
  22. Testcase{
  23. TensorValue(
  24. {1, 2, 3, 4}, dtype::Float32(),
  25. {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
  26. 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
  27. {}},
  28. Testcase{
  29. {},
  30. TensorValue(
  31. {1, 2, 3, 4}, dtype::Float32(),
  32. {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
  33. 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
  34. });
  35. }
  36. TEST_F(CUDA, L2NORM_FP32_DIM1) {
  37. Checker<Norm> checker(handle_cuda());
  38. Norm::Param param;
  39. param.p = 2;
  40. param.dim = 1;
  41. checker.set_param(param);
  42. checker.exect(
  43. Testcase{
  44. TensorValue(
  45. {1, 2, 3, 4}, dtype::Float32(),
  46. {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
  47. 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
  48. {}},
  49. Testcase{
  50. {},
  51. TensorValue(
  52. {1, 1, 3, 4}, dtype::Float32(),
  53. {12.000, 13.0384, 14.1421, 15.2971, 16.4924, 17.7200,
  54. 18.9737, 20.2485, 21.5407, 22.8473, 24.1661, 25.4951}),
  55. });
  56. }
  57. TEST_F(CUDA, L2NORM_FP32_DIM3) {
  58. Checker<Norm> checker(handle_cuda());
  59. Norm::Param param;
  60. param.p = 2;
  61. param.dim = 3;
  62. checker.set_param(param).exect(
  63. Testcase{
  64. TensorValue(
  65. {1, 2, 3, 4}, dtype::Float32(),
  66. {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
  67. 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
  68. {}},
  69. Testcase{
  70. {},
  71. TensorValue(
  72. {1, 2, 3, 1}, dtype::Float32(),
  73. {3.7417, 11.2250, 19.1311, 27.0924, 35.0714, 43.0581})});
  74. }
  75. // TODO: support -1 dim param, or test for assert
  76. // l2, fp16
  77. TEST_F(CUDA, L2NORM_FP16_DIM3) {
  78. Checker<Norm> checker(handle_cuda());
  79. Norm::Param param;
  80. param.p = 2;
  81. param.dim = 3;
  82. checker.set_param(param).exect(
  83. Testcase{
  84. TensorValue(
  85. {1, 2, 3, 4}, dtype::Float16(),
  86. {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
  87. 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
  88. {}},
  89. Testcase{
  90. {},
  91. TensorValue(
  92. {1, 2, 3, 1}, dtype::Float16(),
  93. {3.7422, 11.2266, 19.1250, 27.0938, 35.0625, 43.0625})});
  94. }
  95. // l1, fp32,fp16
  96. TEST_F(CUDA, L1NORM_FP32_DIM3) {
  97. Checker<Norm> checker(handle_cuda());
  98. Norm::Param param;
  99. param.p = 1;
  100. param.dim = 3;
  101. checker.set_param(param).exect(
  102. Testcase{
  103. TensorValue(
  104. {1, 2, 3, 4}, dtype::Float32(),
  105. {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
  106. 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
  107. {}},
  108. Testcase{
  109. {},
  110. TensorValue(
  111. {1, 2, 3, 1}, dtype::Float32(), {6, 22, 38, 54, 70, 86}),
  112. });
  113. }
  114. TEST_F(CUDA, L1NORM_FP16_DIM3) {
  115. Checker<Norm> checker(handle_cuda());
  116. Norm::Param param;
  117. param.p = 1;
  118. param.dim = 3;
  119. checker.set_param(param).exect(
  120. Testcase{
  121. TensorValue(
  122. {1, 2, 3, 4}, dtype::Float16(),
  123. {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
  124. 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
  125. {}},
  126. Testcase{
  127. {},
  128. TensorValue(
  129. {1, 2, 3, 1}, dtype::Float16(), {6, 22, 38, 54, 70, 86}),
  130. });
  131. }
  132. // l0, fp32,fp16
  133. TEST_F(CUDA, L0NORM_FP32_DIM3) {
  134. Checker<Norm> checker(handle_cuda());
  135. Norm::Param param;
  136. param.p = 0;
  137. param.dim = 3;
  138. checker.set_param(param).exect(
  139. Testcase{
  140. TensorValue(
  141. {1, 2, 3, 4}, dtype::Float32(),
  142. {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
  143. 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
  144. {}},
  145. Testcase{
  146. {},
  147. TensorValue({1, 2, 3, 1}, dtype::Float32(), {3, 4, 4, 4, 4, 4}),
  148. });
  149. }
  150. TEST_F(CUDA, L0NORM_FP16_DIM3) {
  151. Checker<Norm> checker(handle_cuda());
  152. Norm::Param param;
  153. param.p = 0;
  154. param.dim = 3;
  155. checker.set_param(param).exect(
  156. Testcase{
  157. TensorValue(
  158. {1, 2, 3, 4}, dtype::Float16(),
  159. {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
  160. 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
  161. {}},
  162. Testcase{
  163. {},
  164. TensorValue({1, 2, 3, 1}, dtype::Float16(), {3, 4, 4, 4, 4, 4}),
  165. });
  166. }
  167. // inf
  168. TEST_F(CUDA, INF_NORM_FP32_DIM3) {
  169. Checker<Norm> checker(handle_cuda());
  170. Norm::Param param;
  171. using Mode = Norm::Param::Mode;
  172. param.dim = 3;
  173. param.mode = Mode::INF_NORM;
  174. checker.set_param(param).exect(
  175. Testcase{
  176. TensorValue(
  177. {1, 2, 3, 4}, dtype::Float32(),
  178. {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
  179. 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
  180. {}},
  181. Testcase{
  182. {},
  183. TensorValue({1, 2, 3, 1}, dtype::Float32(), {3, 7, 11, 15, 19, 23}),
  184. });
  185. }
  186. TEST_F(CUDA, INF_NORM_FP16_DIM3) {
  187. Checker<Norm> checker(handle_cuda());
  188. Norm::Param param;
  189. using Mode = Norm::Param::Mode;
  190. param.dim = 3;
  191. param.mode = Mode::INF_NORM;
  192. checker.set_param(param).exect(
  193. Testcase{
  194. TensorValue(
  195. {1, 2, 3, 4}, dtype::Float16(),
  196. {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
  197. 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
  198. {}},
  199. Testcase{
  200. {},
  201. TensorValue({1, 2, 3, 1}, dtype::Float16(), {3, 7, 11, 15, 19, 23}),
  202. });
  203. }
  204. // -inf
  205. TEST_F(CUDA, NEG_INF_NORM_FP32_DIM3) {
  206. Checker<Norm> checker(handle_cuda());
  207. Norm::Param param;
  208. param.mode = Norm::Param::Mode::NEG_INF_NORM;
  209. param.dim = 3;
  210. checker.set_param(param).exect(
  211. Testcase{
  212. TensorValue(
  213. {1, 2, 3, 4}, dtype::Float32(),
  214. {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
  215. 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
  216. {}},
  217. Testcase{
  218. {},
  219. TensorValue({1, 2, 3, 1}, dtype::Float32(), {0, 4, 8, 12, 16, 20}),
  220. });
  221. }
  222. TEST_F(CUDA, NEG_INF_NORM_FP16_DIM3) {
  223. Checker<Norm> checker(handle_cuda());
  224. Norm::Param param;
  225. param.mode = Norm::Param::Mode::NEG_INF_NORM;
  226. param.dim = 3;
  227. checker.set_param(param).exect(
  228. Testcase{
  229. TensorValue(
  230. {1, 2, 3, 4}, dtype::Float16(),
  231. {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
  232. 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
  233. {}},
  234. Testcase{
  235. {},
  236. TensorValue({1, 2, 3, 1}, dtype::Float16(), {0, 4, 8, 12, 16, 20}),
  237. });
  238. }
  239. // PERF
  240. TEST_F(CUDA, L2NORM_SPEED_FP32) {
  241. auto benchmarker = Benchmarker<Norm>(handle_cuda());
  242. benchmarker.set_dtype(0, dtype::Float32());
  243. benchmarker.set_dtype(1, dtype::Float32());
  244. Norm::Param param;
  245. param.mode = Norm::Param::Mode::P_NORM;
  246. param.dim = 0;
  247. param.p = 2;
  248. SmallVector<TensorShape> shapes{{4194304}, {}};
  249. NormalRNG rng(0, 1);
  250. float eachTime;
  251. float totalTime = 0.f;
  252. #define ITER 10
  253. for (auto i = 0; i < ITER; i++) {
  254. eachTime = benchmarker.set_param(param).set_rng(0, &rng).exec(shapes);
  255. // printf("PNORM_SPEED_FP32 cuda time: %.6fms\n", eachTime);
  256. totalTime += eachTime;
  257. }
  258. totalTime /= ITER;
  259. printf("PNORM_SPEED_FP32 AVG TIME: %.6fms\n", totalTime);
  260. #undef ITER
  261. }
  262. TEST_F(CUDA, INFNORM_SPEED_FP32) {
  263. auto benchmarker = Benchmarker<Norm>(handle_cuda());
  264. benchmarker.set_dtype(0, dtype::Float32());
  265. benchmarker.set_dtype(1, dtype::Float32());
  266. Norm::Param param;
  267. param.mode = Norm::Param::Mode::INF_NORM;
  268. param.dim = 0;
  269. SmallVector<TensorShape> shapes{{4194304}, {}};
  270. NormalRNG rng(0, 1);
  271. float time_fp32 = benchmarker.set_param(param).set_rng(0, &rng).exec(shapes);
  272. printf("INF_SPEED_FP32 cuda time: float=%.6fms\n", time_fp32);
  273. }
  274. TEST_F(CUDA, NEG_INFNORM_SPEED_FP32) {
  275. auto benchmarker = Benchmarker<Norm>(handle_cuda());
  276. benchmarker.set_dtype(0, dtype::Float32());
  277. benchmarker.set_dtype(1, dtype::Float32());
  278. Norm::Param param;
  279. param.mode = Norm::Param::Mode::NEG_INF_NORM;
  280. param.dim = 0;
  281. SmallVector<TensorShape> shapes{{4194304}, {}};
  282. NormalRNG rng(0, 1);
  283. float time_fp32 = benchmarker.set_param(param).set_rng(0, &rng).exec(shapes);
  284. printf("NEG_INF_SPEED_FP32 cuda time: float=%.6fms\n", time_fp32);
  285. }
  286. } // namespace test
  287. } // namespace megdnn