You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

accuracy_shake.cpp 9.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. /**
  2. * \file dnn/test/cuda/accuracy_shake.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  10. * implied.
  11. */
  12. #include "megdnn/dtype.h"
  13. #include "megdnn/oprs.h"
  14. #include "megdnn/opr_param_defs.h"
  15. #include "test/cuda/fixture.h"
  16. #include "test/cuda/utils.h"
  17. #include "test/common/rng.h"
  18. #include "test/common/accuracy_shake_checker.h"
  19. namespace megdnn {
  20. namespace test {
  21. TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD) {
  22. require_compute_capability(6, 1);
  23. AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
  24. NormalRNG default_rng;
  25. checker.set_dtype(0, dtype::Float32())
  26. .set_dtype(1, dtype::Float32())
  27. .set_dtype(2, dtype::Float32())
  28. .set_rng(0, &default_rng)
  29. .set_rng(1, &default_rng);
  30. // convolution
  31. checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {}, {}, {}});
  32. // convbias without z
  33. checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {}, {}});
  34. // convbias with z
  35. checker.exec({{64, 16, 32, 32},
  36. {64, 16, 3, 3},
  37. {1, 64, 1, 1},
  38. {64, 64, 30, 30},
  39. {}});
  40. ConvBias::Param param;
  41. // group
  42. param.sparse = ConvBias::Param::Sparse::GROUP;
  43. checker.set_param(param);
  44. checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {}, {}, {}});
  45. checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {1, 64, 1, 1}, {}, {}});
  46. checker.exec({{64, 16, 32, 32},
  47. {2, 32, 8, 3, 3},
  48. {1, 64, 1, 1},
  49. {64, 64, 30, 30},
  50. {}});
  51. }
  52. TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHW) {
  53. require_compute_capability(6, 1);
  54. AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
  55. UniformIntRNG int_rng{-128, 127};
  56. checker.set_dtype(0, dtype::QuantizedS8(2.5f))
  57. .set_dtype(1, dtype::QuantizedS8(2.5f))
  58. .set_dtype(2, dtype::QuantizedS32(6.25f))
  59. .set_dtype(3, dtype::QuantizedS8(0.25f))
  60. .set_dtype(4, dtype::QuantizedS8(0.25f))
  61. .set_rng(0, &int_rng)
  62. .set_rng(1, &int_rng)
  63. .set_rng(2, &int_rng)
  64. .set_rng(3, &int_rng);
  65. // convolution
  66. checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {}, {}, {}});
  67. // convbias without z
  68. checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {}, {}});
  69. // convbias with z
  70. checker.exec({{64, 16, 32, 32},
  71. {64, 16, 3, 3},
  72. {1, 64, 1, 1},
  73. {64, 64, 30, 30},
  74. {}});
  75. // group
  76. ConvBias::Param param;
  77. param.sparse = ConvBias::Param::Sparse::GROUP;
  78. checker.set_param(param);
  79. checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {}, {}, {}});
  80. checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {1, 64, 1, 1}, {}, {}});
  81. checker.exec({{64, 16, 32, 32},
  82. {2, 32, 8, 3, 3},
  83. {1, 64, 1, 1},
  84. {64, 64, 30, 30},
  85. {}});
  86. }
  87. TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NHWC) {
  88. require_compute_capability(6, 1);
  89. UniformIntRNG int_rng{-50, 50};
  90. AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
  91. ConvBias::Param param;
  92. param.format = ConvBias::Param::Format::NHWC;
  93. checker.set_dtype(0, dtype::QuantizedS8(2.5f))
  94. .set_dtype(1, dtype::QuantizedS8(2.5f))
  95. .set_dtype(2, dtype::QuantizedS32(6.25f))
  96. .set_dtype(4, dtype::QuantizedS8(60.25f))
  97. .set_rng(0, &int_rng)
  98. .set_rng(1, &int_rng)
  99. .set_rng(2, &int_rng)
  100. .set_param(param);
  101. checker.exec({{20, 32, 32, 4}, {24, 1, 1, 4}, {1, 1, 1, 24}, {}, {}});
  102. param.sparse = ConvBias::Param::Sparse::GROUP;
  103. checker.set_param(param).exec(
  104. {{20, 32, 32, 16}, {4, 4, 1, 1, 4}, {1, 1, 1, 16}, {}, {}});
  105. }
  106. TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHWX) {
  107. using Format = ConvBias::Param::Format;
  108. require_compute_capability(6, 1);
  109. AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
  110. UniformIntRNG int_rng{-5, 5};
  111. UniformFloatRNG float_rng{-50, 50};
  112. checker.set_dtype(0, dtype::QuantizedS8(1.2f))
  113. .set_dtype(1, dtype::QuantizedS8(1.3f))
  114. .set_dtype(2, dtype::QuantizedS32(1.2 * 1.3f))
  115. .set_dtype(3, dtype::QuantizedS8(1.3f))
  116. .set_dtype(4, dtype::QuantizedS8(1.3f))
  117. .set_rng(0, &int_rng)
  118. .set_rng(1, &int_rng)
  119. .set_rng(2, &int_rng)
  120. .set_rng(3, &int_rng);
  121. auto run = [&](const TensorShapeArray& shapes, const Format& format) {
  122. ConvBias::Param param;
  123. param.format = format;
  124. checker.set_param(param).exec(
  125. {shapes[0], shapes[1], shapes[2], {}, {}});
  126. };
  127. run({{20, 2, 24, 24, 4}, {24, 2, 3, 3, 4}, {1, 6, 1, 1, 4}}, Format::NCHW4);
  128. run({{20, 1, 24, 24, 32}, {64, 1, 3, 3, 32}, {1, 2, 1, 1, 32}},
  129. Format::NCHW32);
  130. run({{16, 4, 23, 40, 4},
  131. {32, 4, 3, 3, 4},
  132. {1, 1, 1, 1, 32}}, Format::NCHW4_NCHW32);
  133. checker.set_dtype(0, dtype::QuantizedS8(1.9980618f))
  134. .set_dtype(1, dtype::QuantizedS8(1.9980927f))
  135. .set_dtype(2, dtype::Float32())
  136. .set_dtype(3, dtype::Float32())
  137. .set_dtype(4, dtype::Float32())
  138. .set_rng(0, &int_rng)
  139. .set_rng(1, &int_rng)
  140. .set_rng(2, &float_rng)
  141. .set_rng(3, &float_rng);
  142. run({{16, 4, 92, 160, 4}, {20, 4, 3, 3, 4}, {1, 20, 1, 1}},
  143. Format::NCHW4_NCHW);
  144. }
  145. TEST_F(CUDA, SHAKE_MATRIX_MUL_FORWARD) {
  146. AccuracyShakeChecker<MatrixMul> checker(handle_cuda());
  147. checker.set_dtype(0, dtype::Float32())
  148. .set_dtype(1, dtype::Float32())
  149. .set_dtype(2, dtype::Float32())
  150. .exec({{50, 100}, {100, 60}, {}});
  151. }
  152. TEST_F(CUDA, SHAKE_BATCH_CONV_BIAS_QS8) {
  153. require_compute_capability(6, 1);
  154. AccuracyShakeChecker<BatchConvBiasForward> checker(handle_cuda());
  155. UniformIntRNG const_rng{1, 1};
  156. UniformIntRNG rng{-5, 5};
  157. UniformIntRNG bias_rng{-50, 50};
  158. checker.set_rng(0, &rng)
  159. .set_rng(1, &rng)
  160. .set_rng(2, &rng)
  161. .set_rng(3, &rng)
  162. .set_dtype(0, dtype::QuantizedS8{1.2f})
  163. .set_dtype(1, dtype::QuantizedS8{1.3f})
  164. .set_dtype(2, dtype::QuantizedS32{1.2f * 1.3f})
  165. .set_dtype(3, dtype::QuantizedS8{1.1f})
  166. .set_dtype(4, dtype::QuantizedS8{1.1f});
  167. param::BatchConvBias param;
  168. param.pad_h = 2, param.pad_w = 1;
  169. param.stride_h = 1, param.stride_w = 2;
  170. param.format = param::BatchConvBias::Format::NCHW4;
  171. checker.set_param(param).exec({{32, 4, 24, 24, 4},
  172. {32, 32, 4, 1, 1, 4},
  173. {1, 8, 1, 1, 4},
  174. {},
  175. {}});
  176. }
  177. TEST_F(CUDA, SHAKE_BATCHED_MATRIX_MUL) {
  178. AccuracyShakeChecker<BatchedMatrixMul> checker(handle_cuda());
  179. UniformIntRNG int_rng{-127, 127};
  180. NormalRNG default_rng;
  181. checker.set_dtype(0, dtype::QuantizedS8(1.2f))
  182. .set_dtype(1, dtype::QuantizedS8(1.3f))
  183. .set_dtype(2, {})
  184. .set_rng(0, &int_rng)
  185. .set_rng(1, &int_rng);
  186. checker.exec({{20, 424, 368}, {20, 368, 256}, {20, 424, 256}});
  187. checker.set_dtype(0, dtype::Float32())
  188. .set_dtype(1, dtype::Float32())
  189. .set_dtype(2, dtype::Float32())
  190. .set_rng(0, &default_rng)
  191. .set_rng(1, &default_rng);
  192. checker.exec({{20, 424, 368}, {20, 368, 256}, {20, 424, 256}});
  193. }
  194. TEST_F(CUDA, SHAKE_CONVOLUTION3D_FORWARD) {
  195. AccuracyShakeChecker<Convolution3DForward> checker(handle_cuda());
  196. NormalRNG default_rng;
  197. float scale = 1.0f / sqrt(5);
  198. UniformFloatRNG rng(scale, 2 * scale);
  199. param::Convolution3D param;
  200. param.mode = param::Convolution3D::Mode::CROSS_CORRELATION;
  201. param.stride_d = param.stride_h = param.stride_w = 2;
  202. param.pad_d = param.pad_h = param.pad_w = 0;
  203. param.dilate_d = param.dilate_h = param.dilate_w = 1;
  204. checker.set_dtype(0, dtype::Float32())
  205. .set_dtype(1, dtype::Float32())
  206. .set_rng(0, &default_rng)
  207. .set_rng(1, &default_rng)
  208. .set_param(param)
  209. .exec({{20, 5, 12, 12, 16}, {5, 5, 3, 3, 3}, {}});
  210. }
  211. TEST_F(CUDA, SHAKE_LOCAL_SHARE) {
  212. AccuracyShakeChecker<LocalShare> checker(handle_cuda());
  213. using Param = LocalShare::Param;
  214. Param param;
  215. param.spatial_groups_h = param.spatial_groups_w = 3;
  216. checker.set_param(param);
  217. checker.exec({{20, 16, 32, 32}, {3, 3, 16, 3, 3, 64}, {}});
  218. }
  219. } // namespace test
  220. } // namespace megdnn
  221. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台