
accuracy_shake.cpp

/**
 * \file dnn/test/cuda/accuracy_shake.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include "megdnn/dtype.h"
#include "megdnn/oprs.h"
#include "megdnn/opr_param_defs.h"
#include "test/cuda/fixture.h"
#include "test/cuda/utils.h"
#include "test/common/rng.h"
#include "test/common/accuracy_shake_checker.h"

namespace megdnn {
namespace test {
TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD) {
    require_compute_capability(6, 1);
    AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
    NormalRNG default_rng;
    checker.set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_dtype(2, dtype::Float32())
            .set_rng(0, &default_rng)
            .set_rng(1, &default_rng);
    // convolution
    checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {}, {}, {}});
    // convbias without z
    checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {}, {}});
    // convbias with z
    checker.exec({{64, 16, 32, 32},
                  {64, 16, 3, 3},
                  {1, 64, 1, 1},
                  {64, 64, 30, 30},
                  {}});
    // FIXME: currently group conv cannot get the attribute of its sub-opr, so
    // we just skip group conv here.
}
TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHW) {
    require_compute_capability(6, 1);
    AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
    UniformIntRNG int_rng{-128, 127};
    checker.set_dtype(0, dtype::QuantizedS8(2.5f))
            .set_dtype(1, dtype::QuantizedS8(2.5f))
            .set_dtype(2, dtype::QuantizedS32(6.25f))
            .set_dtype(3, dtype::QuantizedS8(0.25f))
            .set_dtype(4, dtype::QuantizedS8(0.25f))
            .set_rng(0, &int_rng)
            .set_rng(1, &int_rng)
            .set_rng(2, &int_rng)
            .set_rng(3, &int_rng);
    // convolution
    checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {}, {}, {}});
    // convbias without z
    checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {}, {}});
    // convbias with z
    checker.exec({{64, 16, 32, 32},
                  {64, 16, 3, 3},
                  {1, 64, 1, 1},
                  {64, 64, 30, 30},
                  {}});
    // group conv
    ConvBias::Param param;
    param.sparse = ConvBias::Param::Sparse::GROUP;
    checker.set_param(param);
    checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {}, {}, {}});
    checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {1, 64, 1, 1}, {}, {}});
    checker.exec({{64, 16, 32, 32},
                  {2, 32, 8, 3, 3},
                  {1, 64, 1, 1},
                  {64, 64, 30, 30},
                  {}});
}
TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NHWC) {
    require_compute_capability(6, 1);
    UniformIntRNG int_rng{-50, 50};
    AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
    ConvBias::Param param;
    param.format = ConvBias::Param::Format::NHWC;
    checker.set_dtype(0, dtype::QuantizedS8(2.5f))
            .set_dtype(1, dtype::QuantizedS8(2.5f))
            .set_dtype(2, dtype::QuantizedS32(6.25f))
            .set_dtype(4, dtype::QuantizedS8(60.25f))
            .set_rng(0, &int_rng)
            .set_rng(1, &int_rng)
            .set_rng(2, &int_rng)
            .set_param(param);
    checker.exec({{20, 32, 32, 4}, {24, 1, 1, 4}, {1, 1, 1, 24}, {}, {}});
    // group conv
    param.sparse = ConvBias::Param::Sparse::GROUP;
    checker.set_param(param).exec(
            {{20, 32, 32, 16}, {4, 4, 1, 1, 4}, {1, 1, 1, 16}, {}, {}});
}
TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHWX) {
    using Format = ConvBias::Param::Format;
    require_compute_capability(6, 1);
    AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
    UniformIntRNG int_rng{-5, 5};
    UniformFloatRNG float_rng{-50, 50};
    checker.set_dtype(0, dtype::QuantizedS8(1.2f))
            .set_dtype(1, dtype::QuantizedS8(1.3f))
            .set_dtype(2, dtype::QuantizedS32(1.2f * 1.3f))
            .set_dtype(3, dtype::QuantizedS8(1.3f))
            .set_dtype(4, dtype::QuantizedS8(1.3f))
            .set_rng(0, &int_rng)
            .set_rng(1, &int_rng)
            .set_rng(2, &int_rng)
            .set_rng(3, &int_rng);
    auto run = [&](const TensorShapeArray& shapes, const Format& format) {
        ConvBias::Param param;
        param.format = format;
        checker.set_param(param).exec(
                {shapes[0], shapes[1], shapes[2], {}, {}});
    };
    run({{20, 2, 24, 24, 4}, {24, 2, 3, 3, 4}, {1, 6, 1, 1, 4}}, Format::NCHW4);
    run({{20, 1, 24, 24, 32}, {64, 1, 3, 3, 32}, {1, 2, 1, 1, 32}},
        Format::NCHW32);
    run({{16, 4, 23, 40, 4}, {32, 4, 3, 3, 4}, {1, 1, 1, 1, 32}},
        Format::NCHW4_NCHW32);
    checker.set_dtype(0, dtype::QuantizedS8(1.9980618f))
            .set_dtype(1, dtype::QuantizedS8(1.9980927f))
            .set_dtype(2, dtype::Float32())
            .set_dtype(3, dtype::Float32())
            .set_dtype(4, dtype::Float32())
            .set_rng(0, &int_rng)
            .set_rng(1, &int_rng)
            .set_rng(2, &float_rng)
            .set_rng(3, &float_rng);
    run({{16, 4, 92, 160, 4}, {20, 4, 3, 3, 4}, {1, 20, 1, 1}},
        Format::NCHW4_NCHW);
}
TEST_F(CUDA, SHAKE_MATRIX_MUL_FORWARD) {
    AccuracyShakeChecker<MatrixMul> checker(handle_cuda());
    checker.set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_dtype(2, dtype::Float32())
            .exec({{50, 100}, {100, 60}, {}});
}
TEST_F(CUDA, SHAKE_BATCH_CONV_BIAS_QS8) {
    require_compute_capability(6, 1);
    AccuracyShakeChecker<BatchConvBiasForward> checker(handle_cuda());
    UniformIntRNG const_rng{1, 1};
    UniformIntRNG rng{-5, 5};
    UniformIntRNG bias_rng{-50, 50};
    checker.set_rng(0, &rng)
            .set_rng(1, &rng)
            .set_rng(2, &rng)
            .set_rng(3, &rng)
            .set_dtype(0, dtype::QuantizedS8{1.2f})
            .set_dtype(1, dtype::QuantizedS8{1.3f})
            .set_dtype(2, dtype::QuantizedS32{1.2f * 1.3f})
            .set_dtype(3, dtype::QuantizedS8{1.1f})
            .set_dtype(4, dtype::QuantizedS8{1.1f});
    param::BatchConvBias param;
    param.pad_h = 2, param.pad_w = 1;
    param.stride_h = 1, param.stride_w = 2;
    param.format = param::BatchConvBias::Format::NCHW4;
    checker.set_param(param).exec({{32, 4, 24, 24, 4},
                                   {32, 32, 4, 1, 1, 4},
                                   {1, 8, 1, 1, 4},
                                   {},
                                   {}});
}
TEST_F(CUDA, SHAKE_BATCHED_MATRIX_MUL) {
    AccuracyShakeChecker<BatchedMatrixMul> checker(handle_cuda());
    UniformIntRNG int_rng{-127, 127};
    NormalRNG default_rng;
    checker.set_dtype(0, dtype::QuantizedS8(1.2f))
            .set_dtype(1, dtype::QuantizedS8(1.3f))
            .set_dtype(2, {})
            .set_rng(0, &int_rng)
            .set_rng(1, &int_rng);
    checker.exec({{20, 424, 368}, {20, 368, 256}, {20, 424, 256}});
    checker.set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_dtype(2, dtype::Float32())
            .set_rng(0, &default_rng)
            .set_rng(1, &default_rng);
    checker.exec({{20, 424, 368}, {20, 368, 256}, {20, 424, 256}});
}
TEST_F(CUDA, SHAKE_CONVOLUTION3D_FORWARD) {
    AccuracyShakeChecker<Convolution3DForward> checker(handle_cuda());
    NormalRNG default_rng;
    float scale = 1.0f / sqrt(5);
    UniformFloatRNG rng(scale, 2 * scale);
    param::Convolution3D param;
    param.mode = param::Convolution3D::Mode::CROSS_CORRELATION;
    param.stride_d = param.stride_h = param.stride_w = 2;
    param.pad_d = param.pad_h = param.pad_w = 0;
    param.dilate_d = param.dilate_h = param.dilate_w = 1;
    checker.set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_rng(0, &default_rng)
            .set_rng(1, &default_rng)
            .set_param(param)
            .exec({{20, 5, 12, 12, 16}, {5, 5, 3, 3, 3}, {}});
}
TEST_F(CUDA, SHAKE_LOCAL_SHARE) {
    AccuracyShakeChecker<LocalShare> checker(handle_cuda());
    using Param = LocalShare::Param;
    Param param;
    param.spatial_groups_h = param.spatial_groups_w = 3;
    checker.set_param(param);
    checker.exec({{20, 16, 32, 32}, {3, 3, 16, 3, 3, 64}, {}});
}
TEST_F(CUDA, SHAKE_CONVOLUTION_BACKWARD_DATA) {
    AccuracyShakeChecker<ConvolutionBackwardData> checker(handle_cuda());
    NormalRNG default_rng;
    checker.set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_rng(0, &default_rng)
            .set_rng(1, &default_rng);
    checker.exec({{8, 16, 3, 3}, {64, 8, 5, 5}, {64, 16, 7, 7}});
    // FIXME: currently group conv cannot get the attribute of its sub-opr, so
    // we just skip group conv here.
}
TEST_F(CUDA, SHAKE_CONVOLUTION_BACKWARD_FILTER) {
    AccuracyShakeChecker<ConvolutionBackwardFilter> checker(handle_cuda());
    NormalRNG default_rng;
    checker.set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_rng(0, &default_rng)
            .set_rng(1, &default_rng);
    checker.exec({{2, 64, 7, 7}, {2, 32, 5, 5}, {32, 64, 3, 3}});
    // FIXME: currently group conv cannot get the attribute of its sub-opr, so
    // we just skip group conv here.
}
}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose. To run GPU programs, make sure the machine has a GPU device with a working driver installed. If you would like to try deep-learning development on cloud GPU compute, you are welcome to visit the MegStudio platform.
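As a quick sanity check after installation, a minimal Python sketch along the following lines (assuming the megengine package is installed via pip; the tensor values are arbitrary) confirms whether the bundled CUDA runtime can actually see a GPU:

# Quick GPU-availability check with MegEngine.
import megengine as mge
import megengine.functional as F

# The same wheel serves CPU-only and GPU machines; this reports whether a
# CUDA-capable device and driver were detected at runtime.
print("CUDA available:", mge.is_cuda_available())

# Computation is placed on the GPU automatically when one is available,
# and falls back to the CPU otherwise.
x = mge.tensor([[1.0, 2.0], [3.0, 4.0]])
y = mge.tensor([[5.0, 6.0], [7.0, 8.0]])
print(F.matmul(x, y))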