You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

accuracy_shake.cpp 9.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. #include "megdnn/dtype.h"
  2. #include "megdnn/opr_param_defs.h"
  3. #include "megdnn/oprs.h"
  4. #include "test/common/accuracy_shake_checker.h"
  5. #include "test/common/rng.h"
  6. #include "test/cuda/fixture.h"
  7. #include "test/cuda/utils.h"
  8. namespace megdnn {
  9. namespace test {
  10. TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD) {
  11. require_compute_capability(6, 1);
  12. AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
  13. NormalRNG default_rng;
  14. checker.set_dtype(0, dtype::Float32())
  15. .set_dtype(1, dtype::Float32())
  16. .set_dtype(2, dtype::Float32())
  17. .set_rng(0, &default_rng)
  18. .set_rng(1, &default_rng);
  19. // convolution
  20. checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {}, {}, {}});
  21. // convbias without z
  22. checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {}, {}});
  23. // convbias with z
  24. checker.exec(
  25. {{64, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {64, 64, 30, 30}, {}});
  26. ConvBias::Param param;
  27. // FIXME currently group conv cannot get the attribute of it's subopr, so we
  28. // just ignore group conv here.
  29. }
  30. TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHW) {
  31. require_compute_capability(6, 1);
  32. AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
  33. UniformIntRNG int_rng{-128, 127};
  34. checker.set_dtype(0, dtype::QuantizedS8(2.5f))
  35. .set_dtype(1, dtype::QuantizedS8(2.5f))
  36. .set_dtype(2, dtype::QuantizedS32(6.25f))
  37. .set_dtype(3, dtype::QuantizedS8(0.25f))
  38. .set_dtype(4, dtype::QuantizedS8(0.25f))
  39. .set_rng(0, &int_rng)
  40. .set_rng(1, &int_rng)
  41. .set_rng(2, &int_rng)
  42. .set_rng(3, &int_rng);
  43. // convolution
  44. checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {}, {}, {}});
  45. // convbias without z
  46. checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {}, {}});
  47. // convbias with z
  48. checker.exec(
  49. {{64, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {64, 64, 30, 30}, {}});
  50. // group
  51. ConvBias::Param param;
  52. param.sparse = ConvBias::Param::Sparse::GROUP;
  53. checker.set_param(param);
  54. checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {}, {}, {}});
  55. checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {1, 64, 1, 1}, {}, {}});
  56. checker.exec(
  57. {{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {1, 64, 1, 1}, {64, 64, 30, 30}, {}});
  58. }
  59. TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NHWC) {
  60. require_compute_capability(6, 1);
  61. UniformIntRNG int_rng{-50, 50};
  62. AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
  63. ConvBias::Param param;
  64. param.format = ConvBias::Param::Format::NHWC;
  65. checker.set_dtype(0, dtype::QuantizedS8(2.5f))
  66. .set_dtype(1, dtype::QuantizedS8(2.5f))
  67. .set_dtype(2, dtype::QuantizedS32(6.25f))
  68. .set_dtype(4, dtype::QuantizedS8(60.25f))
  69. .set_rng(0, &int_rng)
  70. .set_rng(1, &int_rng)
  71. .set_rng(2, &int_rng)
  72. .set_param(param);
  73. checker.exec({{20, 32, 32, 4}, {24, 1, 1, 4}, {1, 1, 1, 24}, {}, {}});
  74. param.sparse = ConvBias::Param::Sparse::GROUP;
  75. checker.set_param(param).exec(
  76. {{20, 32, 32, 16}, {4, 4, 1, 1, 4}, {1, 1, 1, 16}, {}, {}});
  77. }
  78. TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHWX) {
  79. require_compute_capability(7, 5);
  80. using Format = ConvBias::Param::Format;
  81. AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
  82. UniformIntRNG int_rng{-5, 5};
  83. UniformFloatRNG float_rng{-50, 50};
  84. checker.set_dtype(0, dtype::QuantizedS8(1.2f))
  85. .set_dtype(1, dtype::QuantizedS8(1.3f))
  86. .set_dtype(2, dtype::QuantizedS32(1.2 * 1.3f))
  87. .set_dtype(3, dtype::QuantizedS8(1.3f))
  88. .set_dtype(4, dtype::QuantizedS8(1.3f))
  89. .set_rng(0, &int_rng)
  90. .set_rng(1, &int_rng)
  91. .set_rng(2, &int_rng)
  92. .set_rng(3, &int_rng);
  93. auto run = [&](const TensorShapeArray& shapes, const Format& format) {
  94. ConvBias::Param param;
  95. param.format = format;
  96. checker.set_param(param).exec({shapes[0], shapes[1], shapes[2], {}, {}});
  97. };
  98. run({{20, 2, 24, 24, 4}, {24, 2, 3, 3, 4}, {1, 6, 1, 1, 4}}, Format::NCHW4);
  99. run({{20, 1, 24, 24, 32}, {64, 1, 3, 3, 32}, {1, 2, 1, 1, 32}}, Format::NCHW32);
  100. run({{16, 4, 23, 40, 4}, {32, 4, 3, 3, 4}, {1, 1, 1, 1, 32}}, Format::NCHW4_NCHW32);
  101. checker.set_dtype(0, dtype::QuantizedS8(1.9980618f))
  102. .set_dtype(1, dtype::QuantizedS8(1.9980927f))
  103. .set_dtype(2, dtype::Float32())
  104. .set_dtype(3, dtype::Float32())
  105. .set_dtype(4, dtype::Float32())
  106. .set_rng(0, &int_rng)
  107. .set_rng(1, &int_rng)
  108. .set_rng(2, &float_rng)
  109. .set_rng(3, &float_rng);
  110. run({{16, 4, 92, 160, 4}, {20, 4, 3, 3, 4}, {1, 20, 1, 1}}, Format::NCHW4_NCHW);
  111. }
  112. TEST_F(CUDA, SHAKE_MATRIX_MUL_FORWARD) {
  113. require_compute_capability(6, 1);
  114. AccuracyShakeChecker<MatrixMul> checker(handle_cuda());
  115. checker.set_dtype(0, dtype::Float32())
  116. .set_dtype(1, dtype::Float32())
  117. .set_dtype(2, dtype::Float32())
  118. .exec({{50, 100}, {100, 60}, {}});
  119. }
  120. TEST_F(CUDA, SHAKE_BATCH_CONV_BIAS_QS8) {
  121. require_compute_capability(6, 1);
  122. AccuracyShakeChecker<BatchConvBiasForward> checker(handle_cuda());
  123. UniformIntRNG const_rng{1, 1};
  124. UniformIntRNG rng{-5, 5};
  125. UniformIntRNG bias_rng{-50, 50};
  126. checker.set_rng(0, &rng)
  127. .set_rng(1, &rng)
  128. .set_rng(2, &rng)
  129. .set_rng(3, &rng)
  130. .set_dtype(0, dtype::QuantizedS8{1.2f})
  131. .set_dtype(1, dtype::QuantizedS8{1.3f})
  132. .set_dtype(2, dtype::QuantizedS32{1.2f * 1.3f})
  133. .set_dtype(3, dtype::QuantizedS8{1.1f})
  134. .set_dtype(4, dtype::QuantizedS8{1.1f});
  135. param::BatchConvBias param;
  136. param.pad_h = 2, param.pad_w = 1;
  137. param.stride_h = 1, param.stride_w = 2;
  138. param.format = param::BatchConvBias::Format::NCHW4;
  139. checker.set_param(param).exec(
  140. {{32, 4, 24, 24, 4}, {32, 32, 4, 1, 1, 4}, {1, 8, 1, 1, 4}, {}, {}});
  141. }
  142. TEST_F(CUDA, SHAKE_BATCHED_MATRIX_MUL) {
  143. require_compute_capability(6, 1);
  144. AccuracyShakeChecker<BatchedMatrixMul> checker(handle_cuda());
  145. UniformIntRNG int_rng{-127, 127};
  146. NormalRNG default_rng;
  147. checker.set_dtype(0, dtype::QuantizedS8(1.2f))
  148. .set_dtype(1, dtype::QuantizedS8(1.3f))
  149. .set_dtype(2, {})
  150. .set_rng(0, &int_rng)
  151. .set_rng(1, &int_rng);
  152. checker.exec({{20, 424, 368}, {20, 368, 256}, {20, 424, 256}});
  153. checker.set_dtype(0, dtype::Float32())
  154. .set_dtype(1, dtype::Float32())
  155. .set_dtype(2, dtype::Float32())
  156. .set_rng(0, &default_rng)
  157. .set_rng(1, &default_rng);
  158. checker.exec({{20, 424, 368}, {20, 368, 256}, {20, 424, 256}});
  159. }
  160. TEST_F(CUDA, SHAKE_CONVOLUTION3D_FORWARD) {
  161. require_compute_capability(6, 1);
  162. AccuracyShakeChecker<Convolution3DForward> checker(handle_cuda());
  163. NormalRNG default_rng;
  164. float scale = 1.0f / sqrt(5);
  165. UniformFloatRNG rng(scale, 2 * scale);
  166. param::Convolution3D param;
  167. param.mode = param::Convolution3D::Mode::CROSS_CORRELATION;
  168. param.stride_d = param.stride_h = param.stride_w = 2;
  169. param.pad_d = param.pad_h = param.pad_w = 0;
  170. param.dilate_d = param.dilate_h = param.dilate_w = 1;
  171. checker.set_dtype(0, dtype::Float32())
  172. .set_dtype(1, dtype::Float32())
  173. .set_rng(0, &default_rng)
  174. .set_rng(1, &default_rng)
  175. .set_param(param)
  176. .exec({{20, 5, 12, 12, 16}, {5, 5, 3, 3, 3}, {}});
  177. }
  178. TEST_F(CUDA, SHAKE_LOCAL_SHARE) {
  179. require_compute_capability(6, 1);
  180. AccuracyShakeChecker<LocalShare> checker(handle_cuda());
  181. using Param = LocalShare::Param;
  182. Param param;
  183. param.spatial_groups_h = param.spatial_groups_w = 3;
  184. checker.set_param(param);
  185. checker.exec({{20, 16, 32, 32}, {3, 3, 16, 3, 3, 64}, {}});
  186. }
  187. TEST_F(CUDA, SHAKE_CONVOLUTION_BACKWARD_DATA) {
  188. require_compute_capability(6, 1);
  189. AccuracyShakeChecker<ConvolutionBackwardData> checker(handle_cuda());
  190. NormalRNG default_rng;
  191. checker.set_dtype(0, dtype::Float32())
  192. .set_dtype(1, dtype::Float32())
  193. .set_rng(0, &default_rng)
  194. .set_rng(1, &default_rng);
  195. checker.exec({{8, 16, 3, 3}, {64, 8, 5, 5}, {64, 16, 7, 7}});
  196. // FIXME currently group conv cannot get the attribute of it's subopr, so we
  197. // just ignore group conv here.
  198. }
  199. TEST_F(CUDA, SHAKE_CONVOLUTION_BACKWARD_FILTER) {
  200. require_compute_capability(6, 1);
  201. AccuracyShakeChecker<ConvolutionBackwardFilter> checker(handle_cuda());
  202. NormalRNG default_rng;
  203. checker.set_dtype(0, dtype::Float32())
  204. .set_dtype(1, dtype::Float32())
  205. .set_rng(0, &default_rng)
  206. .set_rng(1, &default_rng);
  207. checker.exec({{2, 64, 7, 7}, {2, 32, 5, 5}, {32, 64, 3, 3}});
  208. // FIXME currently group conv cannot get the attribute of it's subopr, so we
  209. // just ignore group conv here.
  210. }
  211. } // namespace test
  212. } // namespace megdnn
  213. // vim: syntax=cpp.doxygen