reduce.cpp

#include "test/fallback/fixture.h"

#include "megdnn/oprs.h"
#include "test/common/checker.h"
#include "test/common/task_record_check.h"
#include "test/common/tensor.h"
#include "test/common/workspace_wrapper.h"

using namespace megdnn;
using namespace test;

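// REDUCE_FULL sweeps MEAN/MAX/MIN over float and quantized dtypes for every
// reduced axis of an (A, B, C) tensor, then repeats SUM/PRODUCT/SUM_SQR with a
// floating-point RNG and a per-dtype epsilon.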
TEST_F(FALLBACK, REDUCE_FULL) {
    using Param = Reduce::Param;
    using Mode = Param::Mode;
    Checker<Reduce> checker(handle());
    UniformIntRNG rng{INT8_MIN >> 1, INT8_MAX >> 1};
    checker.set_rng(0, &rng);
    struct Config {
        Param param;
        DType dtype;
        TensorShape shape;
        Config(Param param, DType dtype, TensorShape shape)
                : param(param), dtype(dtype), shape(shape) {}
    };
    std::vector<Config> configs;
    for (auto mode : {Mode::MEAN, Mode::MAX, Mode::MIN})
        for (auto dtype : std::vector<DType>{
                     dtype::Float32(), dtype::Float16(), dtype::QuantizedS8(1.3f),
                     dtype::Quantized8Asymm(1.3f, static_cast<uint8_t>(3))})
            for (int32_t axis : {0, 1, 2}) {
                for (size_t A : {1, 3, 5}) {
                    for (size_t B : {4, 6, 9, 16, 33, 45}) {
                        for (size_t C : {4, 6, 9, 16, 33, 45}) {
                            TensorShape shape{A, B, C};
                            Param param(mode, axis);
                            Config config(param, dtype, shape);
                            configs.push_back(config);
                        }
                    }
                }
            }
    for (auto&& config : configs) {
        auto&& dtype = config.dtype;
        auto&& param = config.param;
        auto&& shape = config.shape;
        checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
    }

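    // Second pass: SUM/PRODUCT/SUM_SQR accumulate values, so switch to a
    // floating-point RNG in a small range and use a per-dtype epsilon.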
    configs.clear();
    for (auto mode : {Mode::SUM, Mode::PRODUCT, Mode::SUM_SQR})
        for (auto dtype : std::vector<DType>{dtype::Float32(), dtype::Float16()})
            for (int32_t axis : {0, 1, 2}) {
                for (size_t A : {1, 3, 5}) {
                    for (size_t B : {4, 6, 9, 16, 33, 45}) {
                        for (size_t C : {4, 6, 9, 16, 33, 45}) {
                            TensorShape shape{A, B, C};
                            Param param(mode, axis);
                            Config config(param, dtype, shape);
                            configs.push_back(config);
                        }
                    }
                }
            }

    UniformFloatRNG rng_float(-2, 2);
    checker.set_rng(0, &rng_float);
    checker.set_epsilon(1e-1);
    for (auto&& config : configs) {
        auto&& dtype = config.dtype;
        auto&& param = config.param;
        auto&& shape = config.shape;
        if (dtype == dtype::Float16())
            checker.set_epsilon(1e-1);
        else
            checker.set_epsilon(1e-3);
        checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
    }
}

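// REDUCE checks a fixed (2, 3, 20, 5) shape across all modes and integer/float
// dtypes, including the FLOAT_O16xC32 / FLOAT_O32xC32 mixed-precision paths,
// plus a few large shapes that hit the (ABC)->(A1C) and (AB)->(A1) kernels.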
TEST_F(FALLBACK, REDUCE) {
    using Param = Reduce::Param;
    using Mode = Param::Mode;
    using DataType = Param::DataType;
    Checker<Reduce> checker(handle());
    struct Config {
        Param param;
        DType dtype;
        TensorShape shape;
        Config(Param param, DType dtype, TensorShape shape)
                : param(param), dtype(dtype), shape(shape) {}
    };
    std::vector<Config> configs;
    // general
    for (auto mode :
         {Mode::SUM, Mode::MEAN, Mode::SUM_SQR, Mode::PRODUCT, Mode::MIN, Mode::MAX})
        for (auto dtype : std::vector<DType>{
                     dtype::Float16(), dtype::Float32(), dtype::Int32(), dtype::Int16(),
                     dtype::Int8(), dtype::Uint8()})
            for (int32_t axis : {0, 1, 2, 3}) {
                TensorShape shape{2, 3, 20, 5};
                Param param(mode, axis);
                Config config(param, dtype, shape);
                configs.push_back(config);
                if (dtype.category() == DTypeCategory::FLOAT) {
                    Param param(mode, axis, DataType::FLOAT_O16xC32);
                    Config config(param, dtype, shape);
                    configs.push_back(config);

                    param.data_type = DataType::FLOAT_O32xC32;
                    config = Config(param, dtype, shape);
                    configs.push_back(config);
                } else if (dtype == dtype::Int32()) {
                    Param param(mode, axis, DataType::FLOAT_O32xC32);
                    Config config(param, dtype, shape);
                    configs.push_back(config);
                }
            }
    // large (ABC) -> (A1C) case
    for (auto mode : {Mode::SUM_SQR})
        for (auto dtype : std::vector<DType>{dtype::Int32()})
            for (int32_t axis : {0, 1, 2, 3}) {
                TensorShape shape{2, 3, 10000, 5};
                Param param(mode, axis);
                Config config(param, dtype, shape);
                configs.push_back(config);
            }
    // large (AB) -> (A1) case
    for (auto mode : {Mode::SUM_SQR})
        for (auto dtype : std::vector<DType>{dtype::Int32()})
            for (int32_t axis : {0, 1, 2, 3}) {
                TensorShape shape{2, 3, 5, 10000};
                Param param(mode, axis);
                Config config(param, dtype, shape);
                configs.push_back(config);
            }
    {
        // large reduce_mean for O16C32
        TensorShape shape{1, 65536, 5};
        Param param(Mode::MEAN, 1, DataType::FLOAT_O16xC32);
        Config config(param, dtype::Float16(), shape);
        configs.push_back(config);
    }
    for (auto&& config : configs) {
        auto&& dtype = config.dtype;
        auto&& param = config.param;
        auto&& mode = config.param.mode;
        auto&& shape = config.shape;
        auto&& data_type = config.param.data_type;
        // When input and output are both float16 with DataType::DEFAULT, the
        // internal computation is float16 as well; SUM and SUM_SQR accumulate
        // more error, so relax epsilon to 1e-2 for those cases.
        if (dtype == dtype::Float16() && data_type == DataType::DEFAULT &&
            (mode == Mode::SUM || mode == Mode::SUM_SQR)) {
            checker.set_epsilon(1e-2);
        }
        checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
    }

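    // Large 1-D reduction (N = 2^26): first compare the fallback kernel against
    // the naive reference, then check the naive result against a known sum of ones.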
    {
        static size_t N = 1 << 26;
        {
            // cpu vs naive
            Checker<Reduce> checker(handle());
            Reduce::Param param;
            param.axis = 0;
            UniformFloatRNG rng(1, 1);
            checker.set_param(param);
            checker.set_rng(0, &rng);
            checker.execs({{N}, {}});
        }
        {
            // naive vs groundtruth
            TensorLayout layoutN(TensorShape{N}, dtype::Float32()),
                    layout1(TensorShape{1}, dtype::Float32());
            auto handle = this->handle();
            Tensor<float> src(handle, layoutN), dst(handle, layout1);
            float* ptr = src.ptr();
            for (size_t i = 0; i < N; ++i)
                ptr[i] = 1;
            auto opr = handle->create_operator<Reduce>();
            opr->param().axis = 0;
            auto wsize = opr->get_workspace_in_bytes(layoutN, layout1);
            WorkspaceWrapper workspace(handle, wsize);
            opr->exec(src.tensornd(), dst.tensornd(), workspace.workspace());
            megdnn_sync(handle);
            ASSERT_EQ(N, dst.ptr()[0]);
        }
    }
}

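// REDUCE_RECORD runs the same configurations as REDUCE, but drives them through
// TaskRecordChecker so the cases are exercised via the task-record path.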
TEST_F(FALLBACK, REDUCE_RECORD) {
    using Param = Reduce::Param;
    using Mode = Param::Mode;
    using DataType = Param::DataType;
    TaskRecordChecker<Reduce> checker(1);
    struct Config {
        Param param;
        DType dtype;
        TensorShape shape;
        Config(Param param, DType dtype, TensorShape shape)
                : param(param), dtype(dtype), shape(shape) {}
    };
    std::vector<Config> configs;
    // general
    for (auto mode :
         {Mode::SUM, Mode::MEAN, Mode::SUM_SQR, Mode::PRODUCT, Mode::MIN, Mode::MAX})
        for (auto dtype : std::vector<DType>{
                     dtype::Float16(), dtype::Float32(), dtype::Int32(), dtype::Int16(),
                     dtype::Int8(), dtype::Uint8()})
            for (int32_t axis : {0, 1, 2, 3}) {
                TensorShape shape{2, 3, 20, 5};
                Param param(mode, axis);
                Config config(param, dtype, shape);
                configs.push_back(config);
                if (dtype.category() == DTypeCategory::FLOAT) {
                    Param param(mode, axis, DataType::FLOAT_O16xC32);
                    Config config(param, dtype, shape);
                    configs.push_back(config);

                    param.data_type = DataType::FLOAT_O32xC32;
                    config = Config(param, dtype, shape);
                    configs.push_back(config);
                } else if (dtype == dtype::Int32()) {
                    Param param(mode, axis, DataType::FLOAT_O32xC32);
                    Config config(param, dtype, shape);
                    configs.push_back(config);
                }
            }
    // large (ABC) -> (A1C) case
    for (auto mode : {Mode::SUM_SQR})
        for (auto dtype : std::vector<DType>{dtype::Int32()})
            for (int32_t axis : {0, 1, 2, 3}) {
                TensorShape shape{2, 3, 10000, 5};
                Param param(mode, axis);
                Config config(param, dtype, shape);
                configs.push_back(config);
            }
    // large (AB) -> (A1) case
    for (auto mode : {Mode::SUM_SQR})
        for (auto dtype : std::vector<DType>{dtype::Int32()})
            for (int32_t axis : {0, 1, 2, 3}) {
                TensorShape shape{2, 3, 5, 10000};
                Param param(mode, axis);
                Config config(param, dtype, shape);
                configs.push_back(config);
            }
    {
        // large reduce_mean for O16C32
        TensorShape shape{1, 65536, 5};
        Param param(Mode::MEAN, 1, DataType::FLOAT_O16xC32);
        Config config(param, dtype::Float16(), shape);
        configs.push_back(config);
    }
    for (auto&& config : configs) {
        auto&& dtype = config.dtype;
        auto&& param = config.param;
        auto&& mode = config.param.mode;
        auto&& shape = config.shape;
        auto&& data_type = config.param.data_type;
        // When input and output are both float16 with DataType::DEFAULT, the
        // internal computation is float16 as well; SUM and SUM_SQR accumulate
        // more error, so relax epsilon to 1e-2 for those cases.
        if (dtype == dtype::Float16() && data_type == DataType::DEFAULT &&
            (mode == Mode::SUM || mode == Mode::SUM_SQR)) {
            checker.set_epsilon(1e-2);
        }
        checker.set_dtype(0, dtype).set_param(param).execs({shape, {}});
    }

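    // Repeat the large 1-D reduction checks from REDUCE, this time with the
    // checker side recorded through TaskRecordChecker.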
    {
        static size_t N = 1 << 26;
        {
            // cpu vs naive
            TaskRecordChecker<Reduce> checker(1);
            Reduce::Param param;
            param.axis = 0;
            UniformFloatRNG rng(1, 1);
            checker.set_param(param);
            checker.set_rng(0, &rng);
            checker.execs({{N}, {}});
        }
        {
            // naive vs groundtruth
            TensorLayout layoutN(TensorShape{N}, dtype::Float32()),
                    layout1(TensorShape{1}, dtype::Float32());
            auto handle = this->handle();
            Tensor<float> src(handle, layoutN), dst(handle, layout1);
            float* ptr = src.ptr();
            for (size_t i = 0; i < N; ++i)
                ptr[i] = 1;
            auto opr = handle->create_operator<Reduce>();
            opr->param().axis = 0;
            auto wsize = opr->get_workspace_in_bytes(layoutN, layout1);
            WorkspaceWrapper workspace(handle, wsize);
            opr->exec(src.tensornd(), dst.tensornd(), workspace.workspace());
            megdnn_sync(handle);
            ASSERT_EQ(N, dst.ptr()[0]);
        }
    }
}

// vim: syntax=cpp.doxygen