checker.cpp

/**
 * \file dnn/test/common/checker.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "./checker.h"

#include "megdnn/tensor_iter.h"
#include "megdnn/tensor_format.h"
#include "test/common/tensor.h"
#include "test/common/timer.h"

using namespace megdnn;
using namespace test;
namespace {

bool good_float(float val) {
    return std::isfinite(val);
}

bool good_float(int) {
    return true;
}

bool good_float(dt_qint8) {
    return true;
}

bool good_float(dt_qint16) {
    return true;
}

bool good_float(dt_quint8) {
    return true;
}

bool good_float(dt_qint32) {
    return true;
}
// A hack for the (x+0) promote to int trick on dt_quint8.
int operator+(dt_quint8 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_uint8();
}

int operator+(dt_qint32 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_int32();
}

int operator+(dt_qint8 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return int8_t(lhs);
}

int operator+(dt_qint16 lhs, int rhs) {
    megdnn_assert(rhs == 0, "unexpected rhs");
    return lhs.as_int16();
}
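// Compare two tensors element by element through iterators, accumulating both
// the absolute error (for the average-error check) and the signed error (for
// the biased-average check). Any single element whose error exceeds maxerr,
// or any non-finite float value, fails the assertion immediately.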
template <typename ctype, class Iter>
::testing::AssertionResult assert_tensor_eq_with_iter(
        const char* expr0, const char* expr1, Iter it0, Iter it1,
        const TensorLayout& layout, float maxerr, float maxerr_avg,
        float maxerr_avg_biased) {
    auto nr_elem = layout.total_nr_elems();
    double error_sum = 0;
    double error_sum_biased = 0;
    for (size_t i = 0; i < nr_elem; ++i) {
        ctype iv0 = *it0, iv1 = *it1;
        float err = diff(iv0, iv1);
        error_sum += std::abs(err);
        error_sum_biased += err;
        if (!good_float(iv0) || !good_float(iv1) || std::abs(err) > maxerr) {
            Index index(layout, i);
            return ::testing::AssertionFailure()
                   << "Unequal value\n"
                   << "Value of: " << expr1 << "\n"
                   << "  Actual: " << (iv1 + 0) << "\n"
                   << "Expected: " << expr0 << "\n"
                   << "Which is: " << (iv0 + 0) << "\n"
                   << "At index: " << index.to_string() << "/"
                   << layout.TensorShape::to_string() << "\n"
                   << "   DType: " << layout.dtype.name() << "\n"
                   << "   error: " << std::abs(err) << "/" << maxerr;
        }
        ++it0;
        ++it1;
    }

    float error_avg = error_sum / nr_elem;
    if (error_avg > maxerr_avg) {
        return ::testing::AssertionFailure()
               << "Average error exceeds the upper limit\n"
               << "Value of: " << expr1 << "\n"
               << "Expected: " << expr0 << "\n"
               << "Average error: " << error_avg << "/" << maxerr_avg << "\n"
               << "Num of elements: " << nr_elem;
    }

    float error_avg_biased = error_sum_biased / nr_elem;
    if (std::abs(error_avg_biased) > maxerr_avg_biased) {
        return ::testing::AssertionFailure()
               << "Average biased error exceeds the upper limit\n"
               << "Value of: " << expr1 << "\n"
               << "Expected: " << expr0 << "\n"
               << "Average biased error: " << error_avg_biased << "/"
               << maxerr_avg_biased << "\n"
               << "Num of elements: " << nr_elem;
    }

    return ::testing::AssertionSuccess();
}
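// Pick the cheapest way to traverse the tensors: when both layouts are
// physically contiguous, the raw pointers can serve as iterators directly;
// otherwise fall back to the value-only tensor iterator, which handles
// arbitrary strides at some extra cost.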
template <typename ctype>
::testing::AssertionResult assert_tensor_eq_with_dtype(
        const char* expr0, const char* expr1, const TensorND& v0,
        const TensorND& v1, float maxerr, float maxerr_avg,
        float maxerr_avg_biased) {
    if (v0.layout.is_physical_contiguous() &&
        v1.layout.is_physical_contiguous()) {
        return assert_tensor_eq_with_iter<ctype>(
                expr0, expr1, v0.ptr<ctype>(), v1.ptr<ctype>(), v0.layout,
                maxerr, maxerr_avg, maxerr_avg_biased);
    }

    auto it0 = megdnn::tensor_iter_valonly<ctype>(v0).begin(),
         it1 = megdnn::tensor_iter_valonly<ctype>(v1).begin();
    return assert_tensor_eq_with_iter<ctype>(
            expr0, expr1, it0, it1, v0.layout, maxerr, maxerr_avg,
            maxerr_avg_biased);
}
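// 4-bit tensors pack two elements per byte, so they cannot go through the
// generic typed iterator above. Each nibble is unpacked with a left shift
// followed by a right shift of 4 (arithmetic for the signed int8 case, so
// the nibble's sign bit is preserved) and compared manually.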
template <typename ITYPE>
::testing::AssertionResult assert_tensor_eq_with_lowbit4(
        const char* expr0, const char* expr1, const TensorND& v0,
        const TensorND& v1, float maxerr, float maxerr_avg) {
    if (!v0.layout.eq_layout(v1.layout)) {
        return ::testing::AssertionFailure()
               << "Layout mismatch for testing equality of lowbit4\n"
               << "Value of: " << expr1 << "\n"
               << "  Actual: " << v1.layout.TensorShape::to_string() << "\n"
               << "Expected: " << expr0 << "\n"
               << "Which is: " << v0.layout.TensorShape::to_string() << "\n";
    }
    auto v0_ptr = static_cast<ITYPE*>(v0.raw_ptr) - v0.layout.span().low_byte;
    auto v1_ptr = static_cast<ITYPE*>(v1.raw_ptr) - v1.layout.span().low_byte;
    double error_sum = 0;
    for (size_t i = 0; i < v0.layout.span().dist_elem(); ++i) {
        // even indices occupy the low nibble, odd indices the high one, so
        // the shift count must depend only on the parity bit of i
        ITYPE iv0 = (v0_ptr[i / 2] << ((i & 1) ^ 1) * 4);
        iv0 = iv0 >> 4;
        ITYPE iv1 = (v1_ptr[i / 2] << ((i & 1) ^ 1) * 4);
        iv1 = iv1 >> 4;
        float err = std::abs(diff(iv0, iv1));
        error_sum += err;
        if (!good_float(iv0) || !good_float(iv1) || err >= maxerr) {
            Index index(v0.layout, i);
            return ::testing::AssertionFailure()
                   << "Unequal value\n"
                   << "Value of: " << expr1 << "\n"
                   << "  Actual: " << (iv1 + 0) << "\n"
                   << "Expected: " << expr0 << "\n"
                   << "Which is: " << (iv0 + 0) << "\n"
                   << "At index: " << index.to_string() << "/"
                   << v0.layout.TensorShape::to_string() << "\n"
                   << "   DType: " << v0.layout.dtype.name() << "\n"
                   << "   error: " << err << "/" << maxerr;
        }
    }
    float error_avg = error_sum / v0.layout.total_nr_elems();
    if (error_avg > maxerr_avg) {
        return ::testing::AssertionFailure()
               << "Average error too high\n"
               << "Value of: " << expr1 << "\n"
               << "Expected: " << expr0 << "\n"
               << "Average error: " << error_avg << "/" << maxerr_avg;
    }
    return ::testing::AssertionSuccess();
}
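// Copy exactly the bytes spanned by a (possibly non-contiguous) layout:
// offset both pointers by the span's low byte, then hand the span length to
// the given copy implementation (plain memcpy, H2D or D2H).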
template <class Impl>
void memcpy_noncontig(
        void* dst, const void* src, const TensorLayout& layout,
        const Impl& impl) {
    auto span = layout.span();
    dst = static_cast<dt_byte*>(dst) + span.low_byte;
    src = static_cast<const dt_byte*>(src) + span.low_byte;
    impl(dst, src, span.dist_byte());
}

template <typename Impl>
void copy_tensors(const CheckerHelper::TensorValueArray& dest,
                  const CheckerHelper::TensorValueArray& src,
                  const Impl& copy_impl) {
    megdnn_assert(dest.size() == src.size());
    for (size_t i = 0; i < src.size(); i++) {
        auto&& tensor = src[i];
        if (tensor.layout.ndim == 0)
            continue;
        memcpy_noncontig(dest[i].raw_ptr, tensor.raw_ptr, tensor.layout,
                         copy_impl);
    }
}

void copy_tensors(const CheckerHelper::TensorValueArray& dest,
                  const CheckerHelper::TensorValueArray& src) {
    copy_tensors(dest, src, memcpy);
}

}  // anonymous namespace
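// Backend of the MEGDNN_ASSERT_TENSOR_EQ_EPS_AVG macro (see check_tensors
// below): after verifying that shapes and dtypes match, dispatch to the typed
// comparison through the dtype switch; 4-bit types take the dedicated
// lowbit4 path.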
::testing::AssertionResult test::__assert_tensor_eq(
        const char* expr0, const char* expr1, const char* /*expr_maxerr*/,
        const char* /*expr_maxerr_avg*/,
        const char* /*expr_maxerr_avg_biased*/,
        const TensorND& v0, const TensorND& v1,
        float maxerr, float maxerr_avg, float maxerr_avg_biased) {
    if (!v0.layout.eq_shape(v1.layout)) {
        return ::testing::AssertionFailure()
               << "Shape mismatch\n"
               << "Value of: " << expr1 << "\n"
               << "  Actual: " << v1.layout.TensorShape::to_string() << "\n"
               << "Expected: " << expr0 << "\n"
               << "Which is: " << v0.layout.TensorShape::to_string() << "\n";
    }
    auto dtype = v0.layout.dtype;
    if (dtype != v1.layout.dtype) {
        return ::testing::AssertionFailure()
               << "Data type mismatch\n"
               << "Value of: " << expr1 << "\n"
               << "  Actual: " << v1.layout.dtype.name() << "\n"
               << "Expected: " << expr0 << "\n"
               << "Which is: " << v0.layout.dtype.name() << "\n";
    }

    switch (dtype.enumv()) {
#define cb(_dt)                                                     \
    case DTypeTrait<_dt>::enumv:                                    \
        return assert_tensor_eq_with_dtype<DTypeTrait<_dt>::ctype>( \
                expr0, expr1, v0, v1, maxerr, maxerr_avg, maxerr_avg_biased);
        MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
        MEGDNN_FOREACH_QUANTIZED_DTYPE(cb)
        //! In order to avoid an unnecessary increase in binary size, we just
        //! use QuantizedS16 dtype in winograd_filter_preprocess now.
        cb(::megdnn::dtype::QuantizedS16)
        case DTypeTrait<dtype::Quantized4Asymm>::enumv:
            return assert_tensor_eq_with_lowbit4<uint8_t>(
                    expr0, expr1, v0, v1, maxerr, maxerr_avg);
        case DTypeTrait<dtype::QuantizedS4>::enumv:
            return assert_tensor_eq_with_lowbit4<int8_t>(
                    expr0, expr1, v0, v1, maxerr, maxerr_avg);
#undef cb
        default:
            megdnn_trap();
    }
}
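// The reference results come from a CPU handle created at debug level 2,
// which in megdnn's test helpers selects the naive implementations.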
CheckerHelper::CheckerHelper(Handle* handle, bool check_dispatch)
        : m_handle_naive(create_cpu_handle(2, check_dispatch)),
          m_handle_cur(handle),
          m_default_rng(new NormalRNG()) {}

CheckerHelper::~CheckerHelper() noexcept = default;
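// Run an operator against a fixed testcase: copy the given inputs to the
// device, execute, copy the results back to the host, and compare them with
// the expected outputs.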
void CheckerHelper::do_exec_with_testcases(const TensorValueArray& testcase_in,
                                           const TensorValueArray& testcase_out,
                                           const OprExec& exec_opr) {
    m_prev_succ = false;

    // Validate layouts of tensors in testcase_in and testcase_out.
    // It must be possible to aggregate the layouts of inputs and outputs.
    TensorLayoutArray layouts;
    for (size_t i = 0; i < testcase_in.size(); i++) {
        // ndim == 0 means does not apply.
        ASSERT_TRUE(testcase_in[i].layout.ndim == 0 ||
                    testcase_out[i].layout.ndim == 0 ||
                    testcase_in[i].layout.eq_layout(testcase_out[i].layout));
        layouts.emplace_back(testcase_in[i].layout.ndim > 0
                                     ? testcase_in[i].layout
                                     : testcase_out[i].layout);
    }

    auto tensors_cur_storage = alloc_tensors(m_handle_cur, layouts, m_offset);
    auto tensors_cur_host_storage =
            alloc_tensors(m_handle_naive.get(), layouts, m_offset);
    auto&& tensors_cur = *tensors_cur_storage;
    auto&& tensors_cur_host = *tensors_cur_host_storage;
    copy_tensors_to_device(tensors_cur, testcase_in);

    exec_opr(tensors_cur);
    if (m_expect_exec_fail) {
        m_expect_exec_fail();
        m_expect_exec_fail = {};
        return;
    }

    copy_tensors_from_device(tensors_cur_host, tensors_cur);
    check_tensors(testcase_out, tensors_cur_host);
    m_prev_succ = !::testing::Test::HasFailure();
}
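// Run the operator both on the naive reference handle and on the handle
// under test, then compare the two results, along with an optional extra
// implementation and an optional performance regression check.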
void CheckerHelper::do_exec(const TensorLayoutArray& user_layouts,
                            const TensorLayoutArray& deduced_layouts,
                            const OprExec& exec_naive, const OprExec& exec_opr) {
    m_prev_succ = false;

    // check if user provided layouts are correct
    for (size_t i = 0; i < deduced_layouts.size(); ++i) {
        if (user_layouts[i].ndim > 0) {
            ASSERT_TRUE(deduced_layouts[i].eq_shape(user_layouts[i]))
                    << "User provided shape is "
                    << user_layouts[i].TensorShape::to_string()
                    << "\nExpected shape is "
                    << deduced_layouts[i].TensorShape::to_string();
        }
    }
    auto layouts = user_layouts;
    for (size_t i = 0; i < layouts.size(); ++i) {
        if (layouts[i].ndim == 0) {
            //! some oprs, such as conv_bias, may have ndim == 0 here
            layouts[i] = deduced_layouts[i];
        }
    }

    // allocate
    m_tensors_naive = alloc_tensors(m_handle_naive.get(), layouts, m_offset);
    auto tensors_cur_storage = alloc_tensors(m_handle_cur, layouts, m_offset);
    auto tensors_cur_host_storage =
            alloc_tensors(m_handle_naive.get(), layouts, m_offset);
    auto&& tensors_naive = *m_tensors_naive;
    auto&& tensors_cur = *tensors_cur_storage;
    auto&& tensors_cur_host = *tensors_cur_host_storage;
    std::shared_ptr<TensorValueArray> tensors_extra_opr_impl;
    if (m_extra_opr_impl) {
        tensors_extra_opr_impl =
                alloc_tensors(m_handle_naive.get(), layouts, m_offset);
    }
    init_naive_values();
    copy_tensors_to_device(tensors_cur, tensors_naive);
    if (m_extra_opr_impl) {
        copy_tensors(*tensors_extra_opr_impl, tensors_naive);
    }

    // execute
    exec_opr(tensors_cur);
    if (m_expect_exec_fail) {
        m_expect_exec_fail();
        m_expect_exec_fail = {};
        return;
    }
    exec_naive(tensors_naive);
    if (m_extra_opr_impl) {
        m_extra_opr_impl(*tensors_extra_opr_impl);
    }

    // see if we need performance regression test
    if (m_perf_check) {
        ASSERT_GT(m_perf_check_threshold, 0)
                << "perf_check_threshold should be set ahead of time.";
        Timer timer_naive, timer_cur;
        megdnn_sync(m_handle_naive.get());
        timer_naive.start();
        exec_naive(tensors_naive);
        megdnn_sync(m_handle_naive.get());
        timer_naive.stop();

        megdnn_sync(m_handle_cur);
        timer_cur.start();
        exec_opr(tensors_cur);
        megdnn_sync(m_handle_cur);
        timer_cur.stop();

        size_t time_in_us_naive = timer_naive.get_time_in_us(),
               time_in_us_cur = timer_cur.get_time_in_us();
        EXPECT_GE(time_in_us_naive, static_cast<size_t>(100))
                << "Running time smaller than 100us "
                << "might be imprecise. naive_time="
                << time_in_us_naive << "us.";
        float speedup_ratio = static_cast<float>(time_in_us_naive) /
                              time_in_us_cur;
        EXPECT_GE(speedup_ratio, m_perf_check_threshold)
                << "speedup_ratio=" << speedup_ratio
                << " threshold=" << m_perf_check_threshold
                << " naive_time=" << time_in_us_naive
                << "us cur_time=" << time_in_us_cur << "us";
    }

    copy_tensors_from_device(tensors_cur_host, tensors_cur);
    if (m_output_canonizer) {
        m_output_canonizer(tensors_cur_host);
        m_output_canonizer(tensors_naive);
    }
    check_tensors(tensors_naive, tensors_cur_host);
    if (m_extra_opr_impl) {
        check_tensors(tensors_naive, *tensors_extra_opr_impl);
    }
    m_prev_succ = !::testing::Test::HasFailure();
}
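// Allocate one buffer per layout, storing a raw_ptr that is pre-adjusted by
// the span's low byte (plus the requested extra offset) so that indexing
// through the layout lands inside the allocation; the custom deleter undoes
// the same adjustment before freeing.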
std::shared_ptr<CheckerHelper::TensorValueArray>
CheckerHelper::alloc_tensors(Handle* handle, const TensorLayoutArray& layouts,
                             const size_t offset) {
    auto deleter = [handle, offset](TensorValueArray* ptr) {
        for (auto&& i : *ptr) {
            auto pdata = static_cast<dt_byte*>(i.raw_ptr) +
                         i.layout.span().low_byte - offset;
            megdnn_free(handle, pdata);
        }
        delete ptr;
    };
    std::shared_ptr<TensorValueArray> ret{new TensorValueArray, deleter};
    for (size_t i = 0; i < layouts.size(); ++i) {
        auto span = layouts[i].span();
        ret->emplace_back(static_cast<dt_byte*>(megdnn_malloc(
                                  handle, span.dist_byte() + offset)) -
                                  span.low_byte + offset,
                          layouts[i]);
    }
    return ret;
}
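// Fill the naive-side tensors either from a tensor file (when
// m_input_tensors_fpath is set) or from the per-tensor RNGs, falling back to
// the default normal RNG; an optional constraint callback can then fix up the
// generated values. Loading from file and the constraint are mutually
// exclusive.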
void CheckerHelper::init_naive_values() {
    auto&& tensors_naive = *m_tensors_naive;
    megdnn_assert(!m_input_tensors_fpath || !m_tensor_constraint);
    if (m_input_tensors_fpath) {
        auto load = load_tensors(m_input_tensors_fpath);
        m_input_tensors_fpath = nullptr;
        megdnn_assert(load.size() <= tensors_naive.size());
        for (size_t i = 0; i < load.size(); ++i) {
            auto&& src = load[i];
            auto&& dst = tensors_naive[i];
            megdnn_assert(src->layout.eq_layout(dst.layout));
            memcpy_noncontig(dst.raw_ptr, src->raw_ptr, dst.layout, memcpy);
        }
        return;
    }

    for (size_t i = 0; i < tensors_naive.size(); ++i) {
        auto&& tensor = tensors_naive[i];
        auto rng = m_rng[i];
        if (!rng)
            rng = m_default_rng.get();
        rng->gen(tensor);
    }
    if (m_tensor_constraint) {
        m_tensor_constraint(tensors_naive);
    }
}
void CheckerHelper::copy_tensors_from_device(const TensorValueArray& dest,
                                             const TensorValueArray& src) {
    auto impl_d2h = [this](void* dst, const void* src, size_t sz) {
        megdnn_memcpy_D2H(m_handle_cur, dst, src, sz);
    };
    copy_tensors(dest, src, impl_d2h);
}

void CheckerHelper::check_tensors(const TensorValueArray& expected,
                                  const TensorValueArray& computed) {
    for (size_t i = 0; i < expected.size(); ++i) {
        if (expected[i].layout.ndim == 0)
            continue;
        MEGDNN_ASSERT_TENSOR_EQ_EPS_AVG(expected[i], computed[i], m_epsilon,
                                        m_max_avg_error,
                                        m_max_avg_biased_error);
    }
}

void CheckerHelper::copy_tensors_to_device(const TensorValueArray& dest,
                                           const TensorValueArray& src) {
    auto impl_h2d = [this](void* dst, const void* src, size_t sz) {
        megdnn_memcpy_H2D(m_handle_cur, dst, src, sz);
    };
    copy_tensors(dest, src, impl_h2d);
}

// vim: syntax=cpp.doxygen

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU vs. GPU build to choose between. To run GPU programs, make sure the machine actually has GPU hardware and that the driver is installed. If you would like to try deep learning development on a cloud GPU compute platform, you are welcome to visit the MegStudio platform.