checker.cpp

  1. #include "./checker.h"
  2. #include "megdnn/tensor_format.h"
  3. #include "megdnn/tensor_iter.h"
  4. #include "test/common/tensor.h"
  5. #include "test/common/timer.h"
  6. using namespace megdnn;
  7. using namespace test;
  8. namespace {
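// Compare two tensors element by element through the given iterators and
// collect three error metrics: the per-element error (checked against
// `maxerr`), the average absolute error (checked against `maxerr_avg`) and
// the average signed error (checked against `maxerr_avg_biased`).  When
// `allow_invalid` is set, neither non-finite values nor per-element errors
// above `maxerr` cause a failure; only the two average checks still apply.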
template <typename ctype, class Iter>
::testing::AssertionResult assert_tensor_eq_with_iter(
        const char* expr0, const char* expr1, Iter it0, Iter it1,
        const TensorLayout& layout, float maxerr, float maxerr_avg,
        float maxerr_avg_biased, bool allow_invalid) {
    auto nr_elem = layout.total_nr_elems();
    double error_sum = 0;
    double error_sum_biased = 0;
    for (size_t i = 0; i < nr_elem; ++i) {
        ctype iv0 = *it0, iv1 = *it1;
        float err = diff(iv0, iv1);
        error_sum += std::abs(err);
        error_sum_biased += err;
        if (!allow_invalid &&
            (!good_float(iv0) || !good_float(iv1) || std::abs(err) > maxerr)) {
            Index index(layout, i);
            return ::testing::AssertionFailure()
                   << "Unequal value\n"
                   << "Value of: " << expr1 << "\n"
                   << "  Actual: " << (iv1 + 0) << "\n"
                   << "Expected: " << expr0 << "\n"
                   << "Which is: " << (iv0 + 0) << "\n"
                   << "At index: " << index.to_string() << "/"
                   << layout.TensorShape::to_string() << "\n"
                   << "   DType: " << layout.dtype.name() << "\n"
                   << "   error: " << std::abs(err) << "/" << maxerr;
        }
        ++it0;
        ++it1;
    }

    float error_avg = error_sum / nr_elem;
    if (error_avg > maxerr_avg) {
        return ::testing::AssertionFailure()
               << "Average error exceeds the upper limit\n"
               << "Value of: " << expr1 << "\n"
               << "Expected: " << expr0 << "\n"
               << "Average error: " << error_avg << "/" << maxerr_avg << "\n"
               << "Num of elements: " << nr_elem;
    }

    float error_avg_biased = error_sum_biased / nr_elem;
    if (std::abs(error_avg_biased) > maxerr_avg_biased) {
        return ::testing::AssertionFailure()
               << "Average biased error exceeds the upper limit\n"
               << "Value of: " << expr1 << "\n"
               << "Expected: " << expr0 << "\n"
               << "Average biased error: " << error_avg_biased << "/"
               << maxerr_avg_biased << "\n"
               << "Num of elements: " << nr_elem;
    }

    return ::testing::AssertionSuccess();
}
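// Dispatch the comparison for a concrete ctype.  Physically contiguous
// tensors are compared directly through raw pointers; everything else goes
// through tensor_iter_valonly.  The low-bit quantized dtypes
// (qint4/quint4/qint1) always take the iterator path, as their elements are
// bit-packed and cannot be addressed one by one via ptr<ctype>().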
template <typename ctype>
::testing::AssertionResult assert_tensor_eq_with_dtype(
        const char* expr0, const char* expr1, const TensorND& v0, const TensorND& v1,
        float maxerr, float maxerr_avg, float maxerr_avg_biased, bool allow_invalid) {
    if (!std::is_same<ctype, dt_qint4>::value &&
        !std::is_same<ctype, dt_quint4>::value &&
        !std::is_same<ctype, dt_qint1>::value) {
        if (v0.layout.is_physical_contiguous() && v1.layout.is_physical_contiguous()) {
            return assert_tensor_eq_with_iter<ctype>(
                    expr0, expr1, v0.ptr<ctype>(), v1.ptr<ctype>(), v0.layout, maxerr,
                    maxerr_avg, maxerr_avg_biased, allow_invalid);
        }
    }

    auto it0 = megdnn::tensor_iter_valonly<ctype>(v0).begin(),
         it1 = megdnn::tensor_iter_valonly<ctype>(v1).begin();

    return assert_tensor_eq_with_iter<ctype>(
            expr0, expr1, it0, it1, v0.layout, maxerr, maxerr_avg, maxerr_avg_biased,
            allow_invalid);
}
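// Copy helpers: memcpy_noncontig copies exactly the byte span covered by a
// layout (starting at span().low_byte, for span().dist_byte() bytes), so
// layouts with offsets or negative strides are handled uniformly.
// copy_tensors applies it to a whole TensorValueArray, skipping entries
// whose layout has ndim == 0.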
template <class Impl>
void memcpy_noncontig(
        void* dst, const void* src, const TensorLayout& layout, const Impl& impl) {
    auto span = layout.span();
    dst = static_cast<dt_byte*>(dst) + span.low_byte;
    src = static_cast<const dt_byte*>(src) + span.low_byte;
    impl(dst, src, span.dist_byte());
}

template <typename Impl>
void copy_tensors(
        const CheckerHelper::TensorValueArray& dest,
        const CheckerHelper::TensorValueArray& src, const Impl& copy_impl) {
    megdnn_assert(dest.size() == src.size(), "%zu != %zu", dest.size(), src.size());
    for (size_t i = 0; i < src.size(); i++) {
        auto&& tensor = src[i];
        if (tensor.layout.ndim == 0)
            continue;
        memcpy_noncontig(dest[i].raw_ptr(), tensor.raw_ptr(), tensor.layout, copy_impl);
    }
}

void copy_tensors(
        const CheckerHelper::TensorValueArray& dest,
        const CheckerHelper::TensorValueArray& src) {
    copy_tensors(dest, src, memcpy);
}

}  // anonymous namespace
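// Presumably the backend of the MEGDNN_ASSERT_TENSOR_EQ* macros used in
// check_tensors() below: verify that shapes and dtypes match, then dispatch
// to assert_tensor_eq_with_dtype for the concrete element type.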
::testing::AssertionResult test::__assert_tensor_eq(
        const char* expr0, const char* expr1, const char* /*expr_maxerr*/,
        const char* /*expr_maxerr_avg*/, const char* /*expr_maxerr_avg_biased*/,
        const TensorND& v0, const TensorND& v1, float maxerr, float maxerr_avg,
        float maxerr_avg_biased, bool allow_invalid) {
    if (!v0.layout.eq_shape(v1.layout)) {
        return ::testing::AssertionFailure()
               << "Shape mismatch\n"
               << "Value of: " << expr1 << "\n"
               << "  Actual: " << v1.layout.TensorShape::to_string() << "\n"
               << "Expected: " << expr0 << "\n"
               << "Which is: " << v0.layout.TensorShape::to_string() << "\n";
    }
    auto dtype = v0.layout.dtype;
    if (dtype != v1.layout.dtype) {
        return ::testing::AssertionFailure()
               << "Data type mismatch\n"
               << "Value of: " << expr1 << "\n"
               << "  Actual: " << v1.layout.dtype.name() << "\n"
               << "Expected: " << expr0 << "\n"
               << "Which is: " << v0.layout.dtype.name() << "\n";
    }

    switch (dtype.enumv()) {
#define cb(_dt)                                                             \
    case DTypeTrait<_dt>::enumv:                                            \
        return assert_tensor_eq_with_dtype<DTypeTrait<_dt>::ctype>(         \
                expr0, expr1, v0, v1, maxerr, maxerr_avg, maxerr_avg_biased, \
                allow_invalid);
        MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
        MEGDNN_FOREACH_QUANTIZED_DTYPE(cb)
        //! In order to avoid an unnecessary increase in binary size, we just
        //! use QuantizedS16 dtype in winograd_filter_preprocess now.
        cb(::megdnn::dtype::QuantizedS16)
        MEGDNN_FOREACH_QUANTIZED_LOWBIT_DTYPE(cb)
        cb(::megdnn::dtype::Uint16)
        cb(::megdnn::dtype::QuantizedS1)
        cb(::megdnn::dtype::Bool)
#undef cb
        default:
            megdnn_trap();
    }
}
::testing::AssertionResult test::__assert_tensor_eq_allow_invalid(
        const char* expr0, const char* expr1, const char* expr_maxerr,
        const char* expr_maxerr_avg, const char* expr_maxerr_avg_biased,
        const TensorND& v0, const TensorND& v1, float maxerr, float maxerr_avg,
        float maxerr_avg_biased) {
    return __assert_tensor_eq(
            expr0, expr1, expr_maxerr, expr_maxerr_avg, expr_maxerr_avg_biased, v0, v1,
            maxerr, maxerr_avg, maxerr_avg_biased, true);
}
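// The constructor keeps the device handle under test (m_handle_cur) and
// creates a CPU handle with debug level 2 (which the megdnn test utilities
// map to the naive implementation) as the reference.  Setting
// MGB_NO_NAIVE_CHECK=1 in the environment skips the comparison against the
// naive result and also disables dispatch checking on the reference handle.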
CheckerHelper::CheckerHelper(Handle* handle, bool check_dispatch)
        : m_handle_cur(handle), m_default_rng(new NormalRNG()) {
    //! set MGB_NO_NAIVE_CHECK=1 to skip the megdnn check against the naive
    //! implementation
    const char* env_p = std::getenv("MGB_NO_NAIVE_CHECK");
    if (env_p) {
        int no_naive_flag = atoi(env_p);
        m_no_naive_and_check = no_naive_flag > 0;
        check_dispatch = false;
    } else {
        m_no_naive_and_check = false;
    }
    auto tmp_handle = create_cpu_handle(2, check_dispatch);
    m_handle_naive = std::move(tmp_handle);
}
CheckerHelper::~CheckerHelper() noexcept = default;

void CheckerHelper::do_exec_with_testcases(
        const TensorValueArray& testcase_in, const TensorValueArray& testcase_out,
        const OprExec& exec_opr) {
    m_prev_succ = false;

    // Validate layouts of tensors in testcase_in and testcase_out.
    // It must be possible to aggregate the layouts of inputs and outputs.
    TensorLayoutArray layouts;
    for (size_t i = 0; i < testcase_in.size(); i++) {
        // ndim == 0 means the tensor does not apply.
        ASSERT_TRUE(
                testcase_in[i].layout.ndim == 0 || testcase_out[i].layout.ndim == 0 ||
                testcase_in[i].layout.eq_layout(testcase_out[i].layout));
        layouts.emplace_back(
                testcase_in[i].layout.ndim > 0 ? testcase_in[i].layout
                                               : testcase_out[i].layout);
    }

    auto tensors_cur_storage = alloc_tensors(m_handle_cur, layouts, m_offset);
    auto tensors_cur_host_storage =
            alloc_tensors(m_handle_naive.get(), layouts, m_offset);
    auto&& tensors_cur = *tensors_cur_storage;
    auto&& tensors_cur_host = *tensors_cur_host_storage;

    copy_tensors_to_device(tensors_cur, testcase_in);
    exec_opr(tensors_cur);
    if (m_expect_exec_fail) {
        m_expect_exec_fail();
        m_expect_exec_fail = {};
        return;
    }
    copy_tensors_from_device(tensors_cur_host, tensors_cur);
    check_tensors(testcase_out, tensors_cur_host);
    m_prev_succ = !::testing::Test::HasFailure();
}
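// Main checking routine: validate the user-provided layouts against the
// deduced ones, allocate tensors on both the reference (naive) handle and the
// handle under test, fill the reference tensors with random values (or values
// loaded from a file), run the operator under test, optionally re-run it
// several times to check that its output is stable, then run the naive
// implementation (and an optional extra implementation) and compare the
// results within the configured error bounds.  A performance regression check
// can also be enabled via m_perf_check / m_perf_check_threshold.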
void CheckerHelper::do_exec(
        const TensorLayoutArray& user_layouts, const TensorLayoutArray& deduced_layouts,
        const OprExec& exec_naive, const OprExec& exec_opr) {
    m_prev_succ = false;

    // check if user provided layouts are correct
    for (size_t i = 0; i < deduced_layouts.size(); ++i) {
        if (user_layouts[i].ndim > 0) {
            ASSERT_TRUE(deduced_layouts[i].eq_shape(user_layouts[i]))
                    << "User provided shape is "
                    << user_layouts[i].TensorShape::to_string()
                    << "\nExpected shape is "
                    << deduced_layouts[i].TensorShape::to_string();
        }
    }

    auto layouts = user_layouts;
    for (size_t i = 0; i < layouts.size(); ++i) {
        if (layouts[i].ndim == 0) {
            //! some oprs (e.g. conv_bias) may leave a layout with ndim == 0,
            //! so fall back to the deduced layout
            layouts[i] = deduced_layouts[i];
        }
    }
    // allocate
    m_tensors_naive = alloc_tensors(m_handle_naive.get(), layouts, m_offset);
    auto tensors_cur_storage = alloc_tensors(m_handle_cur, layouts, m_offset);
    auto tensors_cur_host_storage =
            alloc_tensors(m_handle_naive.get(), layouts, m_offset);
    auto&& tensors_naive = *m_tensors_naive;
    auto&& tensors_cur = *tensors_cur_storage;
    auto&& tensors_cur_host = *tensors_cur_host_storage;
    std::shared_ptr<TensorValueArray> tensors_extra_opr_impl;
    if (m_extra_opr_impl) {
        tensors_extra_opr_impl = alloc_tensors(m_handle_naive.get(), layouts, m_offset);
    }

    init_naive_values();
    copy_tensors_to_device(tensors_cur, tensors_naive);
    if (m_extra_opr_impl) {
        copy_tensors(*tensors_extra_opr_impl, tensors_naive);
    }

    // execute
    exec_opr(tensors_cur);
    if (m_expect_exec_fail) {
        m_expect_exec_fail();
        m_expect_exec_fail = {};
        return;
    }

    if (m_stable_check) {
        auto tensors_bak_host_storage =
                alloc_tensors(m_handle_naive.get(), layouts, m_offset);
        auto&& tensors_bak_host = *tensors_bak_host_storage;
        copy_tensors_from_device(tensors_bak_host, tensors_cur);
        for (int i = 0; i < 10; i++) {
            exec_opr(tensors_cur);
            copy_tensors_from_device(tensors_cur_host, tensors_cur);
            check_tensors(tensors_bak_host, tensors_cur_host);
        }
    }
    if (m_no_naive_and_check) {
        m_prev_succ = !::testing::Test::HasFailure();
        return;
    }
    exec_naive(tensors_naive);
    if (m_extra_opr_impl) {
        m_extra_opr_impl(*tensors_extra_opr_impl);
    }

    // see if we need performance regression test
    if (m_perf_check) {
        ASSERT_GT(m_perf_check_threshold, 0) << "perf_check_threshold should be "
                                                "set ahead of time.";
        Timer timer_naive, timer_cur;

        megdnn_sync(m_handle_naive.get());
        timer_naive.start();
        exec_naive(tensors_naive);
        megdnn_sync(m_handle_naive.get());
        timer_naive.stop();

        megdnn_sync(m_handle_cur);
        timer_cur.start();
        exec_opr(tensors_cur);
        megdnn_sync(m_handle_cur);
        timer_cur.stop();

        size_t time_in_us_naive = timer_naive.get_time_in_us(),
               time_in_us_cur = timer_cur.get_time_in_us();
        EXPECT_GE(time_in_us_naive, static_cast<size_t>(100))
                << "Running time smaller than 100us "
                << "might be imprecise. naive_time=" << time_in_us_naive << "us.";
        float speedup_ratio = static_cast<float>(time_in_us_naive) / time_in_us_cur;
        EXPECT_GE(speedup_ratio, m_perf_check_threshold)
                << "speedup_ratio=" << speedup_ratio
                << " threshold=" << m_perf_check_threshold
                << " naive_time=" << time_in_us_naive
                << "us cur_time=" << time_in_us_cur << "us";
    }

    copy_tensors_from_device(tensors_cur_host, tensors_cur);
    if (m_output_canonizer) {
        m_output_canonizer(tensors_cur_host);
        m_output_canonizer(tensors_naive);
    }
    check_tensors(tensors_naive, tensors_cur_host);
    if (m_extra_opr_impl) {
        check_tensors(tensors_naive, *tensors_extra_opr_impl);
    }
    m_prev_succ = !::testing::Test::HasFailure();
}
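// Allocate one buffer per layout.  Each buffer covers the full byte span of
// the layout plus the configured offset; the stored raw pointer is shifted by
// (-span.low_byte + offset) so that element access with the layout's strides
// lands inside the allocation, and the custom deleter undoes that shift
// before freeing.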
std::shared_ptr<CheckerHelper::TensorValueArray> CheckerHelper::alloc_tensors(
        Handle* handle, const TensorLayoutArray& layouts, const size_t offset) {
    auto deleter = [handle, offset](TensorValueArray* ptr) {
        for (auto&& i : *ptr) {
            auto pdata = static_cast<dt_byte*>(i.raw_ptr()) + i.layout.span().low_byte -
                         offset;
            megdnn_free(handle, pdata);
        }
        delete ptr;
    };
    std::shared_ptr<TensorValueArray> ret{new TensorValueArray, deleter};
    for (size_t i = 0; i < layouts.size(); ++i) {
        auto span = layouts[i].span();
        auto ptr = megdnn_malloc(handle, span.dist_byte() + offset);
        ret->emplace_back(
                static_cast<dt_byte*>(ptr) - span.low_byte + offset, layouts[i]);
    }
    return ret;
}
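// Fill the reference tensors either with data loaded from
// m_input_tensors_fpath (if set) or with values drawn from the per-tensor
// RNGs (falling back to the default normal RNG), then apply the optional
// tensor constraint.  Loading from a file and using a constraint are
// mutually exclusive.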
void CheckerHelper::init_naive_values() {
    auto&& tensors_naive = *m_tensors_naive;
    megdnn_assert(!m_input_tensors_fpath || !m_tensor_constraint);
    if (m_input_tensors_fpath) {
        auto load = load_tensors(m_input_tensors_fpath);
        m_input_tensors_fpath = nullptr;
        megdnn_assert(load.size() <= tensors_naive.size());
        for (size_t i = 0; i < load.size(); ++i) {
            auto&& src = load[i];
            auto&& dst = tensors_naive[i];
            megdnn_assert(src->layout.eq_layout(dst.layout));
            memcpy_noncontig(dst.raw_ptr(), src->raw_ptr(), dst.layout, memcpy);
        }
        return;
    }

    for (size_t i = 0; i < tensors_naive.size(); ++i) {
        auto&& tensor = tensors_naive[i];
        auto rng = m_rng[i];
        if (!rng)
            rng = m_default_rng.get();
        rng->gen(tensor);
    }
    if (m_tensor_constraint) {
        m_tensor_constraint(tensors_naive);
    }
}
void CheckerHelper::copy_tensors_from_device(
        const TensorValueArray& dest, const TensorValueArray& src) {
    auto impl_d2h = [this](void* dst, const void* src, size_t sz) {
        megdnn_memcpy_D2H(m_handle_cur, dst, src, sz);
    };
    copy_tensors(dest, src, impl_d2h);
}

void CheckerHelper::check_tensors(
        const TensorValueArray& expected, const TensorValueArray& computed) {
    for (size_t i = 0; i < expected.size(); ++i) {
        if (expected[i].layout.ndim == 0 || m_bypass.find(i) != m_bypass.end())
            continue;
        if (m_allow_invalid_check) {
            MEGDNN_ASSERT_TENSOR_EQ_EPS_AVG_ALLOW_INVALID(
                    expected[i], computed[i], m_epsilon, m_max_avg_error,
                    m_max_avg_biased_error);
        } else {
            MEGDNN_ASSERT_TENSOR_EQ_EPS_AVG(
                    expected[i], computed[i], m_epsilon, m_max_avg_error,
                    m_max_avg_biased_error);
        }
    }
}

void CheckerHelper::copy_tensors_to_device(
        const TensorValueArray& dest, const TensorValueArray& src) {
    auto impl_h2d = [this](void* dst, const void* src, size_t sz) {
        megdnn_memcpy_H2D(m_handle_cur, dst, src, sz);
    };
    copy_tensors(dest, src, impl_h2d);
}
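// A minimal usage sketch (not part of this file): tests normally drive
// CheckerHelper through the Checker<Opr> wrapper declared in checker.h.  The
// setter names below (set_param / set_dtype / set_epsilon / execs) are the
// ones commonly used in the megdnn test suite and are assumed here rather
// than defined in this translation unit:
//
//     Checker<ElemwiseForward> checker(handle);
//     checker.set_param({ElemwiseForward::Mode::ADD})
//             .set_dtype(0, dtype::Float32())
//             .set_dtype(1, dtype::Float32())
//             .set_epsilon(1e-3)
//             .execs({{2, 3, 4}, {2, 3, 4}, {}});
//
// execs() deduces the output layout (the trailing {}), runs the operator on
// both the naive handle and the handle under test, and compares the outputs
// via check_tensors() above.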
// vim: syntax=cpp.doxygen