
checker.cpp

/**
 * \file dnn/test/common/checker.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "./checker.h"

#include "megdnn/tensor_format.h"
#include "megdnn/tensor_iter.h"
#include "test/common/tensor.h"
#include "test/common/timer.h"

using namespace megdnn;
using namespace test;

namespace {

template <typename ctype, class Iter>
::testing::AssertionResult assert_tensor_eq_with_iter(
        const char* expr0, const char* expr1, Iter it0, Iter it1,
        const TensorLayout& layout, float maxerr, float maxerr_avg,
        float maxerr_avg_biased, bool allow_invalid) {
    auto nr_elem = layout.total_nr_elems();
    double error_sum = 0;
    double error_sum_biased = 0;
    for (size_t i = 0; i < nr_elem; ++i) {
        ctype iv0 = *it0, iv1 = *it1;
        float err = diff(iv0, iv1);
        error_sum += std::abs(err);
        error_sum_biased += err;
        if (!allow_invalid &&
            (!good_float(iv0) || !good_float(iv1) || std::abs(err) > maxerr)) {
            Index index(layout, i);
            return ::testing::AssertionFailure()
                   << "Unequal value\n"
                   << "Value of: " << expr1 << "\n"
                   << "  Actual: " << (iv1 + 0) << "\n"
                   << "Expected: " << expr0 << "\n"
                   << "Which is: " << (iv0 + 0) << "\n"
                   << "At index: " << index.to_string() << "/"
                   << layout.TensorShape::to_string() << "\n"
                   << "   DType: " << layout.dtype.name() << "\n"
                   << "   error: " << std::abs(err) << "/" << maxerr;
        }
        ++it0;
        ++it1;
    }

    float error_avg = error_sum / nr_elem;
    if (error_avg > maxerr_avg) {
        return ::testing::AssertionFailure()
               << "Average error exceeds the upper limit\n"
               << "Value of: " << expr1 << "\n"
               << "Expected: " << expr0 << "\n"
               << "Average error: " << error_avg << "/" << maxerr_avg << "\n"
               << "Num of elements: " << nr_elem;
    }

    float error_avg_biased = error_sum_biased / nr_elem;
    if (std::abs(error_avg_biased) > maxerr_avg_biased) {
        return ::testing::AssertionFailure()
               << "Average biased error exceeds the upper limit\n"
               << "Value of: " << expr1 << "\n"
               << "Expected: " << expr0 << "\n"
               << "Average biased error: " << error_avg_biased << "/"
               << maxerr_avg_biased << "\n"
               << "Num of elements: " << nr_elem;
    }

    return ::testing::AssertionSuccess();
}

template <typename ctype>
::testing::AssertionResult assert_tensor_eq_with_dtype(
        const char* expr0, const char* expr1, const TensorND& v0, const TensorND& v1,
        float maxerr, float maxerr_avg, float maxerr_avg_biased, bool allow_invalid) {
    if (!std::is_same<ctype, dt_qint4>::value &&
        !std::is_same<ctype, dt_quint4>::value) {
        if (v0.layout.is_physical_contiguous() && v1.layout.is_physical_contiguous()) {
            return assert_tensor_eq_with_iter<ctype>(
                    expr0, expr1, v0.ptr<ctype>(), v1.ptr<ctype>(), v0.layout, maxerr,
                    maxerr_avg, maxerr_avg_biased, allow_invalid);
        }
    }

    auto it0 = megdnn::tensor_iter_valonly<ctype>(v0).begin(),
         it1 = megdnn::tensor_iter_valonly<ctype>(v1).begin();

    return assert_tensor_eq_with_iter<ctype>(
            expr0, expr1, it0, it1, v0.layout, maxerr, maxerr_avg, maxerr_avg_biased,
            allow_invalid);
}

template <class Impl>
void memcpy_noncontig(
        void* dst, const void* src, const TensorLayout& layout, const Impl& impl) {
    auto span = layout.span();
    dst = static_cast<dt_byte*>(dst) + span.low_byte;
    src = static_cast<const dt_byte*>(src) + span.low_byte;
    impl(dst, src, span.dist_byte());
}

template <typename Impl>
void copy_tensors(
        const CheckerHelper::TensorValueArray& dest,
        const CheckerHelper::TensorValueArray& src, const Impl& copy_impl) {
    megdnn_assert(dest.size() == src.size());
    for (size_t i = 0; i < src.size(); i++) {
        auto&& tensor = src[i];
        if (tensor.layout.ndim == 0)
            continue;
        memcpy_noncontig(dest[i].raw_ptr, tensor.raw_ptr, tensor.layout, copy_impl);
    }
}

void copy_tensors(
        const CheckerHelper::TensorValueArray& dest,
        const CheckerHelper::TensorValueArray& src) {
    copy_tensors(dest, src, memcpy);
}

}  // anonymous namespace
::testing::AssertionResult test::__assert_tensor_eq(
        const char* expr0, const char* expr1, const char* /*expr_maxerr*/,
        const char* /*expr_maxerr_avg*/, const char* /*expr_maxerr_avg_biased*/,
        const TensorND& v0, const TensorND& v1, float maxerr, float maxerr_avg,
        float maxerr_avg_biased, bool allow_invalid) {
    if (!v0.layout.eq_shape(v1.layout)) {
        return ::testing::AssertionFailure()
               << "Shape mismatch\n"
               << "Value of: " << expr1 << "\n"
               << "  Actual: " << v1.layout.TensorShape::to_string() << "\n"
               << "Expected: " << expr0 << "\n"
               << "Which is: " << v0.layout.TensorShape::to_string() << "\n";
    }
    auto dtype = v0.layout.dtype;
    if (dtype != v1.layout.dtype) {
        return ::testing::AssertionFailure()
               << "Data type mismatch\n"
               << "Value of: " << expr1 << "\n"
               << "  Actual: " << v1.layout.dtype.name() << "\n"
               << "Expected: " << expr0 << "\n"
               << "Which is: " << v0.layout.dtype.name() << "\n";
    }

    switch (dtype.enumv()) {
#define cb(_dt)                                                              \
    case DTypeTrait<_dt>::enumv:                                             \
        return assert_tensor_eq_with_dtype<DTypeTrait<_dt>::ctype>(          \
                expr0, expr1, v0, v1, maxerr, maxerr_avg, maxerr_avg_biased, \
                allow_invalid);
        MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
        MEGDNN_FOREACH_QUANTIZED_DTYPE(cb)
        //! In order to avoid an unnecessary increase in binary size, we just
        //! use QuantizedS16 dtype in winograd_filter_preprocess now.
        cb(::megdnn::dtype::QuantizedS16) MEGDNN_FOREACH_QUANTIZED_LOWBIT_DTYPE(cb)
                cb(::megdnn::dtype::Uint16)
#undef cb
        default:
            megdnn_trap();
    }
}

::testing::AssertionResult test::__assert_tensor_eq_allow_invalid(
        const char* expr0, const char* expr1, const char* expr_maxerr,
        const char* expr_maxerr_avg, const char* expr_maxerr_avg_biased,
        const TensorND& v0, const TensorND& v1, float maxerr, float maxerr_avg,
        float maxerr_avg_biased) {
    return __assert_tensor_eq(
            expr0, expr1, expr_maxerr, expr_maxerr_avg, expr_maxerr_avg_biased, v0, v1,
            maxerr, maxerr_avg, maxerr_avg_biased, true);
}
CheckerHelper::CheckerHelper(Handle* handle, bool check_dispatch)
        : m_handle_cur(handle), m_default_rng(new NormalRNG()) {
    //! set MGB_NO_NAIVE_CHECK=1 to disable the megdnn test check against the
    //! naive implementation
    const char* env_p = std::getenv("MGB_NO_NAIVE_CHECK");
    if (env_p) {
        int no_naive_flag = atoi(env_p);
        m_no_naive_and_check = no_naive_flag > 0;
        check_dispatch = false;
    } else {
        m_no_naive_and_check = false;
    }
    auto tmp_handle = create_cpu_handle(2, check_dispatch);
    m_handle_naive = std::move(tmp_handle);
}
CheckerHelper::~CheckerHelper() noexcept = default;

void CheckerHelper::do_exec_with_testcases(
        const TensorValueArray& testcase_in, const TensorValueArray& testcase_out,
        const OprExec& exec_opr) {
    m_prev_succ = false;

    // Validate layouts of tensors in testcase_in and testcase_out.
    // It must be possible to aggregate the layouts of inputs and outputs.
    TensorLayoutArray layouts;
    for (size_t i = 0; i < testcase_in.size(); i++) {
        // ndim == 0 means this tensor does not apply.
        ASSERT_TRUE(
                testcase_in[i].layout.ndim == 0 || testcase_out[i].layout.ndim == 0 ||
                testcase_in[i].layout.eq_layout(testcase_out[i].layout));
        layouts.emplace_back(
                testcase_in[i].layout.ndim > 0 ? testcase_in[i].layout
                                               : testcase_out[i].layout);
    }

    auto tensors_cur_storage = alloc_tensors(m_handle_cur, layouts, m_offset);
    auto tensors_cur_host_storage =
            alloc_tensors(m_handle_naive.get(), layouts, m_offset);
    auto&& tensors_cur = *tensors_cur_storage;
    auto&& tensors_cur_host = *tensors_cur_host_storage;

    copy_tensors_to_device(tensors_cur, testcase_in);

    exec_opr(tensors_cur);
    if (m_expect_exec_fail) {
        m_expect_exec_fail();
        m_expect_exec_fail = {};
        return;
    }

    copy_tensors_from_device(tensors_cur_host, tensors_cur);
    check_tensors(testcase_out, tensors_cur_host);
    m_prev_succ = !::testing::Test::HasFailure();
}
void CheckerHelper::do_exec(
        const TensorLayoutArray& user_layouts, const TensorLayoutArray& deduced_layouts,
        const OprExec& exec_naive, const OprExec& exec_opr) {
    m_prev_succ = false;

    // check if user-provided layouts are correct
    for (size_t i = 0; i < deduced_layouts.size(); ++i) {
        if (user_layouts[i].ndim > 0) {
            ASSERT_TRUE(deduced_layouts[i].eq_shape(user_layouts[i]))
                    << "User provided shape is "
                    << user_layouts[i].TensorShape::to_string()
                    << "\nExpected shape is "
                    << deduced_layouts[i].TensorShape::to_string();
        }
    }

    auto layouts = user_layouts;
    for (size_t i = 0; i < layouts.size(); ++i) {
        if (layouts[i].ndim == 0) {
            //! some oprs, such as conv_bias, may leave ndim == 0 here
            layouts[i] = deduced_layouts[i];
        }
    }

    // allocate
    m_tensors_naive = alloc_tensors(m_handle_naive.get(), layouts, m_offset);
    auto tensors_cur_storage = alloc_tensors(m_handle_cur, layouts, m_offset);
    auto tensors_cur_host_storage =
            alloc_tensors(m_handle_naive.get(), layouts, m_offset);
    auto&& tensors_naive = *m_tensors_naive;
    auto&& tensors_cur = *tensors_cur_storage;
    auto&& tensors_cur_host = *tensors_cur_host_storage;
    std::shared_ptr<TensorValueArray> tensors_extra_opr_impl;
    if (m_extra_opr_impl) {
        tensors_extra_opr_impl = alloc_tensors(m_handle_naive.get(), layouts, m_offset);
    }

    init_naive_values();
    copy_tensors_to_device(tensors_cur, tensors_naive);
    if (m_extra_opr_impl) {
        copy_tensors(*tensors_extra_opr_impl, tensors_naive);
    }

    // execute
    exec_opr(tensors_cur);
    if (m_expect_exec_fail) {
        m_expect_exec_fail();
        m_expect_exec_fail = {};
        return;
    }

    if (m_stable_check) {
        auto tensors_bak_host_storage =
                alloc_tensors(m_handle_naive.get(), layouts, m_offset);
        auto&& tensors_bak_host = *tensors_bak_host_storage;
        copy_tensors_from_device(tensors_bak_host, tensors_cur);
        for (int i = 0; i < 10; i++) {
            exec_opr(tensors_cur);
            copy_tensors_from_device(tensors_cur_host, tensors_cur);
            check_tensors(tensors_bak_host, tensors_cur_host);
        }
    }
    if (m_no_naive_and_check) {
        m_prev_succ = !::testing::Test::HasFailure();
        return;
    }
    exec_naive(tensors_naive);
    if (m_extra_opr_impl) {
        m_extra_opr_impl(*tensors_extra_opr_impl);
    }

    // see if we need a performance regression test
    if (m_perf_check) {
        ASSERT_GT(m_perf_check_threshold, 0) << "perf_check_threshold should be "
                                                "set ahead of time.";
        Timer timer_naive, timer_cur;

        megdnn_sync(m_handle_naive.get());
        timer_naive.start();
        exec_naive(tensors_naive);
        megdnn_sync(m_handle_naive.get());
        timer_naive.stop();

        megdnn_sync(m_handle_cur);
        timer_cur.start();
        exec_opr(tensors_cur);
        megdnn_sync(m_handle_cur);
        timer_cur.stop();

        size_t time_in_us_naive = timer_naive.get_time_in_us(),
               time_in_us_cur = timer_cur.get_time_in_us();
        EXPECT_GE(time_in_us_naive, static_cast<size_t>(100))
                << "Running time smaller than 100us "
                << "might be imprecise. naive_time=" << time_in_us_naive << "us.";
        float speedup_ratio = static_cast<float>(time_in_us_naive) / time_in_us_cur;
        EXPECT_GE(speedup_ratio, m_perf_check_threshold)
                << "speedup_ratio=" << speedup_ratio
                << " threshold=" << m_perf_check_threshold
                << " naive_time=" << time_in_us_naive
                << "us cur_time=" << time_in_us_cur << "us";
    }

    copy_tensors_from_device(tensors_cur_host, tensors_cur);
    if (m_output_canonizer) {
        m_output_canonizer(tensors_cur_host);
        m_output_canonizer(tensors_naive);
    }
    check_tensors(tensors_naive, tensors_cur_host);
    if (m_extra_opr_impl) {
        check_tensors(tensors_naive, *tensors_extra_opr_impl);
    }
    m_prev_succ = !::testing::Test::HasFailure();
}
std::shared_ptr<CheckerHelper::TensorValueArray> CheckerHelper::alloc_tensors(
        Handle* handle, const TensorLayoutArray& layouts, const size_t offset) {
    auto deleter = [handle, offset](TensorValueArray* ptr) {
        for (auto&& i : *ptr) {
            auto pdata = static_cast<dt_byte*>(i.raw_ptr) + i.layout.span().low_byte -
                         offset;
            megdnn_free(handle, pdata);
        }
        delete ptr;
    };
    std::shared_ptr<TensorValueArray> ret{new TensorValueArray, deleter};
    for (size_t i = 0; i < layouts.size(); ++i) {
        auto span = layouts[i].span();
        ret->emplace_back(
                static_cast<dt_byte*>(
                        megdnn_malloc(handle, span.dist_byte() + offset)) -
                        span.low_byte + offset,
                layouts[i]);
    }
    return ret;
}

void CheckerHelper::init_naive_values() {
    auto&& tensors_naive = *m_tensors_naive;
    megdnn_assert(!m_input_tensors_fpath || !m_tensor_constraint);
    if (m_input_tensors_fpath) {
        auto load = load_tensors(m_input_tensors_fpath);
        m_input_tensors_fpath = nullptr;
        megdnn_assert(load.size() <= tensors_naive.size());
        for (size_t i = 0; i < load.size(); ++i) {
            auto&& src = load[i];
            auto&& dst = tensors_naive[i];
            megdnn_assert(src->layout.eq_layout(dst.layout));
            memcpy_noncontig(dst.raw_ptr, src->raw_ptr, dst.layout, memcpy);
        }
        return;
    }

    for (size_t i = 0; i < tensors_naive.size(); ++i) {
        auto&& tensor = tensors_naive[i];
        auto rng = m_rng[i];
        if (!rng)
            rng = m_default_rng.get();
        rng->gen(tensor);
    }

    if (m_tensor_constraint) {
        m_tensor_constraint(tensors_naive);
    }
}

void CheckerHelper::copy_tensors_from_device(
        const TensorValueArray& dest, const TensorValueArray& src) {
    auto impl_d2h = [this](void* dst, const void* src, size_t sz) {
        megdnn_memcpy_D2H(m_handle_cur, dst, src, sz);
    };
    copy_tensors(dest, src, impl_d2h);
}

void CheckerHelper::check_tensors(
        const TensorValueArray& expected, const TensorValueArray& computed) {
    for (size_t i = 0; i < expected.size(); ++i) {
        if (expected[i].layout.ndim == 0 || m_bypass.find(i) != m_bypass.end())
            continue;
        if (m_allow_invalid_check) {
            MEGDNN_ASSERT_TENSOR_EQ_EPS_AVG_ALLOW_INVALID(
                    expected[i], computed[i], m_epsilon, m_max_avg_error,
                    m_max_avg_biased_error);
        } else {
            MEGDNN_ASSERT_TENSOR_EQ_EPS_AVG(
                    expected[i], computed[i], m_epsilon, m_max_avg_error,
                    m_max_avg_biased_error);
        }
    }
}

void CheckerHelper::copy_tensors_to_device(
        const TensorValueArray& dest, const TensorValueArray& src) {
    auto impl_h2d = [this](void* dst, const void* src, size_t sz) {
        megdnn_memcpy_H2D(m_handle_cur, dst, src, sz);
    };
    copy_tensors(dest, src, impl_h2d);
}

// vim: syntax=cpp.doxygen
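
For context, the CheckerHelper machinery above is normally driven through the Checker<Opr> wrapper declared in checker.h rather than called directly. The snippet below is a minimal usage sketch only: the operator choice, driver function name, shapes and thresholds are illustrative assumptions about the common dnn/test call pattern, not code from this file.

// Usage sketch (assumed Checker<Opr> API from test/common/checker.h; not part of checker.cpp).
#include "test/common/checker.h"

using namespace megdnn;
using namespace test;

// Runs the handle's elemwise ADD kernel and compares it against the naive
// reference via CheckerHelper::do_exec(), using the thresholds set below.
void sketch_elemwise_add_check(Handle* handle) {
    using Param = ElemwiseForward::Param;
    Param param;
    param.mode = Param::Mode::ADD;

    Checker<ElemwiseForward> checker(handle);
    checker.set_param(param)
            .set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::Float32())
            .set_epsilon(1e-3)                    // per-element maxerr
            .execs({{2, 3, 4}, {2, 3, 4}, {}});   // empty shape: deduce the output layout
}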

The MegEngine installation package bundles the CUDA environment needed to run code on the GPU, so there is no separate CPU or GPU build. To run GPU programs, make sure the machine has GPU hardware and that the driver is installed. If you would like to try deep learning development on cloud GPU resources, you are welcome to visit the MegStudio platform.