You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

checker.h 17 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. /**
  2. * \file dnn/test/common/checker.h
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #pragma once
  12. #include "megdnn/basic_types.h"
  13. #include "megdnn/tensor_iter.h"
  14. #include "test/common/opr_algo_proxy.h"
  15. #include "test/common/opr_proxy.h"
  16. #include "test/common/rng.h"
  17. #include <gtest/gtest.h>
  18. #include <memory>
  19. #include <regex>
  20. #include <unordered_map>
  21. // clang-format off
  22. #if defined(__has_feature)
  23. #if __has_feature(address_sanitizer)
  24. #define MEGDNN_TEST_ASAN 1
  25. #else
  26. #define MEGDNN_TEST_ASAN 0
  27. #endif
  28. #elif defined(__SANITIZE_ADDRESS__)
  29. #define MEGDNN_TEST_ASAN 1
  30. #else
  31. #define MEGDNN_TEST_ASAN 0
  32. #endif
  33. // clang-format on
  34. namespace megdnn {
  35. namespace test {
  36. class CheckerHelper {
  37. // TensorLayoutArray and TensorValueArray should be protected in theory;
  38. // but g++-4.9 bugs handle access privilege wrongfully, so we change it
  39. // to public.
  40. public:
  41. using TensorValueArray = TensorNDArray;
  42. using TensorsConstriant = std::function<void(TensorValueArray& tensors)>;
  43. using ExtraOprImpl = std::function<void(const TensorNDArray&)>;
  44. using OutputCanonizer = std::function<void(const TensorValueArray&)>;
  45. static std::shared_ptr<TensorValueArray> alloc_tensors(
  46. Handle* handle, const TensorLayoutArray& layouts, size_t offset);
  47. Handle* handle() const { return m_handle_cur; }
  48. protected:
  49. //! whether to use physically contiguous (i.e. default layout) for naive
  50. //! impl
  51. bool m_enable_contig_naive = false;
  52. bool m_prev_succ = true;
  53. const char* m_input_tensors_fpath = nullptr;
  54. thin_function<void()> m_expect_exec_fail;
  55. std::unique_ptr<Handle> m_handle_naive;
  56. Handle* m_handle_cur;
  57. std::unique_ptr<RNG> m_default_rng;
  58. std::unordered_map<size_t, RNG*> m_rng;
  59. std::unordered_map<size_t, DType> m_dtype;
  60. std::unordered_map<size_t, TensorFormat> m_fmt;
  61. float_t m_epsilon = 1e-3, m_max_avg_error = 1e-3,
  62. m_max_avg_biased_error = 1e-3;
  63. float_t m_perf_check_threshold = -1;
  64. bool m_perf_check = false;
  65. ExtraOprImpl m_extra_opr_impl;
  66. OutputCanonizer m_output_canonizer;
  67. TensorsConstriant m_tensor_constraint;
  68. /**
  69. * the offset from the start of malloc memory
  70. *
  71. * \note alloc \p m_offset more memory when alloc memory for a tensor,
  72. * the start of tensor just begin at \p m_offset.
  73. * \warning current only used for opencl
  74. */
  75. size_t m_offset = 0;
  76. CheckerHelper(Handle* handle, bool check_dispatch = true);
  77. ~CheckerHelper() noexcept;
  78. using OprExec = std::function<void(const TensorValueArray&)>;
  79. void do_exec_with_testcases(const TensorValueArray& testcase_in,
  80. const TensorValueArray& testcase_out,
  81. const OprExec& exec_opr);
  82. void do_exec(const TensorLayoutArray& user_layouts,
  83. const TensorLayoutArray& deduced_layouts,
  84. const OprExec& exec_naive, const OprExec& exec_opr);
  85. void enable_contig_naive() { m_enable_contig_naive = true; }
  86. private:
  87. std::shared_ptr<TensorValueArray> m_tensors_naive;
  88. void init_naive_values();
  89. void copy_tensors_to_device(const TensorValueArray& dest,
  90. const TensorValueArray& src);
  91. void copy_tensors_from_device(const TensorValueArray& dest,
  92. const TensorValueArray& src);
  93. void check_tensors(const TensorValueArray& expected,
  94. const TensorValueArray& computed);
  95. };
  96. template <typename Opr, typename Proxy = OprProxy<Opr>>
  97. class Checker : public CheckerHelper {
  98. public:
  99. using Param = typename Opr::Param;
  100. using BeforeExecCallback =
  101. std::function<void(Opr*, const TensorValueArray&)>;
  102. Checker(Handle* handle, bool check_dispatch = true)
  103. : CheckerHelper(handle, check_dispatch), m_param(Param()) {}
  104. TensorLayoutArray make_layouts(const TensorShapeArray& shapes) {
  105. TensorLayoutArray layouts(shapes.size());
  106. for (size_t i = 0; i < shapes.size(); ++i) {
  107. DType dt = (m_dtype.find(i) != m_dtype.end() ? m_dtype[i]
  108. : dtype::Float32());
  109. TensorFormat fmt =
  110. (m_fmt.find(i) != m_fmt.end() ? m_fmt[i] : TensorFormat{});
  111. layouts[i] = TensorLayout(shapes[i], dt, fmt);
  112. }
  113. return layouts;
  114. }
  115. /*!
  116. * \brief execute opr on current param/dtype/rng config
  117. * \param shapes input/output shapes, which would be passed as
  118. * arguments to Opr::deduce_layout
  119. *
  120. * Checker would construct TensorLayout vectors from shapes and dtypes,
  121. * and call exec(TensorLayoutArray &).
  122. */
  123. Checker& exec(const TensorShapeArray& shapes) {
  124. exec(make_layouts(shapes));
  125. return *this;
  126. }
  127. void exec(TensorLayoutArray layouts);
  128. //! explicitly require argument to be TensorShape
  129. Checker& execs(const TensorShapeArray& shapes) { return exec(shapes); }
  130. //! explicitly require argument to be TensorLayout
  131. Checker& execl(const TensorLayoutArray& layouts) {
  132. exec(layouts);
  133. return *this;
  134. }
  135. Checker& exect(const TensorValueArray& testcase_in,
  136. const TensorValueArray& testcase_out);
  137. Checker& set_param(Param param) {
  138. m_param = param;
  139. opr()->param() = param;
  140. return *this;
  141. }
  142. Checker& set_dtype(size_t idx, DType dtype) {
  143. m_dtype[idx] = dtype;
  144. return *this;
  145. }
  146. Checker& set_fmt(size_t idx, TensorFormat fmt) {
  147. m_fmt[idx] = fmt;
  148. return *this;
  149. }
  150. Checker& set_rng(size_t idx, RNG* rng) {
  151. m_rng[idx] = rng;
  152. return *this;
  153. }
  154. //! max error of a single element
  155. Checker& set_epsilon(dt_float32 epsilon) {
  156. m_epsilon = epsilon;
  157. m_max_avg_error = epsilon;
  158. m_max_avg_biased_error = epsilon;
  159. return *this;
  160. }
  161. //! max average error; defaults to epsilon
  162. Checker& set_max_avg_error(dt_float32 error) {
  163. m_max_avg_error = error;
  164. return *this;
  165. }
  166. //! max average biased error; defaults to epsilon
  167. Checker& set_max_avg_biased_error(dt_float32 error) {
  168. m_max_avg_biased_error = error;
  169. return *this;
  170. }
  171. Checker& set_offset(size_t offset) {
  172. m_offset = offset;
  173. return *this;
  174. }
  175. Checker& set_proxy(const Proxy& proxy) {
  176. m_naive_proxy = proxy;
  177. m_cur_proxy = proxy;
  178. return *this;
  179. }
  180. //! set_perf_check and set_perf_check_threshold control the
  181. //! performance checking behavior.
  182. //!
  183. //! If perf_check is on (default to off), the running time of the
  184. //! current operator and the naive operator would be measured and
  185. //! checked when calling exec.
  186. //! The accelerating ratio should be larger than perf_check_threshold,
  187. //! otherwise errors would be reported.
  188. //! perf_check_threshold must be set in advance since the default value
  189. //! (which is negative) is invalid.
  190. Checker& set_perf_check(bool perf_check) {
  191. m_perf_check = perf_check;
  192. return *this;
  193. }
  194. Checker& set_perf_check_threshold(float perf_check_threshold) {
  195. m_perf_check_threshold = perf_check_threshold;
  196. return *this;
  197. }
  198. //! load input tensors from file for next run
  199. Checker& load_input_tensors(const char* fpath) {
  200. m_input_tensors_fpath = fpath;
  201. return *this;
  202. }
  203. //! add another checker to ensure naive implementation is correct
  204. Checker& set_extra_opr_impl(const ExtraOprImpl& chk) {
  205. m_extra_opr_impl = chk;
  206. return *this;
  207. }
  208. //! set a callback to be invoked before executing the operator
  209. Checker& set_before_exec_callback(const BeforeExecCallback& cb) {
  210. m_before_exec_callback = cb;
  211. return *this;
  212. }
  213. //! set a tensors constraints function, for the purpose of manipulating
  214. //! tensors when testing.
  215. Checker& set_tensors_constraint(
  216. const TensorsConstriant& tensor_constraint) {
  217. m_tensor_constraint = tensor_constraint;
  218. return *this;
  219. }
  220. /*!
  221. * \brief set that exec() on opr should fail, so naive is not called and
  222. * exec() returns directly after opr is called.
  223. *
  224. * This is only valid for next exec() call. It is usually used for
  225. * testing megcore::AsyncErrorInfo.
  226. *
  227. * \param cb callback to be invoked after opr exec (so error would not
  228. * be passed to destructor)
  229. */
  230. Checker& set_expect_exec_fail(const thin_function<void()>& cb) {
  231. m_expect_exec_fail = cb;
  232. return *this;
  233. }
  234. /*!
  235. * \brief set a function to canonize the outputs
  236. *
  237. * For some oprs maybe multiple outputs can be accepted; we can use a
  238. * function to transform them into a canonized form before comparing.
  239. *
  240. * The arguments are tensors on CPU and should be modified in-place.
  241. */
  242. Checker& set_output_canonizer(OutputCanonizer canonizer) {
  243. m_output_canonizer = std::move(canonizer);
  244. return *this;
  245. }
  246. //! get the opr impl so setting other than param() can be modified
  247. Opr* opr() {
  248. if (!m_opr_cur) {
  249. m_opr_cur = m_handle_cur->create_operator<Opr>();
  250. }
  251. return m_opr_cur.get();
  252. }
  253. //! whether previous exec succeeds
  254. bool prev_succ() const { return m_prev_succ; }
  255. private:
  256. BeforeExecCallback m_before_exec_callback;
  257. Param m_param;
  258. Proxy m_naive_proxy, m_cur_proxy;
  259. std::unique_ptr<Opr> m_opr_cur;
  260. };
  261. ::testing::AssertionResult __assert_tensor_eq(
  262. const char* expr0, const char* expr1, const char* expr_maxerr,
  263. const char* expr_maxerr_avg, const char* expr_maxerr_avg_biased,
  264. const TensorND& v0, const TensorND& v1, float maxerr, float maxerr_avg,
  265. float maxerr_avg_biased);
  266. #define MEGDNN_ASSERT_TENSOR_EQ_EPS_AVG(v0, v1, maxerr, maxerr_avg, \
  267. maxerr_avg_biased) \
  268. ASSERT_PRED_FORMAT5(::megdnn::test::__assert_tensor_eq, v0, v1, maxerr, \
  269. maxerr_avg, maxerr_avg_biased)
  270. #define MEGDNN_ASSERT_TENSOR_EQ_EPS(v0, v1, maxerr) \
  271. MEGDNN_ASSERT_TENSOR_EQ_EPS_AVG(v0, v1, maxerr, maxerr, maxerr)
  272. #define MEGDNN_ASSERT_TENSOR_EQ(v0, v1) \
  273. MEGDNN_ASSERT_TENSOR_EQ_EPS(v0, v1, 1e-3)
  274. template <typename Opr, typename Proxy>
  275. void Checker<Opr, Proxy>::exec(TensorLayoutArray layouts) {
  276. auto opr_naive = m_handle_naive->create_operator<Opr>();
  277. auto opr_relayout = m_handle_naive->create_operator<RelayoutForward>();
  278. auto opr_cur = this->opr();
  279. opr_naive->param() = m_param;
  280. opr_cur->param() = m_param;
  281. m_naive_proxy.deduce_layout(opr_naive.get(), layouts);
  282. auto exec_naive = [this, &opr_naive, &layouts,
  283. &opr_relayout](const TensorValueArray& values) {
  284. TensorValueArray contig_values = values;
  285. TensorValueArray real_values = values;
  286. std::shared_ptr<TensorValueArray> tensors_naive_contig_storage;
  287. if (m_enable_contig_naive) {
  288. TensorLayoutArray contig_layouts;
  289. for (auto&& layout : layouts) {
  290. contig_layouts.emplace_back(TensorLayout{
  291. static_cast<const TensorShape&>(layout), layout.dtype});
  292. }
  293. m_naive_proxy.deduce_layout(opr_naive.get(), contig_layouts);
  294. tensors_naive_contig_storage = alloc_tensors(
  295. m_handle_naive.get(), contig_layouts, m_offset);
  296. contig_values = *tensors_naive_contig_storage;
  297. //! relayout value to the contig_values
  298. for (size_t i = 0; i < contig_values.size(); ++i) {
  299. if (real_values[i].layout.ndim == 0)
  300. continue;
  301. real_values[i].layout.format = {};
  302. opr_relayout->exec(real_values[i], contig_values[i],
  303. m_handle_naive.get());
  304. }
  305. }
  306. m_naive_proxy.exec(opr_naive.get(), contig_values);
  307. if (m_enable_contig_naive) {
  308. //! relayout to the values
  309. for (size_t i = 0; i < contig_values.size(); ++i) {
  310. if (real_values[i].layout.ndim == 0)
  311. continue;
  312. opr_relayout->exec(contig_values[i], real_values[i],
  313. m_handle_naive.get());
  314. }
  315. }
  316. };
  317. auto exec_opr = [this, opr_cur](const TensorValueArray& values) {
  318. if (m_before_exec_callback) {
  319. m_before_exec_callback(opr_cur, values);
  320. }
  321. m_cur_proxy.exec(opr_cur, values);
  322. };
  323. auto user_layouts = layouts;
  324. do_exec(user_layouts, layouts, exec_naive, exec_opr);
  325. }
  326. template <typename Opr, typename Proxy>
  327. Checker<Opr, Proxy>& Checker<Opr, Proxy>::exect(
  328. const TensorValueArray& testcase_in,
  329. const TensorValueArray& testcase_out) {
  330. auto opr_cur = this->opr();
  331. opr_cur->param() = m_param;
  332. auto exec_opr = [this, opr_cur](const TensorValueArray& values) {
  333. if (m_before_exec_callback) {
  334. m_before_exec_callback(opr_cur, values);
  335. }
  336. m_cur_proxy.exec(opr_cur, values);
  337. };
  338. do_exec_with_testcases(testcase_in, testcase_out, exec_opr);
  339. return *this;
  340. }
  341. template <typename T, typename U>
  342. TensorND TensorValue(const TensorShape& shape, T dtype,
  343. std::initializer_list<U> values) {
  344. TensorND tensor;
  345. tensor.layout = {shape, dtype};
  346. tensor.raw_ptr =
  347. static_cast<dt_byte*>(malloc(tensor.layout.span().dist_byte()));
  348. megdnn_assert(values.size() == tensor.layout.total_nr_elems(), "%zu == %zu",
  349. values.size(), tensor.layout.total_nr_elems());
  350. auto ptr = tensor.ptr<typename DTypeTrait<T>::ctype>();
  351. for (const auto& v : values) {
  352. *ptr++ = typename DTypeTrait<T>::ctype(v);
  353. }
  354. return tensor;
  355. }
  356. template <typename T, typename U>
  357. TensorND TensorValueLowbit4(const TensorShape& shape, T dtype,
  358. std::vector<U> values) {
  359. TensorND tensor;
  360. tensor.layout = {shape, dtype};
  361. tensor.raw_ptr =
  362. static_cast<dt_byte*>(malloc(tensor.layout.span().dist_byte()));
  363. megdnn_assert(values.size() == tensor.layout.total_nr_elems());
  364. auto ptr = static_cast<U*>(tensor.raw_ptr);
  365. for (size_t i = 0; i < values.size(); i += 2) {
  366. U val0 = values[i], val1 = values[i + 1];
  367. megdnn_assert(val0 >= DTypeTrait<T>::min());
  368. megdnn_assert(val1 <= DTypeTrait<T>::max());
  369. ptr[i / 2] = (val0 & 0xF) | (val1 << 4);
  370. }
  371. return tensor;
  372. }
  373. class Testcase : public SmallVector<TensorND> {
  374. public:
  375. using SmallVector<TensorND>::SmallVector;
  376. ~Testcase() {
  377. // Suicide
  378. for (const auto& tensor : *this) {
  379. if (tensor.raw_ptr) {
  380. free(tensor.raw_ptr);
  381. }
  382. }
  383. }
  384. Testcase(const Testcase&) = delete;
  385. Testcase operator=(const Testcase&) = delete;
  386. };
  387. /*!
  388. * \brief a callable to check that given algorithm is used for heuristic
  389. * \param require_algo if its value is true, then requires
  390. * get_algorithm_heuristic() to return the expected algo; otherwise the
  391. * expected algo must exist in get_all_algorithms() and it would be set to
  392. * be used
  393. */
  394. template <class Opr, typename OprAlgoProxy = OprAlgoProxy<Opr>>
  395. class AlgoChecker {
  396. std::string m_name;
  397. typename Opr::Algorithm* m_algo = nullptr;
  398. bool* m_require_algo;
  399. public:
  400. AlgoChecker(const char* name, bool* require_algo = nullptr)
  401. : m_name{name}, m_require_algo{require_algo} {}
  402. AlgoChecker(typename Opr::Algorithm* algo, bool* require_algo = nullptr)
  403. : m_algo{algo}, m_require_algo{require_algo} {}
  404. void operator()(Opr* opr, const CheckerHelper::TensorValueArray& arr) {
  405. TensorLayoutArray layouts;
  406. for (auto&& val : arr) {
  407. layouts.push_back(val.layout);
  408. }
  409. if (m_require_algo && *m_require_algo) {
  410. auto algo =
  411. OprAlgoProxy::get_algorithm_info_heuristic(opr, layouts);
  412. if (m_name.empty()) {
  413. ASSERT_EQ(m_algo->name(), algo.name.c_str());
  414. } else {
  415. ASSERT_TRUE(std::regex_match(
  416. algo.name.c_str(), std::regex("(" + m_name + ")(.*)")));
  417. }
  418. } else {
  419. if (m_name.empty()) {
  420. opr->execution_policy().algo = m_algo->info();
  421. return;
  422. } else {
  423. for (auto i :
  424. OprAlgoProxy::get_all_algorithms_info(opr, layouts)) {
  425. if (std::regex_match(i.name,
  426. std::regex("(" + m_name + ")(.*)"))) {
  427. opr->execution_policy().algo = i;
  428. return;
  429. }
  430. }
  431. }
  432. ASSERT_TRUE(false) << "algorithm " << m_name << " not found";
  433. }
  434. }
  435. };
  436. } // namespace test
  437. } // namespace megdnn
  438. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台