
accuracy_shake_checker.h

/**
 * \file dnn/test/common/accuracy_shake_checker.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#pragma once

#include <cstring>
#include <regex>
#include <vector>

#include "megdnn/oprs.h"
#include "src/common/conv_bias.h"
#include "src/common/utils.h"
#include "test/common/checker.h"
#include "test/common/index.h"

namespace megdnn {
namespace test {

namespace {
template <class Opr>
struct BatchTrait {
    //! index of the batch dimension in the tensor, e.g. 3 for CHWN4
    static size_t index_of_batch(const typename Opr::Param&) { return 0; }
    //! indices of the inputs/outputs that contain a batch dimension,
    //! e.g. src(0) and dst(2) for convolution
    static std::vector<size_t> indices_contain_batch;
    static std::vector<size_t> indices_contain_batch_broadcast;
};
template <class Opr>
std::vector<size_t> BatchTrait<Opr>::indices_contain_batch = {};
template <class Opr>
std::vector<size_t> BatchTrait<Opr>::indices_contain_batch_broadcast = {};
#define DEFAULT_INDEX_OF_BATCH(opr) \
    static size_t index_of_batch(const opr::Param&) { return 0; }
#define CONV_INDEX_OF_BATCH(opr)                        \
    static size_t index_of_batch(const opr::Param& p) { \
        if (p.format == opr::Param::Format::CHWN4) {    \
            return 3;                                   \
        }                                               \
        return 0;                                       \
    }

#define OPR_WITHOUT_INPUT_BROADCAST(INDEX_OF_BATCH, opr, idxs, idxs_brdcst) \
    template <>                                                             \
    struct BatchTrait<opr> {                                                \
        INDEX_OF_BATCH(opr)                                                 \
        static std::vector<size_t> indices_contain_batch;                   \
        static std::vector<size_t> indices_contain_batch_broadcast;         \
    };                                                                      \
    std::vector<size_t> BatchTrait<opr>::indices_contain_batch = idxs;      \
    std::vector<size_t> BatchTrait<opr>::indices_contain_batch_broadcast =  \
            idxs_brdcst;
OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH, megdnn::Convolution3DForward,
                            (std::initializer_list<size_t>{0, 2}), {})
OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH, megdnn::Convolution3DBackwardData,
                            (std::initializer_list<size_t>{1, 2}), {})
OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH, megdnn::Convolution3DBackwardFilter,
                            (std::initializer_list<size_t>{0, 1}), {})
OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH, megdnn::BatchedMatrixMul,
                            (std::initializer_list<size_t>{0, 1, 2}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::ConvolutionForward,
                            (std::initializer_list<size_t>{0, 2}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::ConvolutionBackwardData,
                            (std::initializer_list<size_t>{1, 2}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::ConvolutionBackwardFilter,
                            (std::initializer_list<size_t>{0, 1}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::LocalShareForward,
                            (std::initializer_list<size_t>{0, 2}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::LocalShareBackwardData,
                            (std::initializer_list<size_t>{1, 2}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::LocalShareBackwardFilter,
                            (std::initializer_list<size_t>{0, 1}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::DeformableConvForward,
                            (std::initializer_list<size_t>{0, 2, 3, 4}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::DeformableConvBackwardData,
                            (std::initializer_list<size_t>{0, 2, 3, 4, 5, 6, 7}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::DeformableConvBackwardFilter,
                            (std::initializer_list<size_t>{0, 1, 2, 3}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::BatchConvBiasForward,
                            (std::initializer_list<size_t>{0, 1, 2, 3, 4}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::ConvBiasForward,
                            (std::initializer_list<size_t>{0, 3, 4}), {2})

#undef OPR_WITHOUT_INPUT_BROADCAST
#undef DEFAULT_INDEX_OF_BATCH
#undef CONV_INDEX_OF_BATCH
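
// For reference, the ConvBiasForward line above expands to roughly the
// following specialization (a sketch of the macro expansion, shown only as a
// comment). The bias tensor at index 2 is listed separately because it may be
// a per-channel broadcast and must not always be resized with the batch:
//
//   template <>
//   struct BatchTrait<megdnn::ConvBiasForward> {
//       static size_t index_of_batch(const megdnn::ConvBiasForward::Param& p) {
//           if (p.format == megdnn::ConvBiasForward::Param::Format::CHWN4) {
//               return 3;
//           }
//           return 0;
//       }
//       static std::vector<size_t> indices_contain_batch;            // {0, 3, 4}
//       static std::vector<size_t> indices_contain_batch_broadcast;  // {2}
//   };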
template <class Opr>
struct LayoutsModifier {
    static void on(TensorLayoutArray& layouts, const typename Opr::Param& p,
                   size_t new_batch_size) {
        size_t batch_index = BatchTrait<Opr>::index_of_batch(p);
        for (size_t index : BatchTrait<Opr>::indices_contain_batch) {
            layouts.at(index)[batch_index] = new_batch_size;
        }
        for (size_t index : BatchTrait<Opr>::indices_contain_batch_broadcast) {
            if (!check_bias_share_in_channel(layouts.at(index), p.format)) {
                layouts.at(index)[batch_index] = new_batch_size;
            }
        }
    }
};

#define OPR_NO_BIAS(opr)                                                      \
    template <>                                                               \
    struct LayoutsModifier<opr> {                                             \
        static void on(TensorLayoutArray& layouts,                            \
                       const typename opr::Param& p, size_t new_batch_size) { \
            size_t batch_index = BatchTrait<opr>::index_of_batch(p);          \
            for (size_t index : BatchTrait<opr>::indices_contain_batch) {     \
                layouts.at(index)[batch_index] = new_batch_size;              \
            }                                                                 \
        }                                                                     \
    };

OPR_NO_BIAS(megdnn::Convolution3D)
OPR_NO_BIAS(megdnn::BatchedMatrixMul)
#undef OPR_NO_BIAS
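
// Convolution3D and BatchedMatrixMul carry no bias input, so the OPR_NO_BIAS
// specializations above resize only the plain batch indices and skip the
// per-channel broadcast check entirely.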
template <>
struct LayoutsModifier<megdnn::MatrixMul> {
public:
    static void on(TensorLayoutArray& layouts, const megdnn::MatrixMul::Param& p,
                   size_t new_batch_size) {
        assert(!p.transposeA && !p.transposeB);
        MEGDNN_MARK_USED_VAR(p);
        layouts.at(0)[0] = new_batch_size;
        layouts.at(2)[0] = new_batch_size;
    }
};
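
// MatrixMul has no real batch dimension; the row count M of A (layouts[0])
// and of the output C (layouts[2]) stands in for the batch here, which is
// why transposed operands are rejected above.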
template <class Opr, typename OprAlgoProxy = OprAlgoProxy<Opr>>
class AlgoGenerator {
public:
    AlgoGenerator(ExecutionPolicyAlgoName name) : m_policy_name{name} {}

    std::vector<Algorithm::Info::Desc> operator()(
            Opr* opr, const CheckerHelper::TensorValueArray& arr) {
        TensorLayoutArray layouts;
        for (auto&& val : arr) {
            layouts.push_back(val.layout);
        }
        std::vector<Algorithm::Info::Desc> ret;
        megdnn_assert(layouts.size() == OprTrait<Opr>::arity);
        // collect every algorithm that is reproducible, whose accuracy does
        // not depend on the batch size, and whose name matches the filter
        for (auto algo_info :
             AlgoProxy<Opr, OprTrait<Opr>::arity>::get_all_algorithms_info(
                     opr, layouts)) {
            if (!(algo_info.attribute &
                  AlgoAttribute::ACCURACY_DEPEND_ON_BATCH) &&
                (algo_info.attribute & AlgoAttribute::REPRODUCIBLE) &&
                std::regex_match(
                        algo_info.desc.name,
                        std::regex("(.*)(" + m_policy_name.name + ")(.*)"))) {
                ret.push_back(algo_info.desc);
            }
        }
        return ret;
    }

private:
    ExecutionPolicyAlgoName m_policy_name;
};
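
// With an empty name the regex degenerates to "(.*)()(.*)" and matches every
// algorithm, so the default AlgoGenerator (see the AccuracyShakeChecker
// constructor below) yields all reproducible, batch-independent algorithms
// available for the given layouts.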
}  // namespace

::testing::AssertionResult __assert_tensor_binary_eq(
        const char* expr0, const char* expr1, const char* expr2,
        const TensorND& v0, const TensorND& v1, const Algorithm::Info::Desc& algo);
template <typename Opr, typename Proxy = OprProxy<Opr>>
class AccuracyShakeChecker : public CheckerHelper {
public:
    static constexpr int arity_in = OprArityTrait<Opr>::arity_in;
    using Param = typename Opr::Param;
    using BeforeExecCallback = std::function<std::vector<Algorithm::Info::Desc>(
            Opr*, const TensorValueArray&)>;

    AccuracyShakeChecker(Handle* handle, bool check_dispatch = false)
            : CheckerHelper(handle, check_dispatch),
              m_before_exec_callback{AlgoGenerator<Opr>("")},
              m_param(Param()) {}

    TensorLayoutArray make_layouts(const TensorShapeArray& shapes) {
        TensorLayoutArray layouts(shapes.size());
        for (size_t i = 0; i < shapes.size(); ++i) {
            DType dt =
                    (m_dtype.find(i) != m_dtype.end() ? m_dtype[i]
                                                      : dtype::Float32());
            TensorFormat fmt =
                    (m_fmt.find(i) != m_fmt.end() ? m_fmt[i] : TensorFormat{});
            layouts[i] = TensorLayout(shapes[i], dt, fmt);
        }
        return layouts;
    }

    /*!
     * \brief execute the operator with the current param/dtype/rng config
     * \param shapes input/output shapes, which are passed as arguments to
     *      Opr::deduce_layout
     *
     * The checker constructs TensorLayout vectors from the shapes and dtypes,
     * then calls exec(TensorLayoutArray&).
     */
    AccuracyShakeChecker& exec(const TensorShapeArray& shapes) {
        exec(make_layouts(shapes));
        return *this;
    }

    void exec(TensorLayoutArray layouts);

    AccuracyShakeChecker& set_param(Param p) {
        m_param = p;
        opr()->param() = p;
        return *this;
    }

    AccuracyShakeChecker& set_dtype(size_t idx, DType dtype) {
        m_dtype[idx] = dtype;
        return *this;
    }

    AccuracyShakeChecker& set_rng(size_t idx, RNG* rng) {
        m_rng[idx] = rng;
        return *this;
    }

    //! set a callback to be invoked before executing the operator
    AccuracyShakeChecker& set_before_exec_callback(const BeforeExecCallback& cb) {
        m_before_exec_callback = cb;
        return *this;
    }

    AccuracyShakeChecker& reset_before_exec_callback() {
        m_before_exec_callback = nullptr;
        return *this;
    }

    //! get the opr impl so that settings other than param() can be modified
    Opr* opr() {
        if (!m_opr_cur) {
            m_opr_cur = m_handle_cur->create_operator<Opr>();
        }
        return m_opr_cur.get();
    }

private:
    BeforeExecCallback m_before_exec_callback;
    Param m_param;
    Proxy m_proxy;
    std::unique_ptr<Opr> m_opr_cur;
    std::shared_ptr<TensorValueArray> m_tensors_cur_host,
            m_tensors_single_batch_host;

    void init_host_values();

    void check_tensors_ignore_batch(
            const TensorValueArray& tensors_single_batch,
            const TensorValueArray& tensors, const Algorithm::Info::Desc& desc);
};
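
// A minimal usage sketch, assuming a test handle and illustrative ConvBias
// shapes (none of these values come from this file; "CUDNN" is only an
// example of the name filter accepted by AlgoGenerator):
//
//   AccuracyShakeChecker<ConvBiasForward> checker(handle);  // handle from the test fixture
//   ConvBiasForward::Param param;
//   checker.set_param(param)
//           .set_dtype(0, dtype::Float32())
//           .set_before_exec_callback(AlgoGenerator<ConvBiasForward>("CUDNN"))
//           .exec({{64, 16, 32, 32}, {16, 16, 3, 3}, {1, 16, 1, 1}, {}, {}});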
template <typename Opr, typename Proxy>
void AccuracyShakeChecker<Opr, Proxy>::exec(TensorLayoutArray layouts) {
    auto opr_cur = this->opr();
    opr_cur->param() = m_param;
    m_proxy.deduce_layout(opr_cur, layouts);

    TensorLayoutArray layouts_single_batch = layouts;
    for (size_t i = 0; i < layouts_single_batch.size(); ++i) {
        ASSERT_TRUE(layouts[i].is_physical_contiguous())
                << "layouts should be physically contiguous "
                << layouts[i].to_string();
    }
    ASSERT_TRUE(0 == BatchTrait<Opr>::index_of_batch(opr_cur->param()))
            << "index of batch should be 0";
    LayoutsModifier<Opr>::on(layouts_single_batch, opr_cur->param(), 1);

    // allocate input
    auto tensors_single_batch_storage =
            alloc_tensors(m_handle_cur, layouts_single_batch, 0);
    m_tensors_single_batch_host =
            alloc_tensors(m_handle_naive.get(), layouts_single_batch, 0);
    auto tensors_cur_storage = alloc_tensors(m_handle_cur, layouts, 0);
    m_tensors_cur_host = alloc_tensors(m_handle_naive.get(), layouts, 0);
    auto&& tensors_single_batch = *tensors_single_batch_storage;
    auto&& tensors_single_batch_host = *m_tensors_single_batch_host;
    auto&& tensors_cur = *tensors_cur_storage;
    auto&& tensors_cur_host = *m_tensors_cur_host;

    // allocate output
    auto tensors_single_batch_storage_out =
            alloc_tensors(m_handle_naive.get(), layouts_single_batch, 0);
    auto tensors_cur_storage_out = alloc_tensors(m_handle_naive.get(), layouts, 0);
    auto&& tensors_single_batch_out = *tensors_single_batch_storage_out;
    auto&& tensors_cur_out = *tensors_cur_storage_out;

    init_host_values();

    copy_tensors_to_device(tensors_cur, tensors_cur_host);
    copy_tensors_to_device(tensors_single_batch, tensors_single_batch_host);

    std::vector<Algorithm::Info::Desc> algo_desc;
    if (m_before_exec_callback) {
        algo_desc = m_before_exec_callback(opr_cur, tensors_cur);
    } else {
        algo_desc.push_back({});
    }
    for (size_t i = 0; i < algo_desc.size(); ++i) {
        opr_cur->execution_policy().algo = algo_desc[i];

        m_proxy.exec(opr_cur, tensors_cur);
        m_proxy.exec(opr_cur, tensors_single_batch);

        copy_tensors_from_device(tensors_cur_out, tensors_cur);
        copy_tensors_from_device(tensors_single_batch_out, tensors_single_batch);

        check_tensors_ignore_batch(tensors_single_batch_out, tensors_cur_out,
                                   algo_desc[i]);
    }
}
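
// The core idea of exec(): run each candidate algorithm twice, once on the
// full batch and once on inputs collapsed to batch size 1 (with identical
// per-sample data, see init_host_values below), then compare the two outputs
// sample by sample. Any mismatch means the algorithm's numerical result
// depends on the batch size, i.e. the accuracy "shakes".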
template <typename Opr, typename Proxy>
void AccuracyShakeChecker<Opr, Proxy>::init_host_values() {
    size_t index_of_batch = 0;
    auto&& tensors_single_batch = *m_tensors_single_batch_host;
    auto&& tensors_cur = *m_tensors_cur_host;
    for (size_t i = 0; i < arity_in; ++i) {
        auto&& tensor_single_batch = tensors_single_batch[i];
        auto&& tensor_cur = tensors_cur[i];
        auto rng = m_rng[i];
        if (!rng)
            rng = m_default_rng.get();
        // fill the single-batch tensor with random data first
        rng->gen(tensor_single_batch);
        dt_byte* raw_storage_cur = static_cast<dt_byte*>(tensor_cur.raw_ptr) +
                                   tensor_cur.layout.span().low_byte;
        dt_byte* raw_storage_single_batch =
                static_cast<dt_byte*>(tensor_single_batch.raw_ptr) +
                tensor_single_batch.layout.span().low_byte;
        const size_t step = tensor_single_batch.layout.span().dist_byte();
        if (tensor_cur.layout.eq_shape(tensor_single_batch.layout)) {
            // this tensor has no batch dimension: copy it verbatim
            memcpy(raw_storage_cur, raw_storage_single_batch, step);
        } else {
            ASSERT_TRUE(1 == tensor_single_batch.layout[index_of_batch])
                    << "bad batch size "
                    << tensor_single_batch.layout[index_of_batch];
            // replicate the single batch across the full batch dimension,
            // so every sample of the batched input holds identical data
            for (size_t b = 0; b < tensor_cur.layout[index_of_batch]; ++b) {
                memcpy(raw_storage_cur, raw_storage_single_batch, step);
                raw_storage_cur += step;
            }
        }
    }
}
template <typename Opr, typename Proxy>
void AccuracyShakeChecker<Opr, Proxy>::check_tensors_ignore_batch(
        const TensorValueArray& tensors_single_batch,
        const TensorValueArray& tensors, const Algorithm::Info::Desc& algo) {
    for (size_t i = 0; i < tensors_single_batch.size(); ++i) {
        if (tensors_single_batch[i].layout.ndim == 0 ||
            tensors_single_batch[i].layout.eq_shape(tensors[i].layout))
            continue;
        ASSERT_PRED_FORMAT3(::megdnn::test::__assert_tensor_binary_eq,
                            tensors_single_batch[i], tensors[i], algo);
    }
}

}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen
