You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

accuracy_shake_checker.h 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. /**
  2. * \file dnn/test/common/accuracy_shake_checker.h
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #pragma once
  12. #include <vector>
  13. #include "megdnn/oprs.h"
  14. #include "src/common/conv_bias.h"
  15. #include "src/common/utils.h"
  16. #include "test/common/checker.h"
  17. #include "test/common/index.h"
  18. namespace megdnn {
  19. namespace test {
  20. namespace {
  /*!
   * \brief describes where the batch dimension of operator \p Opr lives
   *
   * This primary template is the fallback: the batch axis is 0 and both index
   * lists are empty. Operators that take part in the check provide an explicit
   * specialization.
   */
  template <class Opr>
  struct BatchTrait {
      //! axis of the batch dimension inside one tensor (3 for CHWN4 e.g.);
      //! the fallback always answers 0 and ignores the param
      static size_t index_of_batch(const typename Opr::Param& /* param */) {
          return 0;
      }

      //! positions, counted over inputs and outputs together, of the tensors
      //! that carry a batch dimension (src(0) and dst(2) for conv e.g.)
      static std::vector<size_t> indices_contain_batch;

      //! positions of the tensors that LayoutsModifier resizes only when
      //! check_bias_share_in_channel() returns false for them
      static std::vector<size_t> indices_contain_batch_broadcast;
  };

  //! fallback list of batch-carrying tensors: empty
  template <class Opr>
  std::vector<size_t> BatchTrait<Opr>::indices_contain_batch{};

  //! fallback list of possibly-broadcast tensors: empty
  template <class Opr>
  std::vector<size_t> BatchTrait<Opr>::indices_contain_batch_broadcast{};
  //! expands to an index_of_batch() member that reports axis 0 for every param
  #define DEFAULT_INDEX_OF_BATCH(opr) \
      static size_t index_of_batch(const opr::Param&) { return 0; }

  //! expands to an index_of_batch() member that reports axis 3 when the param
  //! format is CHWN4 and axis 0 for every other format
  #define CONV_INDEX_OF_BATCH(opr)                                   \
      static size_t index_of_batch(const opr::Param& p) {            \
          return p.format == opr::Param::Format::CHWN4 ? 3 : 0;      \
      }

  /*!
   * Emits the explicit specialization BatchTrait<opr> together with the
   * definitions of its two static index lists.
   *
   * INDEX_OF_BATCH  one of the *_INDEX_OF_BATCH macros; supplies
   *                 index_of_batch()
   * opr             operator type to specialize for
   * idxs            initializer of indices_contain_batch; wrap it in
   *                 parentheses when it contains commas so that it stays a
   *                 single macro argument
   * idxs_brdcst     initializer of indices_contain_batch_broadcast
   */
  #define OPR_WITHOUT_INPUT_BROADCAST(INDEX_OF_BATCH, opr, idxs, idxs_brdcst)  \
      template <>                                                              \
      struct BatchTrait<opr> {                                                 \
          INDEX_OF_BATCH(opr)                                                  \
          static std::vector<size_t> indices_contain_batch;                    \
          static std::vector<size_t> indices_contain_batch_broadcast;          \
      };                                                                       \
      std::vector<size_t> BatchTrait<opr>::indices_contain_batch = idxs;       \
      std::vector<size_t> BatchTrait<opr>::indices_contain_batch_broadcast =   \
              idxs_brdcst;
  52. OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH,
  53. megdnn::Convolution3DForward,
  54. (std::initializer_list<size_t>{0, 2}), {})
  55. OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH,
  56. megdnn::Convolution3DBackwardData,
  57. (std::initializer_list<size_t>{1, 2}), {})
  58. OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH,
  59. megdnn::Convolution3DBackwardFilter,
  60. (std::initializer_list<size_t>{0, 1}), {})
  61. OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH, megdnn::BatchedMatrixMul,
  62. (std::initializer_list<size_t>{0, 1, 2}), {})
  63. OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::ConvolutionForward,
  64. (std::initializer_list<size_t>{0, 2}), {})
  65. OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH,
  66. megdnn::ConvolutionBackwardData,
  67. (std::initializer_list<size_t>{1, 2}), {})
  68. OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH,
  69. megdnn::ConvolutionBackwardFilter,
  70. (std::initializer_list<size_t>{0, 1}), {})
  71. OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::LocalShareForward,
  72. (std::initializer_list<size_t>{0, 2}), {})
  73. OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::LocalShareBackwardData,
  74. (std::initializer_list<size_t>{1, 2}), {})
  75. OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH,
  76. megdnn::LocalShareBackwardFilter,
  77. (std::initializer_list<size_t>{0, 1}), {})
  78. OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::DeformableConvForward,
  79. (std::initializer_list<size_t>{0, 2, 3, 4}), {})
  80. OPR_WITHOUT_INPUT_BROADCAST(
  81. CONV_INDEX_OF_BATCH, megdnn::DeformableConvBackwardData,
  82. (std::initializer_list<size_t>{0, 2, 3, 4, 5, 6, 7}), {})
  83. OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH,
  84. megdnn::DeformableConvBackwardFilter,
  85. (std::initializer_list<size_t>{0, 1, 2, 3}), {})
  86. OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::BatchConvBiasForward,
  87. (std::initializer_list<size_t>{0, 1, 2, 3, 4}), {})
  88. OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::ConvBiasForward,
  89. (std::initializer_list<size_t>{0, 3, 4}), {2})
  90. #undef OPR_WITHOUT_INPUT_BROADCAST
  91. #undef DEFAULT_INDEX_OF_BATCH
  92. #undef CONV_INDEX_OF_BATCH
  93. template <class Opr>
  94. struct LayoutsModifier {
  95. static void on(TensorLayoutArray& layouts, const typename Opr::Param& p,
  96. size_t new_batch_size) {
  97. size_t batch_index = BatchTrait<Opr>::index_of_batch(p);
  98. for (size_t index : BatchTrait<Opr>::indices_contain_batch) {
  99. layouts.at(index)[batch_index] = new_batch_size;
  100. }
  101. for (size_t index : BatchTrait<Opr>::indices_contain_batch_broadcast) {
  102. if (!check_bias_share_in_channel(layouts.at(index), p.format)) {
  103. layouts.at(index)[batch_index] = new_batch_size;
  104. }
  105. }
  106. }
  107. };
  108. #define OPR_NO_BIAS(opr) \
  109. template <> \
  110. struct LayoutsModifier<opr> { \
  111. static void on(TensorLayoutArray& layouts, \
  112. const typename opr::Param& p, size_t new_batch_size) { \
  113. size_t batch_index = BatchTrait<opr>::index_of_batch(p); \
  114. for (size_t index : BatchTrait<opr>::indices_contain_batch) { \
  115. layouts.at(index)[batch_index] = new_batch_size; \
  116. } \
  117. } \
  118. };
  119. OPR_NO_BIAS(megdnn::Convolution3D)
  120. OPR_NO_BIAS(megdnn::BatchedMatrixMul)
  121. #undef OPR_NO_BIAS
  122. template <>
  123. struct LayoutsModifier<megdnn::MatrixMul> {
  124. public:
  125. static void on(TensorLayoutArray& layouts,
  126. const megdnn::MatrixMul::Param& p,
  127. size_t new_batch_size) {
  128. assert(!p.transposeA && !p.transposeB);
  129. MEGDNN_MARK_USED_VAR(p);
  130. layouts.at(0)[0] = new_batch_size;
  131. layouts.at(2)[0] = new_batch_size;
  132. }
  133. };
  134. template <class Opr, typename OprAlgoProxy = OprAlgoProxy<Opr>>
  135. class AlgoGenerator {
  136. public:
  137. AlgoGenerator(ExecutionPolicyAlgoName name)
  138. : m_policy_name{name} {}
  139. std::vector<Algorithm::Info::Desc> operator()(
  140. Opr* opr, const CheckerHelper::TensorValueArray& arr) {
  141. TensorLayoutArray layouts;
  142. for (auto&& val : arr) {
  143. layouts.push_back(val.layout);
  144. }
  145. std::vector<Algorithm::Info::Desc> ret;
  146. megdnn_assert(layouts.size() == OprTrait<Opr>::arity);
  147. for (auto algo_info :
  148. AlgoProxy<Opr, OprTrait<Opr>::arity>::get_all_algorithms_info(
  149. opr, layouts)) {
  150. if (!(algo_info.attribute &
  151. AlgoAttribute::ACCURACY_DEPEND_ON_BATCH) &&
  152. std::regex_match(
  153. algo_info.desc.name,
  154. std::regex("(.*)(" + m_policy_name.name + ")(.*)"))) {
  155. ret.push_back(algo_info.desc);
  156. } else {
  157. continue;
  158. }
  159. }
  160. return ret;
  161. }
  162. private:
  163. ExecutionPolicyAlgoName m_policy_name;
  164. };
  165. } // namespace
  166. ::testing::AssertionResult __assert_tensor_binary_eq(
  167. const char* expr0, const char* expr1, const char* expr2,
  168. const TensorND& v0, const TensorND& v1,
  169. const Algorithm::Info::Desc& algo);
  170. template <typename Opr, typename Proxy = OprProxy<Opr>>
  171. class AccuracyShakeChecker : public CheckerHelper {
  172. public:
  173. static constexpr int arity_in = OprArityTrait<Opr>::arity_in;
  174. using Param = typename Opr::Param;
  175. using BeforeExecCallback = std::function<std::vector<Algorithm::Info::Desc>(
  176. Opr*, const TensorValueArray&)>;
  177. AccuracyShakeChecker(Handle* handle, bool check_dispatch = false)
  178. : CheckerHelper(handle, check_dispatch),
  179. m_before_exec_callback{AlgoGenerator<Opr>("")},
  180. m_param(Param()) {}
  181. TensorLayoutArray make_layouts(const TensorShapeArray& shapes) {
  182. TensorLayoutArray layouts(shapes.size());
  183. for (size_t i = 0; i < shapes.size(); ++i) {
  184. DType dt = (m_dtype.find(i) != m_dtype.end() ? m_dtype[i]
  185. : dtype::Float32());
  186. TensorFormat fmt =
  187. (m_fmt.find(i) != m_fmt.end() ? m_fmt[i] : TensorFormat{});
  188. layouts[i] = TensorLayout(shapes[i], dt, fmt);
  189. }
  190. return layouts;
  191. }
  192. /*!
  193. * \brief execute opr on current param/dtype/rng config
  194. * \param shapes input/output shapes, which would be passed as
  195. * arguments to Opr::deduce_layout
  196. *
  197. * Checker would construct TensorLayout vectors from shapes and dtypes,
  198. * and call exec(TensorLayoutArray &).
  199. */
  200. AccuracyShakeChecker& exec(const TensorShapeArray& shapes) {
  201. exec(make_layouts(shapes));
  202. return *this;
  203. }
  204. void exec(TensorLayoutArray layouts);
  205. AccuracyShakeChecker& set_param(Param p) {
  206. m_param = p;
  207. opr()->param() = p;
  208. return *this;
  209. }
  210. AccuracyShakeChecker& set_dtype(size_t idx, DType dtype) {
  211. m_dtype[idx] = dtype;
  212. return *this;
  213. }
  214. AccuracyShakeChecker& set_rng(size_t idx, RNG* rng) {
  215. m_rng[idx] = rng;
  216. return *this;
  217. }
  218. //! set a callback to be invoked before executing the operator
  219. AccuracyShakeChecker& set_before_exec_callback(
  220. const BeforeExecCallback& cb) {
  221. m_before_exec_callback = cb;
  222. return *this;
  223. }
  224. AccuracyShakeChecker& reset_before_exec_callback() {
  225. m_before_exec_callback = nullptr;
  226. return *this;
  227. }
  228. //! get the opr impl so setting other than param() can be modified
  229. Opr* opr() {
  230. if (!m_opr_cur) {
  231. m_opr_cur = m_handle_cur->create_operator<Opr>();
  232. }
  233. return m_opr_cur.get();
  234. }
  235. private:
  236. BeforeExecCallback m_before_exec_callback;
  237. Param m_param;
  238. Proxy m_proxy;
  239. std::unique_ptr<Opr> m_opr_cur;
  240. std::shared_ptr<TensorValueArray> m_tensors_cur_host,
  241. m_tensors_single_batch_host;
  242. void init_host_values();
  243. void check_tensors_ignore_batch(
  244. const TensorValueArray& tensors_single_batch,
  245. const TensorValueArray& tensors, const Algorithm::Info::Desc& desc);
  246. };
  247. template <typename Opr, typename Proxy>
  248. void AccuracyShakeChecker<Opr, Proxy>::exec(TensorLayoutArray layouts) {
  249. auto opr_cur = this->opr();
  250. opr_cur->param() = m_param;
  251. m_proxy.deduce_layout(opr_cur, layouts);
  252. TensorLayoutArray layouts_single_batch = layouts;
  253. for (size_t i=0; i<layouts_single_batch.size(); ++i) {
  254. ASSERT_TRUE(layouts[i].is_physical_contiguous())
  255. << "layouts should be physical contiguous "
  256. << layouts[i].to_string();
  257. }
  258. ASSERT_TRUE(0 == BatchTrait<Opr>::index_of_batch(opr_cur->param()))
  259. << "index of batch should be 0 ";
  260. LayoutsModifier<Opr>::on(layouts_single_batch, opr_cur->param(), 1);
  261. // allocate input
  262. auto tensors_single_batch_storage =
  263. alloc_tensors(m_handle_cur, layouts_single_batch, 0);
  264. m_tensors_single_batch_host =
  265. alloc_tensors(m_handle_naive.get(), layouts_single_batch, 0);
  266. auto tensors_cur_storage = alloc_tensors(m_handle_cur, layouts, 0);
  267. m_tensors_cur_host =
  268. alloc_tensors(m_handle_naive.get(), layouts, 0);
  269. auto &&tensors_single_batch = *tensors_single_batch_storage;
  270. auto &&tensors_single_batch_host = *m_tensors_single_batch_host;
  271. auto &&tensors_cur = *tensors_cur_storage;
  272. auto &&tensors_cur_host = *m_tensors_cur_host;
  273. // allocate output
  274. auto tensors_single_batch_storage_out =
  275. alloc_tensors(m_handle_naive.get(), layouts_single_batch, 0);
  276. auto tensors_cur_storage_out =
  277. alloc_tensors(m_handle_naive.get(), layouts, 0);
  278. auto &&tensors_single_batch_out = *tensors_single_batch_storage_out;
  279. auto &&tensors_cur_out = *tensors_cur_storage_out;
  280. init_host_values();
  281. copy_tensors_to_device(tensors_cur, tensors_cur_host);
  282. copy_tensors_to_device(tensors_single_batch, tensors_single_batch_host);
  283. std::vector<Algorithm::Info::Desc> algo_desc;
  284. if (m_before_exec_callback) {
  285. algo_desc = m_before_exec_callback(opr_cur, tensors_cur);
  286. } else {
  287. algo_desc.push_back({});
  288. }
  289. for (size_t i = 0; i < algo_desc.size(); ++i) {
  290. opr_cur->execution_policy().algo = algo_desc[i];
  291. m_proxy.exec(opr_cur, tensors_cur);
  292. m_proxy.exec(opr_cur, tensors_single_batch);
  293. copy_tensors_from_device(tensors_cur_out, tensors_cur);
  294. copy_tensors_from_device(tensors_single_batch_out,
  295. tensors_single_batch);
  296. check_tensors_ignore_batch(tensors_single_batch_out, tensors_cur_out,
  297. algo_desc[i]);
  298. }
  299. }
  300. template <typename Opr, typename Proxy>
  301. void AccuracyShakeChecker<Opr, Proxy>::init_host_values() {
  302. size_t index_of_batch = 0;
  303. auto &&tensors_single_batch = *m_tensors_single_batch_host;
  304. auto &&tensors_cur = *m_tensors_cur_host;
  305. for (size_t i = 0; i < arity_in; ++i) {
  306. auto &&tensor_single_batch = tensors_single_batch[i];
  307. auto &&tensor_cur = tensors_cur[i];
  308. auto rng = m_rng[i];
  309. if (!rng)
  310. rng = m_default_rng.get();
  311. rng->gen(tensor_single_batch);
  312. dt_byte* raw_storage_cur = static_cast<dt_byte*>(tensor_cur.raw_ptr) +
  313. tensor_cur.layout.span().low_byte;
  314. dt_byte* raw_storage_single_batch =
  315. static_cast<dt_byte*>(tensor_single_batch.raw_ptr) +
  316. tensor_single_batch.layout.span().low_byte;
  317. const size_t step = tensor_single_batch.layout.span().dist_byte();
  318. if (tensor_cur.layout.eq_shape(tensor_single_batch.layout)) {
  319. memcpy(raw_storage_cur, raw_storage_single_batch, step);
  320. } else {
  321. ASSERT_TRUE(1 == tensor_single_batch.layout[index_of_batch])
  322. << "bad batch size "
  323. << tensor_single_batch.layout[index_of_batch];
  324. for (size_t b=0; b<tensor_cur.layout[index_of_batch]; ++b) {
  325. memcpy(raw_storage_cur, raw_storage_single_batch, step);
  326. raw_storage_cur += step;
  327. }
  328. }
  329. }
  330. }
  331. template <typename Opr, typename Proxy>
  332. void AccuracyShakeChecker<Opr, Proxy>::check_tensors_ignore_batch(
  333. const TensorValueArray& tensors_single_batch,
  334. const TensorValueArray& tensors, const Algorithm::Info::Desc& algo) {
  335. for (size_t i = 0; i < tensors_single_batch.size(); ++i) {
  336. if (tensors_single_batch[i].layout.ndim == 0 ||
  337. tensors_single_batch[i].layout.eq_shape(tensors[i].layout))
  338. continue;
  339. ASSERT_PRED_FORMAT3(::megdnn::test::__assert_tensor_binary_eq,
  340. tensors_single_batch[i], tensors[i], algo);
  341. }
  342. }
  343. } // namespace test
  344. } // namespace megdnn
  345. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台