You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

accuracy_shake_checker.h 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. /**
  2. * \file dnn/test/common/accuracy_shake_checker.h
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #pragma once
  12. #include <vector>
  13. #include "megdnn/oprs.h"
  14. #include "src/common/conv_bias.h"
  15. #include "src/common/utils.h"
  16. #include "test/common/checker.h"
  17. #include "test/common/index.h"
  18. namespace megdnn {
  19. namespace test {
  20. namespace {
  /*!
   * \brief Describes where the batch dimension of operator \p Opr lives.
   *
   * This primary template is the fallback used when no specialization exists:
   * the batch axis is 0 and both index lists are empty.
   */
  template <typename Opr>
  struct BatchTrait {
      //! axis of the batch dimension inside a tensor (3 for CHWN4, for example)
      static size_t index_of_batch(const typename Opr::Param& /* param */) {
          return 0;
      }

      //! positions, among the operator's inputs and outputs, of the tensors
      //! that carry a batch dimension (src(0) and dst(2) for conv, for example)
      static std::vector<size_t> indices_contain_batch;

      //! positions of tensors whose batch dimension is only resized when
      //! LayoutsModifier finds the tensor is not shared in channel
      static std::vector<size_t> indices_contain_batch_broadcast;
  };

  template <typename Opr>
  std::vector<size_t> BatchTrait<Opr>::indices_contain_batch{};

  template <typename Opr>
  std::vector<size_t> BatchTrait<Opr>::indices_contain_batch_broadcast{};
  33. #define DEFAULT_INDEX_OF_BATCH(opr) \
  34. static size_t index_of_batch(const opr::Param&) { return 0; }
  35. #define CONV_INDEX_OF_BATCH(opr) \
  36. static size_t index_of_batch(const opr::Param& p) { \
  37. if (p.format == opr::Param::Format::CHWN4) { \
  38. return 3; \
  39. } \
  40. return 0; \
  41. }
  42. #define OPR_WITHOUT_INPUT_BROADCAST(INDEX_OF_BATCH, opr, idxs, idxs_brdcst) \
  43. template <> \
  44. struct BatchTrait<opr> { \
  45. INDEX_OF_BATCH(opr) \
  46. static std::vector<size_t> indices_contain_batch; \
  47. static std::vector<size_t> indices_contain_batch_broadcast; \
  48. }; \
  49. std::vector<size_t> BatchTrait<opr>::indices_contain_batch = idxs; \
  50. std::vector<size_t> BatchTrait<opr>::indices_contain_batch_broadcast = idxs_brdcst;
  51. OPR_WITHOUT_INPUT_BROADCAST(
  52. DEFAULT_INDEX_OF_BATCH, megdnn::Convolution3DForward,
  53. (std::initializer_list<size_t>{0, 2}), {})
  54. OPR_WITHOUT_INPUT_BROADCAST(
  55. DEFAULT_INDEX_OF_BATCH, megdnn::Convolution3DBackwardData,
  56. (std::initializer_list<size_t>{1, 2}), {})
  57. OPR_WITHOUT_INPUT_BROADCAST(
  58. DEFAULT_INDEX_OF_BATCH, megdnn::Convolution3DBackwardFilter,
  59. (std::initializer_list<size_t>{0, 1}), {})
  60. OPR_WITHOUT_INPUT_BROADCAST(
  61. DEFAULT_INDEX_OF_BATCH, megdnn::BatchedMatrixMul,
  62. (std::initializer_list<size_t>{0, 1, 2}), {})
  63. OPR_WITHOUT_INPUT_BROADCAST(
  64. CONV_INDEX_OF_BATCH, megdnn::ConvolutionForward,
  65. (std::initializer_list<size_t>{0, 2}), {})
  66. OPR_WITHOUT_INPUT_BROADCAST(
  67. CONV_INDEX_OF_BATCH, megdnn::ConvolutionBackwardData,
  68. (std::initializer_list<size_t>{1, 2}), {})
  69. OPR_WITHOUT_INPUT_BROADCAST(
  70. CONV_INDEX_OF_BATCH, megdnn::ConvolutionBackwardFilter,
  71. (std::initializer_list<size_t>{0, 1}), {})
  72. OPR_WITHOUT_INPUT_BROADCAST(
  73. CONV_INDEX_OF_BATCH, megdnn::LocalShareForward,
  74. (std::initializer_list<size_t>{0, 2}), {})
  75. OPR_WITHOUT_INPUT_BROADCAST(
  76. CONV_INDEX_OF_BATCH, megdnn::LocalShareBackwardData,
  77. (std::initializer_list<size_t>{1, 2}), {})
  78. OPR_WITHOUT_INPUT_BROADCAST(
  79. CONV_INDEX_OF_BATCH, megdnn::LocalShareBackwardFilter,
  80. (std::initializer_list<size_t>{0, 1}), {})
  81. OPR_WITHOUT_INPUT_BROADCAST(
  82. CONV_INDEX_OF_BATCH, megdnn::DeformableConvForward,
  83. (std::initializer_list<size_t>{0, 2, 3, 4}), {})
  84. OPR_WITHOUT_INPUT_BROADCAST(
  85. CONV_INDEX_OF_BATCH, megdnn::DeformableConvBackwardData,
  86. (std::initializer_list<size_t>{0, 2, 3, 4, 5, 6, 7}), {})
  87. OPR_WITHOUT_INPUT_BROADCAST(
  88. CONV_INDEX_OF_BATCH, megdnn::DeformableConvBackwardFilter,
  89. (std::initializer_list<size_t>{0, 1, 2, 3}), {})
  90. OPR_WITHOUT_INPUT_BROADCAST(
  91. CONV_INDEX_OF_BATCH, megdnn::BatchConvBiasForward,
  92. (std::initializer_list<size_t>{0, 1, 2, 3, 4}), {})
  93. OPR_WITHOUT_INPUT_BROADCAST(
  94. CONV_INDEX_OF_BATCH, megdnn::ConvBiasForward,
  95. (std::initializer_list<size_t>{0, 3, 4}), {2})
  96. #undef OPR_WITHOUT_INPUT_BROADCAST
  97. #undef DEFAULT_INDEX_OF_BATCH
  98. #undef CONV_INDEX_OF_BATCH
  99. template <class Opr>
  100. struct LayoutsModifier {
  101. static void on(
  102. TensorLayoutArray& layouts, const typename Opr::Param& p,
  103. size_t new_batch_size) {
  104. size_t batch_index = BatchTrait<Opr>::index_of_batch(p);
  105. for (size_t index : BatchTrait<Opr>::indices_contain_batch) {
  106. layouts.at(index)[batch_index] = new_batch_size;
  107. }
  108. for (size_t index : BatchTrait<Opr>::indices_contain_batch_broadcast) {
  109. if (!check_bias_share_in_channel(layouts.at(index), p.format)) {
  110. layouts.at(index)[batch_index] = new_batch_size;
  111. }
  112. }
  113. }
  114. };
  115. #define OPR_NO_BIAS(opr) \
  116. template <> \
  117. struct LayoutsModifier<opr> { \
  118. static void on( \
  119. TensorLayoutArray& layouts, const typename opr::Param& p, \
  120. size_t new_batch_size) { \
  121. size_t batch_index = BatchTrait<opr>::index_of_batch(p); \
  122. for (size_t index : BatchTrait<opr>::indices_contain_batch) { \
  123. layouts.at(index)[batch_index] = new_batch_size; \
  124. } \
  125. } \
  126. };
  127. OPR_NO_BIAS(megdnn::Convolution3D)
  128. OPR_NO_BIAS(megdnn::BatchedMatrixMul)
  129. #undef OPR_NO_BIAS
  130. template <>
  131. struct LayoutsModifier<megdnn::MatrixMul> {
  132. public:
  133. static void on(
  134. TensorLayoutArray& layouts, const megdnn::MatrixMul::Param& p,
  135. size_t new_batch_size) {
  136. assert(!p.transposeA && !p.transposeB);
  137. MEGDNN_MARK_USED_VAR(p);
  138. layouts.at(0)[0] = new_batch_size;
  139. layouts.at(2)[0] = new_batch_size;
  140. }
  141. };
  142. template <class Opr, typename OprAlgoProxy = OprAlgoProxy<Opr>>
  143. class AlgoGenerator {
  144. public:
  145. AlgoGenerator(ExecutionPolicyAlgoName name) : m_policy_name{name} {}
  146. std::vector<Algorithm::Info::Desc> operator()(
  147. Opr* opr, const CheckerHelper::TensorValueArray& arr) {
  148. TensorLayoutArray layouts;
  149. for (auto&& val : arr) {
  150. layouts.push_back(val.layout);
  151. }
  152. std::vector<Algorithm::Info::Desc> ret;
  153. megdnn_assert(layouts.size() == OprTrait<Opr>::arity);
  154. auto vec = AlgoProxy<Opr, OprTrait<Opr>::arity>::get_all_algorithms_info_safe(
  155. opr, layouts);
  156. for (auto algo_info : vec) {
  157. if (!(algo_info.attribute & AlgoAttribute::ACCURACY_DEPEND_ON_BATCH) &&
  158. (algo_info.attribute & AlgoAttribute::REPRODUCIBLE) &&
  159. std::regex_match(
  160. algo_info.desc.name,
  161. std::regex("(.*)(" + m_policy_name.name + ")(.*)"))) {
  162. ret.push_back(algo_info.desc);
  163. } else {
  164. continue;
  165. }
  166. }
  167. return ret;
  168. }
  169. private:
  170. ExecutionPolicyAlgoName m_policy_name;
  171. };
  172. } // namespace
  173. ::testing::AssertionResult __assert_tensor_binary_eq(
  174. const char* expr0, const char* expr1, const char* expr2, const TensorND& v0,
  175. const TensorND& v1, const Algorithm::Info::Desc& algo);
  176. template <typename Opr, typename Proxy = OprProxy<Opr>>
  177. class AccuracyShakeChecker : public CheckerHelper {
  178. public:
  179. static constexpr int arity_in = OprArityTrait<Opr>::arity_in;
  180. using Param = typename Opr::Param;
  181. using BeforeExecCallback = std::function<std::vector<Algorithm::Info::Desc>(
  182. Opr*, const TensorValueArray&)>;
  183. AccuracyShakeChecker(Handle* handle, bool check_dispatch = false)
  184. : CheckerHelper(handle, check_dispatch),
  185. m_before_exec_callback{AlgoGenerator<Opr>("")},
  186. m_param(Param()) {}
  187. TensorLayoutArray make_layouts(const TensorShapeArray& shapes) {
  188. TensorLayoutArray layouts(shapes.size());
  189. for (size_t i = 0; i < shapes.size(); ++i) {
  190. DType dt =
  191. (m_dtype.find(i) != m_dtype.end() ? m_dtype[i] : dtype::Float32());
  192. TensorFormat fmt =
  193. (m_fmt.find(i) != m_fmt.end() ? m_fmt[i] : TensorFormat{});
  194. layouts[i] = TensorLayout(shapes[i], dt, fmt);
  195. }
  196. return layouts;
  197. }
  198. /*!
  199. * \brief execute opr on current param/dtype/rng config
  200. * \param shapes input/output shapes, which would be passed as
  201. * arguments to Opr::deduce_layout
  202. *
  203. * Checker would construct TensorLayout vectors from shapes and dtypes,
  204. * and call exec(TensorLayoutArray &).
  205. */
  206. AccuracyShakeChecker& exec(const TensorShapeArray& shapes) {
  207. exec(make_layouts(shapes));
  208. return *this;
  209. }
  210. void exec(TensorLayoutArray layouts);
  211. AccuracyShakeChecker& set_param(Param p) {
  212. m_param = p;
  213. opr()->param() = p;
  214. return *this;
  215. }
  216. AccuracyShakeChecker& set_dtype(size_t idx, DType dtype) {
  217. m_dtype[idx] = dtype;
  218. return *this;
  219. }
  220. AccuracyShakeChecker& set_rng(size_t idx, RNG* rng) {
  221. m_rng[idx] = rng;
  222. return *this;
  223. }
  224. //! set a callback to be invoked before executing the operator
  225. AccuracyShakeChecker& set_before_exec_callback(const BeforeExecCallback& cb) {
  226. m_before_exec_callback = cb;
  227. return *this;
  228. }
  229. AccuracyShakeChecker& reset_before_exec_callback() {
  230. m_before_exec_callback = nullptr;
  231. return *this;
  232. }
  233. //! get the opr impl so setting other than param() can be modified
  234. Opr* opr() {
  235. if (!m_opr_cur) {
  236. m_opr_cur = m_handle_cur->create_operator<Opr>();
  237. }
  238. return m_opr_cur.get();
  239. }
  240. private:
  241. BeforeExecCallback m_before_exec_callback;
  242. Param m_param;
  243. Proxy m_proxy;
  244. std::unique_ptr<Opr> m_opr_cur;
  245. std::shared_ptr<TensorValueArray> m_tensors_cur_host, m_tensors_single_batch_host;
  246. void init_host_values();
  247. void check_tensors_ignore_batch(
  248. const TensorValueArray& tensors_single_batch,
  249. const TensorValueArray& tensors, const Algorithm::Info::Desc& desc);
  250. };
  251. template <typename Opr, typename Proxy>
  252. void AccuracyShakeChecker<Opr, Proxy>::exec(TensorLayoutArray layouts) {
  253. auto opr_cur = this->opr();
  254. opr_cur->param() = m_param;
  255. m_proxy.deduce_layout(opr_cur, layouts);
  256. TensorLayoutArray layouts_single_batch = layouts;
  257. for (size_t i = 0; i < layouts_single_batch.size(); ++i) {
  258. ASSERT_TRUE(layouts[i].is_physical_contiguous())
  259. << "layouts should be physical contiguous " << layouts[i].to_string();
  260. }
  261. ASSERT_TRUE(0 == BatchTrait<Opr>::index_of_batch(opr_cur->param()))
  262. << "index of batch should be 0 ";
  263. LayoutsModifier<Opr>::on(layouts_single_batch, opr_cur->param(), 1);
  264. // allocate input
  265. auto tensors_single_batch_storage =
  266. alloc_tensors(m_handle_cur, layouts_single_batch, 0);
  267. m_tensors_single_batch_host =
  268. alloc_tensors(m_handle_naive.get(), layouts_single_batch, 0);
  269. auto tensors_cur_storage = alloc_tensors(m_handle_cur, layouts, 0);
  270. m_tensors_cur_host = alloc_tensors(m_handle_naive.get(), layouts, 0);
  271. auto&& tensors_single_batch = *tensors_single_batch_storage;
  272. auto&& tensors_single_batch_host = *m_tensors_single_batch_host;
  273. auto&& tensors_cur = *tensors_cur_storage;
  274. auto&& tensors_cur_host = *m_tensors_cur_host;
  275. // allocate output
  276. auto tensors_single_batch_storage_out =
  277. alloc_tensors(m_handle_naive.get(), layouts_single_batch, 0);
  278. auto tensors_cur_storage_out = alloc_tensors(m_handle_naive.get(), layouts, 0);
  279. auto&& tensors_single_batch_out = *tensors_single_batch_storage_out;
  280. auto&& tensors_cur_out = *tensors_cur_storage_out;
  281. init_host_values();
  282. copy_tensors_to_device(tensors_cur, tensors_cur_host);
  283. copy_tensors_to_device(tensors_single_batch, tensors_single_batch_host);
  284. std::vector<Algorithm::Info::Desc> algo_desc;
  285. if (m_before_exec_callback) {
  286. algo_desc = m_before_exec_callback(opr_cur, tensors_cur);
  287. } else {
  288. algo_desc.push_back({});
  289. }
  290. for (size_t i = 0; i < algo_desc.size(); ++i) {
  291. opr_cur->execution_policy().algo = algo_desc[i];
  292. m_proxy.exec(opr_cur, tensors_cur);
  293. m_proxy.exec(opr_cur, tensors_single_batch);
  294. copy_tensors_from_device(tensors_cur_out, tensors_cur);
  295. copy_tensors_from_device(tensors_single_batch_out, tensors_single_batch);
  296. check_tensors_ignore_batch(
  297. tensors_single_batch_out, tensors_cur_out, algo_desc[i]);
  298. }
  299. }
  300. template <typename Opr, typename Proxy>
  301. void AccuracyShakeChecker<Opr, Proxy>::init_host_values() {
  302. size_t index_of_batch = 0;
  303. auto&& tensors_single_batch = *m_tensors_single_batch_host;
  304. auto&& tensors_cur = *m_tensors_cur_host;
  305. for (size_t i = 0; i < arity_in; ++i) {
  306. auto&& tensor_single_batch = tensors_single_batch[i];
  307. auto&& tensor_cur = tensors_cur[i];
  308. auto rng = m_rng[i];
  309. if (!rng)
  310. rng = m_default_rng.get();
  311. rng->gen(tensor_single_batch);
  312. dt_byte* raw_storage_cur = static_cast<dt_byte*>(tensor_cur.raw_ptr) +
  313. tensor_cur.layout.span().low_byte;
  314. dt_byte* raw_storage_single_batch =
  315. static_cast<dt_byte*>(tensor_single_batch.raw_ptr) +
  316. tensor_single_batch.layout.span().low_byte;
  317. const size_t step = tensor_single_batch.layout.span().dist_byte();
  318. if (tensor_cur.layout.eq_shape(tensor_single_batch.layout)) {
  319. memcpy(raw_storage_cur, raw_storage_single_batch, step);
  320. } else {
  321. ASSERT_TRUE(1 == tensor_single_batch.layout[index_of_batch])
  322. << "bad batch size " << tensor_single_batch.layout[index_of_batch];
  323. for (size_t b = 0; b < tensor_cur.layout[index_of_batch]; ++b) {
  324. memcpy(raw_storage_cur, raw_storage_single_batch, step);
  325. raw_storage_cur += step;
  326. }
  327. }
  328. }
  329. }
  330. template <typename Opr, typename Proxy>
  331. void AccuracyShakeChecker<Opr, Proxy>::check_tensors_ignore_batch(
  332. const TensorValueArray& tensors_single_batch, const TensorValueArray& tensors,
  333. const Algorithm::Info::Desc& algo) {
  334. for (size_t i = 0; i < tensors_single_batch.size(); ++i) {
  335. if (tensors_single_batch[i].layout.ndim == 0 ||
  336. tensors_single_batch[i].layout.eq_shape(tensors[i].layout))
  337. continue;
  338. ASSERT_PRED_FORMAT3(
  339. ::megdnn::test::__assert_tensor_binary_eq, tensors_single_batch[i],
  340. tensors[i], algo);
  341. }
  342. }
  343. } // namespace test
  344. } // namespace megdnn
  345. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台