
autocheck.cpp

/**
 * \file test/src/autocheck.cpp
 *
 * This file is part of MegBrain, a deep learning framework developed by Megvii.
 *
 * \copyright Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 */
  9. #include "megbrain/test/autocheck.h"
  10. #include "megbrain/opr/basic_arith.h"
  11. #include "megbrain/opr/blas.h"
  12. #include "megbrain/opr/internal/megdnn_opr_wrapper.h"
  13. #include "megbrain/opr/io.h"
  14. #include "megbrain/opr/utility.h"
  15. #include "megbrain/test/numerical_diff.h"
  16. #include <cmath>
  17. using namespace mgb;
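// Every member of AutoOprChecker is templated on <nr_inp, nr_out, dtype>;
// DEF_IMPL(ret) expands to the corresponding templated definition header,
// and DEF_IMPL_CHAIN() does the same for chainable setters that return
// AutoOprChecker&.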
#define DEF_IMPL(_ret)                                    \
    template <size_t nr_inp, size_t nr_out, class dtype> \
    _ret AutoOprChecker<nr_inp, nr_out, dtype>

#define DEF_IMPL_CHAIN()                                  \
    template <size_t nr_inp, size_t nr_out, class dtype> \
    AutoOprChecker<nr_inp, nr_out, dtype>& AutoOprChecker<nr_inp, nr_out, dtype>
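// The constructor records the graph maker and the numeric forward
// implementation, installs a default random input generator for every
// input, and enables gradient checking for all inputs and outputs.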
DEF_IMPL()::AutoOprChecker(GraphMaker maker, FwdNumeric fwd, CompNode comp_node)
        : m_fwd(fwd), m_maker(maker), m_comp_node{comp_node} {
    InputGenerator default_input_gen = [this](HostTensorND& dest) {
        dest = *m_gen(dest.shape(), m_comp_node);
    };
    for (size_t i = 0; i < nr_inp; ++i) {
        m_inputs[i] = std::make_shared<HostTensorND>(m_comp_node, dtype());
        m_inputs_generator[i] = default_input_gen;
    }
    for (size_t i = 0; i < nr_inp; ++i) {
        m_inputs_allow_grad[i] = true;
    }
    for (size_t i = 0; i < nr_out; ++i) {
        m_outputs_allow_grad[i] = true;
    }
    for (size_t i = 0; i < nr_out; ++i) {
        m_outputs_allow_check[i] = true;
    }
}
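// Build the computing graph on the first run: each input is wrapped as
// x * 1 + 0 (through TypeCvt'ed constants) to exercise graph transformations;
// forward outputs are copied back via callbacks, and when gradient checking is
// enabled a scalar loss and its gradients (plus the gradients of 2 * loss for
// the multi-loss consistency check) are added to the output spec.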
DEF_IMPL(void)::build_graph() {
    mgb_assert(!m_built);
    m_built = true;
    m_graph = ComputingGraph::make();
    auto&& graph = m_graph;
    if (m_disable_graph_opt) {
        graph->options().graph_opt_level = 0;
    }
    SymInpArray sym_in;
    SymbolVar one, zero;
    {
        HostTensorND tmp{m_comp_node, mgb::dtype::Float32()};
        auto p = tmp.resize({1}).ptr<float>();
        p[0] = 1;
        one = opr::SharedDeviceTensor::make(*graph, tmp, {"one"});
        p[0] = 0;
        zero = opr::SharedDeviceTensor::make(*graph, tmp, {"zero"});
    }
    for (size_t i = 0; i < nr_inp; ++i) {
        // to trigger graph trans
        sym_in[i] =
                opr::Host2DeviceCopy::make(*graph, m_inputs[i], ssprintf("inp%zu", i));
        auto dt = sym_in[i].dtype();
        auto a = opr::TypeCvt::make(one, dt), b = opr::TypeCvt::make(zero, dt);
        sym_in[i] = sym_in[i] * a + b;
    }
    m_failed = true;
    auto sym_out = m_maker(sym_in);
    m_failed = false;
    for (size_t i = 0; i < nr_out; ++i) {
        m_outputs_truth[i].comp_node(m_comp_node).dtype(sym_out[i].dtype());
        m_outspec_fwd_grad.push_back(make_callback_copy(sym_out[i], m_outputs[i]));
    }
    if (!m_need_grad_check)
        return;
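
    // Build a scalar loss as the sum of dot(flatten(out_i), loss_p_i) over all
    // outputs with gradient checking enabled; the loss_p values themselves are
    // regenerated in do_run() for every run.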
    SymbolVar loss;
    bool first_loss = true;
    for (size_t i = 0; i < nr_out; ++i) {
        if (m_outputs_allow_grad[i]) {
            m_loss_p[i] = std::make_shared<HostTensorND>(m_comp_node, dtype());
            auto cur = opr::Dot::make(
                    sym_out[i].flatten(),
                    opr::Host2DeviceCopy::make(
                            *graph, m_loss_p[i], ssprintf("lossp%zu", i)));
            if (first_loss) {
                loss = cur;
            } else {
                loss = loss + cur;
            }
            first_loss = false;
        }
    }
    if (first_loss) {
        m_need_grad_check = false;
        return;
    }
    auto make_grad = [&](SymbolVar target, SymbolVar wrt) {
        if (m_use_virtual_grad)
            return opr::VirtualGrad::make(target, wrt);
        else
            return cg::grad(target, wrt);
    };
    auto loss2 = loss * 2;
    m_outspec_loss.push_back({make_callback_copy(loss, m_loss)});
    for (size_t i = 0; i < nr_inp; ++i)
        if (m_inputs_allow_grad[i]) {
            SymbolVar g = make_grad(loss, sym_in[i]);
            auto cb = [this, i](DeviceTensorND& dev) {
                if (m_should_copy_grad)
                    m_grads[i].copy_from(dev).sync();
            };
            m_outspec_fwd_grad.push_back({g, cb});

            // test grad with a different loss var
            if (m_need_multi_loss_check) {
                auto g2 = make_grad(loss2, sym_in[i]);
                auto cb2 = [this, i](DeviceTensorND& dev) {
                    if (m_should_copy_grad)
                        m_grads_mul2[i].copy_from(dev).sync();
                };
                m_outspec_fwd_grad.push_back({g2, cb2});
            }
        }
}
DEF_IMPL()::~AutoOprChecker() {
    mgb_assert(
            m_failed || m_run_cnt >= 3,
            "less than 3 runs for autochecker; some paths not taken");
}
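// Chainable configuration setters; all of them must be called before the
// first run(), since the graph is built on that run and cannot change
// afterwards.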
DEF_IMPL_CHAIN()::set_input_generator(size_t idx, const InputGenerator& gen) {
    mgb_assert(!m_built, "cannot set_input_generator after the first run");
    mgb_assert(idx < nr_inp);
    m_inputs_generator[idx] = gen;
    return *this;
}
DEF_IMPL_CHAIN()::set_input_coordinator(const InputCoordinator& coord) {
    mgb_assert(!m_built, "cannot set_input_coordinator after the first run");
    m_input_coordinator = coord;
    return *this;
}
DEF_IMPL_CHAIN()::set_input_allow_grad(size_t idx, bool allowed) {
    mgb_assert(!m_built, "cannot set_input_allow_grad after the first run");
    mgb_assert(idx < nr_inp);
    m_inputs_allow_grad[idx] = allowed;
    return *this;
}
DEF_IMPL_CHAIN()::set_input_default_shape(size_t idx, const TensorShape& shape) {
    mgb_assert(!m_built, "cannot set_input_default_shape after the first run");
    mgb_assert(idx < nr_inp);
    m_inputs[idx]->resize(shape);
    return *this;
}
DEF_IMPL_CHAIN()::set_output_allow_grad(size_t idx, bool allowed) {
    mgb_assert(!m_built, "cannot set_output_allow_grad after the first run");
    mgb_assert(idx < nr_out);
    m_outputs_allow_grad[idx] = allowed;
    return *this;
}

DEF_IMPL_CHAIN()::set_output_allow_check(size_t idx, bool allowed) {
    mgb_assert(!m_built, "cannot set_output_allow_check after the first run");
    mgb_assert(idx < nr_out);
    m_outputs_allow_check[idx] = allowed;
    return *this;
}
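// Execute one checked run: generate input data, compute the numeric ground
// truth via m_fwd, fill loss_p, (re)compile the function according to the
// 3-run scheme documented below, compare forward outputs, and finally check
// symbolic gradients against numerical differentiation (including the
// gradients of 2 * loss when multi-loss checking is enabled).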
DEF_IMPL(void)::do_run(const ShapeInpArray& shapes, const RunOptions& opt) {
    mgb_assert(m_built);
    auto failstr = [&](const std::string& type) {
        std::string ishp_str;
        for (auto&& i : shapes) {
            if (!ishp_str.empty())
                ishp_str.append(", ");
            ishp_str.append(i.to_string());
        }
        std::string msg = ssprintf(
                "%s failed: input shapes: [%s]", type.c_str(), ishp_str.c_str());
        if (m_inp_dump_on_error) {
            std::string extra_msg = m_inp_dump_on_error(m_inputs);
            if (!extra_msg.empty()) {
                msg.append("\nextra message:\n");
                msg.append(extra_msg);
            }
        }
        if (!m_extra_err_msg.empty()) {
            msg.append("\nextra message: ");
            msg.append(m_extra_err_msg);
        }
        return msg;
    };

    m_failed = true;

    // gen input data
    for (size_t i = 0; i < nr_inp; ++i) {
        m_inputs[i]->resize(shapes[i]);
        m_inputs_generator[i](*m_inputs[i]);
        mgb_assert(m_inputs[i]->shape().eq_shape(shapes[i]));
    }
    if (MGB_GETENV("MGB_AUTOCHECK_DUMP_INPUT")) {
        static size_t run_id;
        auto fname = output_file(ssprintf("autocheck-inp-%zu.bin", run_id++));
        for (size_t i = 0; i < nr_inp; ++i) {
            write_tensor_to_file(*m_inputs[i], fname.c_str(), i ? 'a' : 'w');
        }
        mgb_log("autocheck: %zu input tensors written to %s", nr_inp, fname.c_str());
    }
    if (m_input_coordinator)
        m_input_coordinator(m_inputs);

    // forward for ground truth
    m_fwd(m_outputs_truth, m_inputs);
    for (auto&& i : m_outputs_truth) {
        i.comp_node().sync();
    }

    // gen loss_p
    if (m_need_grad_check) {
        float cur_loss_v = 0;
        for (size_t i = 0; i < nr_out; ++i) {
            if (m_outputs_allow_grad[i]) {
                auto nr = m_outputs_truth[i].shape().total_nr_elems();
                if (opt.cont_loss_p) {
                    m_loss_p[i]->resize({nr});
                    auto ptr = m_loss_p[i]->template ptr<float>();
                    for (size_t j = 0; j < nr; ++j)
                        ptr[j] = ++cur_loss_v;
                } else {
                    *m_loss_p[i] = *m_gen({nr}, m_comp_node);
                    auto ptr = m_loss_p[i]->template ptr<float>();
                    for (size_t j = 0; j < nr; ++j) {
                        auto v = ptr[j];
                        bool vsign = v > 0;
                        v = std::abs(v) + 0.1;
                        ptr[j] = vsign ? v : -v;
                    }
                }
            }
        }
    }

    /*
     * For every 3 consecutive runs:
     *   0 and 1: m_func generates loss and grads
     *   2: m_func generates only grads in fwd, and loss in numdiff
     *
     * This scheme is used for recompiling the function a few times, so more
     * problems can be exposed.
     */
    if (m_run_cnt % 3 == 0) {
        auto spec = m_outspec_loss;
        spec.insert(spec.end(), m_outspec_fwd_grad.begin(), m_outspec_fwd_grad.end());
        m_func = m_graph->compile(spec);
    } else if (!m_disable_check_loss_grad_seperate_compile && m_run_cnt % 3 == 2)
        m_func = m_graph->compile(m_outspec_fwd_grad);

    m_should_copy_grad = true;
    m_func->execute();
    m_should_copy_grad = false;
    if (m_on_grad_computed)
        m_on_grad_computed(m_graph.get(), m_func.get());

    for (size_t i = 0; i < nr_out; ++i) {
        if (m_outputs_allow_check[i]) {
            MGB_ASSERT_TENSOR_NEAR(
                    m_outputs_truth[i], m_outputs[i], opt.outputs_max_err)
                    << failstr(ssprintf("output[%zu]", i));
        }
    }
    if (!m_need_grad_check) {
        m_failed = false;
        return;
    }
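
    // Numerical gradient check: estimate d(loss)/d(input) by finite
    // differences via numerical_diff_pt2 and compare against the symbolic
    // gradients copied out during the forward pass above.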
    std::vector<HostTensorND*> numgrad_inp(nr_inp);
    if (!m_disable_check_loss_grad_seperate_compile && m_run_cnt % 3 == 2)
        m_func = m_graph->compile(m_outspec_loss);
    for (size_t i = 0; i < nr_inp; ++i)
        if (m_inputs_allow_grad[i])
            numgrad_inp[i] = m_inputs[i].get();
        else
            numgrad_inp[i] = nullptr;
    auto cost_f = [this] {
        m_func->execute();
        mgb_assert(m_loss.shape().is_scalar());
        return m_loss.ptr<float>()[0];
    };
    std::vector<Maybe<float>> numdiff_eps;
    for (size_t i = 0; i < nr_inp; ++i) {
        if (m_inputs_allow_grad[i]) {
            float v = opt.numdiff_eps;
            auto&& sv = opt.numdiff_eps_single_inp[i];
            if (sv.valid())
                v = sv.val();
            numdiff_eps.push_back(v);
        } else {
            numdiff_eps.push_back(None);
        }
    }
    auto numgrad = numerical_diff_pt2(numgrad_inp, cost_f, numdiff_eps);
    auto mul2_inplace = [](HostTensorND& t) -> HostTensorND& {
        auto ptr = t.ptr<typename DTypeTrait<dtype>::ctype>();
        for (size_t j = 0, jt = t.layout().total_nr_elems(); j < jt; ++j) {
            ptr[j] *= 2;
        }
        return t;
    };
    for (size_t i = 0; i < nr_inp; ++i) {
        if (m_inputs_allow_grad[i]) {
            auto err = opt.numdiff_max_err;
            {
                auto&& se = opt.numdiff_max_err_single_inp[i];
                if (se.valid())
                    err = se.val();
            }
            MGB_ASSERT_TENSOR_NEAR(numgrad.at(i), m_grads[i], err)
                    << failstr(ssprintf("grad[%zu]", i));

            // check that grad2 == 2 * grad
            if (m_need_multi_loss_check) {
                MGB_ASSERT_TENSOR_NEAR(mul2_inplace(m_grads[i]), m_grads_mul2[i], err)
                        << failstr(ssprintf(
                                   "2 * grad[%zu] (grad with another loss var)", i));
            }
        }
    }

    m_failed = false;
}
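// Public entry point: lazily build the graph on the first call, skip the run
// if a previous one already failed, and count runs for the "at least 3 runs"
// sanity check in the destructor.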
DEF_IMPL_CHAIN()::run(const ShapeInpArray& shapes, const RunOptions& opt) {
    if (!m_built)
        build_graph();
    if (m_failed) {
        mgb_log_error("testcase not executed due to previous error");
        return *this;
    }
    do_run(shapes, opt);
    ++m_run_cnt;
    return *this;
}
namespace mgb {

// explicit instantiation
#define I(a, b)                                           \
    template class AutoOprChecker<a, b, dtype::Float32>; \
    template class AutoOprChecker<a, b, dtype::Int32>;

I(1, 1);
I(1, 2);
I(1, 3);
I(1, 4);
I(2, 1);
I(2, 2);
I(2, 4);
I(3, 1);
I(3, 2);
I(3, 3);
I(4, 1);
I(5, 1);
I(6, 1);

#undef I
}  // namespace mgb
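// Self-test for the checker itself: out = a + b * b, with the numeric forward
// pass implemented directly on device tensors via megdnn Elemwise.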
TEST(TestAutoCheck, APlusB) {
    using Checker = AutoOprChecker<2, 1>;
    auto make_graph = [](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        return {inputs[0] + inputs[1] * inputs[1]};
    };
    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        DeviceTensorND i0, i1, tmp, out;
        i0.copy_from(*inp[0]);
        i1.copy_from(*inp[1]);
        auto opr = opr::intl::create_megdnn_opr<megdnn::Elemwise>(dest[0].comp_node());
        using Mode = opr::Elemwise::Mode;
        opr::Elemwise::perform(Mode::MUL, tmp, {i1, i1}, opr);
        opr::Elemwise::perform(Mode::ADD, out, {tmp, i0}, opr);
        dest[0].copy_from(out).sync();
    };
    Checker(make_graph, fwd)
            .run({TensorShape{2, 3}, TensorShape{2, 3}})
            .run({TensorShape{5, 2, 3}, TensorShape{5, 1, 1}})
            .run({TensorShape{2, 3, 4, 5}, TensorShape{1}});
}
#undef DEF_IMPL
#undef DEF_IMPL_CHAIN

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}