/**
 * \file test/src/autocheck.cpp
 *
 * This file is part of MegBrain, a deep learning framework developed by Megvii.
 *
 * \copyright Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 */

#include "megbrain/test/autocheck.h"
#include "megbrain/opr/basic_arith.h"
#include "megbrain/opr/blas.h"
#include "megbrain/opr/internal/megdnn_opr_wrapper.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/utility.h"
#include "megbrain/test/numerical_diff.h"

#include <cmath>

using namespace mgb;

#define DEF_IMPL(_ret)                                   \
    template <size_t nr_inp, size_t nr_out, class dtype> \
    _ret AutoOprChecker<nr_inp, nr_out, dtype>

#define DEF_IMPL_CHAIN()                                 \
    template <size_t nr_inp, size_t nr_out, class dtype> \
    AutoOprChecker<nr_inp, nr_out, dtype>& AutoOprChecker<nr_inp, nr_out, dtype>

DEF_IMPL()::AutoOprChecker(GraphMaker maker, FwdNumeric fwd, CompNode comp_node)
        : m_fwd(fwd), m_maker(maker), m_comp_node{comp_node} {
    InputGenerator default_input_gen = [this](HostTensorND& dest) {
        dest = *m_gen(dest.shape(), m_comp_node);
    };
    for (size_t i = 0; i < nr_inp; ++i) {
        m_inputs[i] = std::make_shared<HostTensorND>(m_comp_node, dtype());
        m_inputs_generator[i] = default_input_gen;
    }
    for (size_t i = 0; i < nr_inp; ++i) {
        m_inputs_allow_grad[i] = true;
    }
    for (size_t i = 0; i < nr_out; ++i) {
        m_outputs_allow_grad[i] = true;
    }
    for (size_t i = 0; i < nr_out; ++i) {
        m_outputs_allow_check[i] = true;
    }
}

DEF_IMPL(void)::build_graph() {
    mgb_assert(!m_built);
    m_built = true;
    m_graph = ComputingGraph::make();
    auto&& graph = m_graph;
    if (m_disable_graph_opt) {
        graph->options().graph_opt_level = 0;
    }
    SymInpArray sym_in;
    SymbolVar one, zero;
    {
        HostTensorND tmp{m_comp_node, mgb::dtype::Float32()};
        auto p = tmp.resize({1}).ptr<float>();
        p[0] = 1;
        one = opr::SharedDeviceTensor::make(*graph, tmp, {"one"});
        p[0] = 0;
        zero = opr::SharedDeviceTensor::make(*graph, tmp, {"zero"});
    }
    for (size_t i = 0; i < nr_inp; ++i) {
        // to trigger graph trans
        sym_in[i] = opr::Host2DeviceCopy::make(
                *graph, m_inputs[i], ssprintf("inp%zu", i));
        auto dt = sym_in[i].dtype();
        auto a = opr::TypeCvt::make(one, dt), b = opr::TypeCvt::make(zero, dt);
        sym_in[i] = sym_in[i] * a + b;
    }
    m_failed = true;
    auto sym_out = m_maker(sym_in);
    m_failed = false;
    for (size_t i = 0; i < nr_out; ++i) {
        m_outputs_truth[i].comp_node(m_comp_node).dtype(sym_out[i].dtype());
        m_outspec_fwd_grad.push_back(make_callback_copy(sym_out[i], m_outputs[i]));
    }

    if (!m_need_grad_check)
        return;

    SymbolVar loss;
    bool first_loss = true;
    for (size_t i = 0; i < nr_out; ++i) {
        if (m_outputs_allow_grad[i]) {
            m_loss_p[i] = std::make_shared<HostTensorND>(m_comp_node, dtype());
            auto cur = opr::Dot::make(
                    sym_out[i].flatten(),
                    opr::Host2DeviceCopy::make(
                            *graph, m_loss_p[i], ssprintf("lossp%zu", i)));
            if (first_loss) {
                loss = cur;
            } else {
                loss = loss + cur;
            }
            first_loss = false;
        }
    }
    if (first_loss) {
        // no output allows grad, so there is nothing to check
        m_need_grad_check = false;
        return;
    }

    auto make_grad = [&](SymbolVar target, SymbolVar wrt) {
        if (m_use_virtual_grad)
            return opr::VirtualGrad::make(target, wrt);
        else
            return cg::grad(target, wrt);
    };

    auto loss2 = loss * 2;
    m_outspec_loss.push_back({make_callback_copy(loss, m_loss)});
    for (size_t i = 0; i < nr_inp; ++i)
        if (m_inputs_allow_grad[i]) {
            SymbolVar g = make_grad(loss, sym_in[i]);
            auto cb = [this, i](DeviceTensorND& dev) {
                if (m_should_copy_grad)
                    m_grads[i].copy_from(dev).sync();
            };
            m_outspec_fwd_grad.push_back({g, cb});

            // test grad with a different loss var
            if (m_need_multi_loss_check) {
                auto g2 = make_grad(loss2, sym_in[i]);
                auto cb2 = [this, i](DeviceTensorND& dev) {
                    if (m_should_copy_grad)
                        m_grads_mul2[i].copy_from(dev).sync();
                };
                m_outspec_fwd_grad.push_back({g2, cb2});
            }
        }
}

DEF_IMPL()::~AutoOprChecker() {
    mgb_assert(
            m_failed || m_run_cnt >= 3,
            "less than 3 runs for autochecker; some paths not taken");
}
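/*
 * Chainable configuration setters. All of them must be called before the first
 * run() (they assert on !m_built, and run() builds the graph on its first
 * invocation); the intended pattern is to construct a checker, adjust it with
 * these setters, and then call run() at least three times so that every
 * compile path in do_run() is exercised, as enforced by the destructor above.
 */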
not taken"); } DEF_IMPL_CHAIN()::set_input_generator(size_t idx, const InputGenerator& gen) { mgb_assert(!m_built, "cannot set_input_generator after the first run"); mgb_assert(idx < nr_inp); m_inputs_generator[idx] = gen; return *this; } DEF_IMPL_CHAIN()::set_input_coordinator(const InputCoordinator& coord) { mgb_assert(!m_built, "cannot set_input_generator after the first run"); m_input_coordinator = coord; return *this; } DEF_IMPL_CHAIN()::set_input_allow_grad(size_t idx, bool allowed) { mgb_assert(!m_built, "cannot set_input_allow_grad after the first run"); mgb_assert(idx < nr_inp); m_inputs_allow_grad[idx] = allowed; return *this; } DEF_IMPL_CHAIN()::set_input_default_shape(size_t idx, const TensorShape& shape) { mgb_assert(!m_built, "cannot set_input_allow_grad after the first run"); mgb_assert(idx < nr_inp); m_inputs[idx]->resize(shape); return *this; } DEF_IMPL_CHAIN()::set_output_allow_grad(size_t idx, bool allowed) { mgb_assert(!m_built, "cannot set_output_allow_grad after the first run"); mgb_assert(idx < nr_out); m_outputs_allow_grad[idx] = allowed; return *this; } DEF_IMPL_CHAIN()::set_output_allow_check(size_t idx, bool allowed) { mgb_assert(!m_built, "cannot set_output_allow_check after the first run"); mgb_assert(idx < nr_out); m_outputs_allow_check[idx] = allowed; return *this; } DEF_IMPL(void)::do_run(const ShapeInpArray& shapes, const RunOptions& opt) { mgb_assert(m_built); auto failstr = [&](const std::string& type) { std::string ishp_str; for (auto&& i : shapes) { if (!ishp_str.empty()) ishp_str.append(", "); ishp_str.append(i.to_string()); } std::string msg = ssprintf( "%s failed: input shapes: [%s]", type.c_str(), ishp_str.c_str()); if (m_inp_dump_on_error) { std::string extra_msg = m_inp_dump_on_error(m_inputs); if (!extra_msg.empty()) { msg.append("\nextra message:\n"); msg.append(extra_msg); } } if (!m_extra_err_msg.empty()) { msg.append("\nextra message: "); msg.append(m_extra_err_msg); } return msg; }; m_failed = true; // gen input data for (size_t i = 0; i < nr_inp; ++i) { m_inputs[i]->resize(shapes[i]); m_inputs_generator[i](*m_inputs[i]); mgb_assert(m_inputs[i]->shape().eq_shape(shapes[i])); } if (MGB_GETENV("MGB_AUTOCHECK_DUMP_INPUT")) { static size_t run_id; auto fname = output_file(ssprintf("autocheck-inp-%zu.bin", run_id++)); for (size_t i = 0; i < nr_inp; ++i) { write_tensor_to_file(*m_inputs[i], fname.c_str(), i ? 'a' : 'w'); } mgb_log("autocheck: %zu input tensors written to %s", nr_inp, fname.c_str()); } if (m_input_coordinator) m_input_coordinator(m_inputs); // forward for groundtruth m_fwd(m_outputs_truth, m_inputs); for (auto&& i : m_outputs_truth) { i.comp_node().sync(); } // gen loss_p if (m_need_grad_check) { float cur_loss_v = 0; for (size_t i = 0; i < nr_out; ++i) { if (m_outputs_allow_grad[i]) { auto nr = m_outputs_truth[i].shape().total_nr_elems(); if (opt.cont_loss_p) { m_loss_p[i]->resize({nr}); auto ptr = m_loss_p[i]->template ptr(); for (size_t j = 0; j < nr; ++j) ptr[j] = ++cur_loss_v; } else { *m_loss_p[i] = *m_gen({nr}, m_comp_node); auto ptr = m_loss_p[i]->template ptr(); for (size_t j = 0; j < nr; ++j) { auto v = ptr[j]; bool vsign = v > 0; v = std::abs(v) + 0.1; ptr[j] = vsign ? v : -v; } } } } } /* * for each 3 consecutive runs: * 0 and 1: m_func generates loss and grads * 2: m_func generates only grads in fwd, and loss in numdiff * * This scheme is used for recompiling the function a few times, so more * problems can be exposed. 
    if (m_run_cnt % 3 == 0) {
        auto spec = m_outspec_loss;
        spec.insert(spec.end(), m_outspec_fwd_grad.begin(), m_outspec_fwd_grad.end());
        m_func = m_graph->compile(spec);
    } else if (!m_disable_check_loss_grad_seperate_compile && m_run_cnt % 3 == 2)
        m_func = m_graph->compile(m_outspec_fwd_grad);

    m_should_copy_grad = true;
    m_func->execute();
    m_should_copy_grad = false;
    if (m_on_grad_computed)
        m_on_grad_computed(m_graph.get(), m_func.get());

    for (size_t i = 0; i < nr_out; ++i) {
        if (m_outputs_allow_check[i]) {
            MGB_ASSERT_TENSOR_NEAR(
                    m_outputs_truth[i], m_outputs[i], opt.outputs_max_err)
                    << failstr(ssprintf("output[%zu]", i));
        }
    }

    if (!m_need_grad_check) {
        m_failed = false;
        return;
    }

    std::vector<HostTensorND*> numgrad_inp(nr_inp);
    if (!m_disable_check_loss_grad_seperate_compile && m_run_cnt % 3 == 2)
        m_func = m_graph->compile(m_outspec_loss);
    for (size_t i = 0; i < nr_inp; ++i)
        if (m_inputs_allow_grad[i])
            numgrad_inp[i] = m_inputs[i].get();
        else
            numgrad_inp[i] = nullptr;

    auto cost_f = [this] {
        m_func->execute();
        mgb_assert(m_loss.shape().is_scalar());
        return m_loss.ptr<ctype>()[0];
    };

    std::vector<Maybe<float>> numdiff_eps;
    for (size_t i = 0; i < nr_inp; ++i) {
        if (m_inputs_allow_grad[i]) {
            float v = opt.numdiff_eps;
            auto&& sv = opt.numdiff_eps_single_inp[i];
            if (sv.valid())
                v = sv.val();
            numdiff_eps.push_back(v);
        } else {
            numdiff_eps.push_back(None);
        }
    }
    auto numgrad = numerical_diff_pt2(numgrad_inp, cost_f, numdiff_eps);

    auto mul2_inplace = [](HostTensorND& t) -> HostTensorND& {
        auto ptr = t.ptr<typename DTypeTrait<dtype>::ctype>();
        for (size_t j = 0, jt = t.layout().total_nr_elems(); j < jt; ++j) {
            ptr[j] *= 2;
        }
        return t;
    };

    for (size_t i = 0; i < nr_inp; ++i) {
        if (m_inputs_allow_grad[i]) {
            auto err = opt.numdiff_max_err;
            {
                auto&& se = opt.numdiff_max_err_single_inp[i];
                if (se.valid())
                    err = se.val();
            }
            MGB_ASSERT_TENSOR_NEAR(numgrad.at(i), m_grads[i], err)
                    << failstr(ssprintf("grad[%zu]", i));

            // check that grad2 == 2 * grad
            if (m_need_multi_loss_check) {
                MGB_ASSERT_TENSOR_NEAR(mul2_inplace(m_grads[i]), m_grads_mul2[i], err)
                        << failstr(ssprintf(
                                "2 * grad[%zu] (grad with another loss var)", i));
            }
        }
    }

    m_failed = false;
}

DEF_IMPL_CHAIN()::run(const ShapeInpArray& shapes, const RunOptions& opt) {
    if (!m_built)
        build_graph();
    if (m_failed) {
        mgb_log_error("testcase not executed due to previous error");
        return *this;
    }
    do_run(shapes, opt);
    ++m_run_cnt;
    return *this;
}

namespace mgb {
// explicit instantiation
#define I(a, b)                              \
    template class AutoOprChecker<a, b>;     \
    template class AutoOprChecker<a, b, dtype::Int32>;
I(1, 1);
I(1, 2);
I(1, 3);
I(1, 4);
I(2, 1);
I(2, 2);
I(2, 4);
I(3, 1);
I(3, 2);
I(3, 3);
I(4, 1);
I(5, 1);
I(6, 1);
#undef I
}  // namespace mgb

TEST(TestAutoCheck, APlusB) {
    using Checker = AutoOprChecker<2, 1>;

    auto make_graph = [](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
        return {inputs[0] + inputs[1] * inputs[1]};
    };

    auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
        DeviceTensorND i0, i1, tmp, out;
        i0.copy_from(*inp[0]);
        i1.copy_from(*inp[1]);
        auto opr = opr::intl::create_megdnn_opr<megdnn::Elemwise>(dest[0].comp_node());
        using Mode = opr::Elemwise::Mode;
        opr::Elemwise::perform(Mode::MUL, tmp, {i1, i1}, opr);
        opr::Elemwise::perform(Mode::ADD, out, {tmp, i0}, opr);
        dest[0].copy_from(out).sync();
    };

    Checker(make_graph, fwd)
            .run({TensorShape{2, 3}, TensorShape{2, 3}})
            .run({TensorShape{5, 2, 3}, TensorShape{5, 1, 1}})
            .run({TensorShape{2, 3, 4, 5}, TensorShape{1}});
}

#undef DEF_IMPL
#undef DEF_IMPL_CHAIN

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}