From 3246ee5e87be2c1cb3f59d5ca59e89bc105600dc Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Sun, 27 Sep 2020 21:05:06 +0800 Subject: [PATCH] perf(mge): use DepType::HOST_VALUE in trace when possible GitOrigin-RevId: 5d47ed263fe0c5d65f86d53d61bd1c427139c06d --- imperative/python/src/graph_rt.cpp | 6 +++--- imperative/src/impl/opr_utility.cpp | 15 +++++++++++++-- .../src/include/megbrain/imperative/opr_utility.h | 4 +++- imperative/src/test/opr_utility.cpp | 22 ++++++++++++++++++++++ 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/imperative/python/src/graph_rt.cpp b/imperative/python/src/graph_rt.cpp index f8c5fed7..fcfbd55e 100644 --- a/imperative/python/src/graph_rt.cpp +++ b/imperative/python/src/graph_rt.cpp @@ -483,13 +483,13 @@ void init_graph_rt(py::module m) { }, py::arg(), py::arg(), py::arg(), py::arg() = py::none(), py::arg() = py::tuple(), py::arg("graph") = py::none()); - auto output_callback = [](auto callback, const std::vector& inputs, bool borrow = false) { + auto output_callback = [](auto callback, const std::vector& inputs, bool borrow = false, bool prefer_host_value = false) { SymbolVarArray sinputs; for (auto i : inputs) { sinputs.emplace_back(i); } static_assert(!std::is_reference::value); - opr::OutputCallback::Param param{std::move(callback), borrow}; + opr::OutputCallback::Param param{std::move(callback), borrow, prefer_host_value}; auto output = opr::OutputCallback::make(std::move(param), sinputs); return output.node(); }; @@ -519,7 +519,7 @@ void init_graph_rt(py::module m) { hv_with_event.second->record(); p->set(std::move(hv_with_event)); }; - return output_callback(std::move(f), std::move(inputs), true); + return output_callback(std::move(f), std::move(inputs), true, true); }); m.def("attr_output_callback", [output_callback](std::shared_ptr> p, std::vector inputs) { diff --git a/imperative/src/impl/opr_utility.cpp b/imperative/src/impl/opr_utility.cpp index 4990f61e..cf0bf2a6 100644 --- a/imperative/src/impl/opr_utility.cpp +++ b/imperative/src/impl/opr_utility.cpp @@ -144,13 +144,24 @@ cg::OperatorNodeBase::NodeProp* OutputCallback::do_make_node_prop() const { prop->add_flag(NodeProp::Flag::NO_AUTOMATIC_DUP); SmallVector dep_types(input().size(), NodeProp::DepType::DEV_COMP_ORDER); - dep_types[0] = NodeProp::DepType::DEV_VALUE; + using IT = cg::static_infer::InferType; + auto host_value_avail = [&]() -> bool { + auto inp = input(0); + auto it = owner_graph()->static_infer_manager().get_infer_type(inp).value; + return it & (IT::CONST | IT::RT_STATIC | IT::MISSING_INP); + }; + m_use_host_value = m_param.prefer_host_value && host_value_avail(); + dep_types[0] = m_use_host_value ? NodeProp::DepType::HOST_VALUE : NodeProp::DepType::DEV_VALUE; prop->reset_dep_type(input(), dep_types); return prop; } void OutputCallback::scn_do_execute() { - m_param.callback(input(0)->dev_tensor()); + if (m_use_host_value) { + m_param.callback(owner_graph()->static_infer_manager().infer_value(input(0))); + } else { + m_param.callback(input(0)->dev_tensor()); + } } cg::OperatorNodeBase* OutputCallback::shallow_copy( diff --git a/imperative/src/include/megbrain/imperative/opr_utility.h b/imperative/src/include/megbrain/imperative/opr_utility.h index 9054c217..14f5f272 100644 --- a/imperative/src/include/megbrain/imperative/opr_utility.h +++ b/imperative/src/include/megbrain/imperative/opr_utility.h @@ -60,7 +60,8 @@ public: using callback_t = thin_function; struct Param { callback_t callback; - bool borrow = false; + bool borrow = false; // do not obtain shared ownership on DeviceTensorND + bool prefer_host_value = false; // use host value when possible }; OutputCallback(Param param, const VarNodeArray& inputs, @@ -81,6 +82,7 @@ protected: NodeProp* do_make_node_prop() const override; private: Param m_param; + mutable bool m_use_host_value; }; MGB_DEFINE_OPR_CLASS(NopCallback, cg::OperatorNodeBase) // { diff --git a/imperative/src/test/opr_utility.cpp b/imperative/src/test/opr_utility.cpp index c808d2cd..a078d419 100644 --- a/imperative/src/test/opr_utility.cpp +++ b/imperative/src/test/opr_utility.cpp @@ -13,6 +13,7 @@ #include "megbrain/opr/io.h" #include "megbrain/opr/basic_arith.h" #include "megbrain/opr/utility.h" +#include "megbrain/opr/tensor_manip.h" #include "megbrain/test/helper.h" using namespace mgb; @@ -50,6 +51,27 @@ TEST(TestOprUtility, OutputCallback) { MGB_ASSERT_TENSOR_EQ(hy, *hx); } +TEST(TestOprUtility, OutputCallbackPreferHost) { + HostTensorGenerator<> gen; + auto hx = gen({2, 3}); + auto graph = ComputingGraph::make(); + auto x = opr::Host2DeviceCopy::make(*graph, hx); + x = opr::GetVarShape::make(x); + HostTensorND hy; + auto callback = [&hy](DeviceTensorND dv) {hy.copy_from(dv);}; + opr::OutputCallback::Param param{callback}; + param.prefer_host_value = true; + auto dummy = opr::OutputCallback::make(param, x); + auto y = opr::VirtualDep::make({x, dummy}); + + ComputingGraph::OutputSpec outspec{{y, [](DeviceTensorND&){}}}; + auto func = graph->compile(outspec); + func->execute(); + ASSERT_TRUE(hy.comp_node() == CompNode::default_cpu()); + ASSERT_EQ(hy.ptr()[0], 2); + ASSERT_EQ(hy.ptr()[1], 3); +} + TEST(TestOprUtility, NopCallback) { HostTensorGenerator<> gen; auto hx = gen({2, 3});