From 8c2b916ef59fd28782c2a813cc801ae58c8649c0 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Tue, 15 Feb 2022 16:32:58 +0800
Subject: [PATCH] refactor(imperative): remove some methods in proxy graph

GitOrigin-RevId: 1fb68a1da290dc0de8976c0537acc708bab07fa3
---
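The deleted ProxyGraph physical-tensor path did its work in two graph
traversals: infer_output_attrs() computed the output descriptors, then
invoke_op() executed the proxy opr. This patch folds both into the
mini-graph's apply_on_physical_tensor(), so one cached minigraph and one
inference session serve shape inference and execution; the session is kept
alive across execute() because some oprs (e.g. Subtensor) call infer_value
while executing. A condensed sketch of the merged flow, for orientation
only: every name in it comes from the diff below except
infer_all_output_attrs() and alloc_outputs(), which are hypothetical
stand-ins for loops that the real code inlines.

    // Sketch only, not the verbatim implementation.
    SmallVector<TensorPtr> apply_on_physical_tensor(
            const OpDef& def, SmallVector<TensorPtr> inputs) {
        auto raw_inputs = to_raw_ptr_array(inputs);
        // opr cache: reuse the minigraph built for this (OpDef, inputs) key
        auto& minigraph = get_cached_minigraph(def, raw_inputs);
        auto attached = scoped_attach(&minigraph);
        // one session for both attr inference and execution; kept alive so
        // oprs that call infer_value during execution still find it
        auto sess = minigraph.infer_session(raw_inputs);
        auto output_descs = infer_all_output_attrs(sess);  // hypothetical helper
        auto outputs = alloc_outputs(output_descs);        // hypothetical helper
        minigraph.execute(raw_inputs, to_raw_ptr_array(outputs), m_env);
        return outputs;
    }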
 imperative/src/impl/proxy_graph.cpp             | 171 ------------------
 imperative/src/impl/proxy_graph.h               |  31 -----
 imperative/src/impl/proxy_graph/mini_graph.h    |  26 ++--
 imperative/src/impl/proxy_graph/proxy_graph.cpp |  17 ---
 imperative/src/impl/proxy_graph_detail.cpp      |  77 -----------
 5 files changed, 9 insertions(+), 313 deletions(-)

diff --git a/imperative/src/impl/proxy_graph.cpp b/imperative/src/impl/proxy_graph.cpp
index 002b92dc..4c8aac45 100644
--- a/imperative/src/impl/proxy_graph.cpp
+++ b/imperative/src/impl/proxy_graph.cpp
@@ -121,22 +121,6 @@ private:
 };
 MGB_DYN_TYPE_OBJ_FINAL_IMPL(ProxyGraph::InputPlaceholder);
 
-class ProxyGraph::ExecEnv final : public cg::GraphExecutable::ExecEnv {
-public:
-    void dispatch_on_comp_node(CompNode, Task&& task) override { task(); }
-
-    void dispatch_on_comp_node_with_mask(
-            CompNode, Task&& task, cg::ExecutionMask* mask) override {
-        mgb_throw_if(
-                mask, GraphError, "ExecutionMask not supported in imperative mode");
-        task();
-    }
-
-    void pause_exec() override {}
-
-    void resume_exec() override {}
-};
-
 class ProxyGraph::StaticInferManager : public cg::static_infer::StaticInferManager {
 public:
     using Tag = cg::static_infer::Tag;
@@ -183,26 +167,8 @@ public:
     }
 
     InferType get_infer_type(Tag var) override {
-        // may be called during get_proxy_opr or make_backward_graph
-        // don't let opr apply any immediate optimization
         return {InferType::MISSING_INP, InferType::MISSING_INP};
-
-        if (auto opr = var->owner_opr()->try_cast_final<InputPlaceholder>()) {
-            return {var->shape().ndim ? InferType::CONST : InferType::MISSING_INP,
-                    opr->m_tensor ? InferType::CONST : InferType::MISSING_INP};
-        }
-        if (cur_opr) {
-            auto&& outputs = cur_opr->output();
-            auto&& it = std::find(outputs.begin(), outputs.end(), var);
-            if (it != outputs.end()) {
-                return {infer_shape_fallible(var) ? InferType::CONST
-                                                  : InferType::MISSING_INP,
-                        // value inference could be expensive
-                        InferType::MISSING_INP};
-            }
-        }
-        return {InferType::MISSING_INP, InferType::MISSING_INP};
     }
 
     void update() {
@@ -471,7 +437,6 @@ std::atomic<size_t> ProxyGraph::ProxyGraphImpl::m_node_id = 0;
 
 ProxyGraph::ProxyGraph()
         : m_graph(ProxyGraphImpl::make(this)),
-          m_env{new ExecEnv},
           m_static_infer_manager(new StaticInferManager(this)),
           m_seq_comp_node_optimizer(new SeqCompNodeOptimizer()) {}
 
@@ -506,32 +471,6 @@ private:
 
 /*********************** Physical Tensor Impl ***********************/
 
-SmallVector<LogicalTensorDesc> ProxyGraph::infer_output_attrs(
-        const OpDef& opdef, const SmallVector<Tensor*>& inputs) {
-    SmallVector<LogicalTensorDesc> ret;
-    CUR_OPR_GUARD(get_proxy_opr(opdef, inputs));
-    ::mgb::opr::intl::WorkspaceLimitHook::set_impl(
-            m_graph.get(), ProxyGraph::get_workspace_limit);
-    do_shape_infer(true);
-    for (auto&& i : m_cur_opr->usable_output()) {
-        mgb_assert(i->dtype().valid() && i->comp_node().valid());
-        mgb_assert(i->shape().ndim || i->contain_flag(VarNode::Flag::NO_SYS_MEM_ALLOC));
-        ret.push_back({{i->shape(), i->dtype()}, i->comp_node()});
-    }
-    return ret;
-}
-
-void ProxyGraph::invoke_op(
-        const OpDef& opdef, const SmallVector<Tensor*>& inputs,
-        const SmallVector<Tensor*>& outputs, const SmallVector<Tensor*>& workspaces) {
-    CUR_OPR_GUARD(get_proxy_opr(opdef, inputs));
-    init_output_tensor(outputs, workspaces);
-    for (auto oup : m_cur_opr->output()) {
-        m_graph->add_used_comp_node(oup->comp_node());
-    }
-    m_cur_opr->execute(*m_env);
-}
-
 void ProxyGraph::cleanup() {
     if (m_cur_opr) {
         for (auto&& i : m_cur_opr->input()) {
@@ -545,102 +484,8 @@ void ProxyGraph::cleanup() {
     m_cur_opr = nullptr;
 }
 
-void ProxyGraph::init_output_tensor(
-        const SmallVector<Tensor*>& outputs, const SmallVector<Tensor*>& workspaces) {
-    // get proxy opr
-    auto proxy = m_cur_opr;
-
-    auto get_workspace_size = [=](CompNode cn, size_t old_limit) {
-        size_t limit = 0;
-        for (auto&& var : workspaces) {
-            limit += var->dtype().size(var->shape().total_nr_elems());
-        }
-        return limit;
-    };
-    ::mgb::opr::intl::WorkspaceLimitHook::set_impl(m_graph.get(), get_workspace_size);
-    do_shape_infer(true);
-
-    size_t j = 0;
-    size_t k = 0;
-    for (auto&& var : proxy->output()) {
-        auto&& chk = var->m_mem_plan.reset_from_owner_var().chunk();
-        if (var->contain_flag(VarNode::Flag::VOLATILE_CONTENT)) {
-            // workspace
-            if (workspaces.size()) {
-                mgb_assert(k < workspaces.size());
-                auto&& layout = workspaces[k]->layout();
-                mgb_assert(
-                        var->comp_node() == workspaces[k]->comp_node() &&
-                        var->shape().eq_shape(layout) && var->dtype() == layout.dtype);
-                var->m_dev_tensor = workspaces[k]->dev_tensor();
-                ++k;
-            } else {
-                TensorLayout layout{var->shape(), var->dtype(), var->format()};
-                var->m_dev_tensor = BlobManager::inst()->alloc_workspace_with_defrag(
-                        var->comp_node(), layout);
-            }
-        } else {
-            mgb_assert(j < outputs.size());
-            auto&& tensor = outputs[j];
-            auto&& layout = tensor->layout();
-            mgb_assert(
-                    var->comp_node() == tensor->comp_node() &&
-                    var->shape().eq_shape(layout) && var->dtype() == layout.dtype);
-            var->assign_dev_tensor_from_tensor(tensor->dev_tensor());
-            ++j;
-        }
-        chk.mem_alloc_status.set_from_owner_var();
-    }
-    mgb_assert(j == outputs.size());
-    mgb_assert(k == workspaces.size());
-
-    // Memory forwarding was bypassed in megbrain with graph option
-    // imperative_proxy_graph on, here we call mem_plan_fwd_in2out_readonly
-    // to initialize some opr (e.g. Subtensor)'s internal state
-    // TODO: implement memory forwarding
-    proxy->mem_plan_fwd_in2out_readonly();
-    {
-        // some opr (e.g. Reduce) rely on on_mem_status_changed to set
-        // input/output tensor correctly, since we bypass var_node_mem_mgr
-        // on_mem_status_changed should be called here
-        auto&& cb = proxy->get_opr_event_callback().on_mem_status_changed;
-        if (cb.valid()) {
-            cb.val()();
-        }
-    }
-}
-
-cg::OperatorNodeBase* ProxyGraph::get_proxy_opr(
-        const OpDef& opdef, const SmallVector<Tensor*>& inputs) {
-    VarNodeArray vinputs(inputs.size());
-    for (size_t i = 0; i < inputs.size(); ++i) {
-        vinputs[i] = InputPlaceholder::make(*m_graph, *inputs[i]).node();
-    }
-    auto opr = OpDef::apply_on_var_node(opdef, vinputs)[0]->owner_opr();
-    mgb_assert(!opr->same_type<InputPlaceholder>());
-    for (auto&& i : opr->input()) {
-        mgb_assert(i->owner_opr()->same_type<InputPlaceholder>());
-    }
-    return opr;
-}
-
 /*********************** Logical Tensor Impl ***********************/
 
-std::tuple<SmallVector<LogicalTensorDesc>, bool> ProxyGraph::
-        infer_output_attrs_fallible(
-                const OpDef& opdef, const SmallVector<LogicalTensorDesc>& inputs) {
-    // this function is just a placeholder
-    // it will be overridden by ProxyGraphTypeI::infer_output_attrs_fallible in minigraph
-    mgb_assert(0);
-}
-
-struct ProxyGraph::GradGraph {
-    cg::VarNodeArray inputs;
-    cg::VarNodeArray outputs;
-    cg::VarNodeArray output_grads;
-    cg::VarNode* grad;
-};
-
 EncodedSubgraph ProxyGraph::make_backward_graph(
         const OpDef& opdef, const SmallVector<LogicalTensorDesc>& input_descs,
         const SmallVector<bool>& input_requires_grad,
         const SmallVector<bool>& output_has_grad) {
@@ -793,22 +638,6 @@ VarNodeArray ProxyGraph::make_input_place_holders(
 
 /*********************** Common Impl ***********************/
 
-bool ProxyGraph::do_shape_infer(bool sync_value) {
-    m_static_infer_manager->update();
-
-    bool validated = true;
-    for (auto* var : m_cur_opr->output()) {
-        if (sync_value) {
-            var->shape(m_static_infer_manager->infer_shape(var));
-        } else if (auto* shape = m_static_infer_manager->infer_shape_fallible(var)) {
-            var->shape(*shape);
-        } else {
-            validated = false;
-        }
-    }
-    return validated;
-}
-
 TensorPtr ProxyGraph::as_tensor(cg::OperatorNodeBase* opr, bool share) {
     // TODO : maybe some tensor should copy value from origin opr rather than
     // share the RawStorage

diff --git a/imperative/src/impl/proxy_graph.h b/imperative/src/impl/proxy_graph.h
index a7a1a6ba..56d193b6 100644
--- a/imperative/src/impl/proxy_graph.h
+++ b/imperative/src/impl/proxy_graph.h
@@ -27,44 +27,22 @@ public:
     static std::unique_ptr<MegBrainError> get_async_error() {
         return std::move(tm_async_error);
     }
 
-    static size_t get_workspace_limit(CompNode cn, size_t old_limit) {
-        size_t free = cn.get_free_mem();
-        size_t lmt = cn.get_max_block_size_available();
-        return std::max(lmt, free);
-    }
-
     /********************** Physical Tensor API **********************/
 
-    SmallVector<LogicalTensorDesc> infer_output_attrs(
-            const OpDef& opdef, const SmallVector<Tensor*>& inputs);
-
-    void invoke_op(
-            const OpDef& opdef, const SmallVector<Tensor*>& inputs,
-            const SmallVector<Tensor*>& outputs, const SmallVector<Tensor*>& workspace);
-
     EncodedSubgraph make_backward_graph(
             const OpDef& opdef, const SmallVector<LogicalTensorDesc>& input_descs,
             const SmallVector<bool>& input_requires_grad,
             const SmallVector<bool>& output_has_grad);
 
-    /********************** Logical Tensor API **********************/
-
-    size_t get_opr_output_size(
-            const OpDef& opdef, const SmallVector<LogicalTensorDesc>& inputs);
-
-    std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
-            const OpDef& opdef, const SmallVector<LogicalTensorDesc>& inputs);
-
 private:
     ProxyGraph();
 
     class ProxyGraphImpl;
-    class ExecEnv;
     class StaticInferManager;
     class SeqCompNodeOptimizer;
     class InputPlaceholder;
     struct ProxyGraphInst;
-    struct GradGraph;
    class CurOprGuard;
 
     void reset();
@@ -73,12 +51,6 @@ private:
 
     void cleanup();
 
-    void init_output_tensor(
-            const SmallVector<Tensor*>& outputs, const SmallVector<Tensor*>& workspace);
-
-    cg::OperatorNodeBase* get_proxy_opr(
-            const OpDef& opdef, const SmallVector<Tensor*>& inputs);
-
     /********************** Logical Tensor Helper **********************/
 
     cg::VarNodeArray make_input_place_holders(
@@ -86,14 +58,11 @@ private:
 
     /********************** Common Helper **********************/
 
-    bool do_shape_infer(bool sync_value);
-
     TensorPtr as_tensor(cg::OperatorNodeBase* opr, bool share = true);
 
     cg::OperatorNodeBase* m_cur_opr = nullptr;
     std::unique_ptr<ProxyGraphImpl> m_graph;
     size_t m_max_op_cnt = 100;
-    std::unique_ptr<ExecEnv> m_env;
     std::unique_ptr<StaticInferManager> m_static_infer_manager;
     std::unique_ptr<SeqCompNodeOptimizer> m_seq_comp_node_optimizer;
 
diff --git a/imperative/src/impl/proxy_graph/mini_graph.h b/imperative/src/impl/proxy_graph/mini_graph.h
index ee605774..69ba85df 100644
--- a/imperative/src/impl/proxy_graph/mini_graph.h
+++ b/imperative/src/impl/proxy_graph/mini_graph.h
@@ -801,18 +801,19 @@ public:
         return ret;
     }
 
-    SmallVector<LogicalTensorDesc> infer_output_attrs(
-            const OpDef& def, const SmallVector<Tensor*>& inputs) {
-        SmallVector<LogicalTensorDesc> descs;
-        auto& minigraph = get_cached_minigraph(def, inputs);
+    SmallVector<TensorPtr> apply_on_physical_tensor(
+            const OpDef& def, SmallVector<TensorPtr> inputs) {
+        auto raw_inputs = to_raw_ptr_array(inputs);
+        auto& minigraph = get_cached_minigraph(def, raw_inputs);
         auto _ = scoped_attach(&minigraph);
-        auto sess = minigraph.infer_session(inputs);
+        auto sess = minigraph.infer_session(raw_inputs);
+        ::mgb::opr::intl::WorkspaceLimitHook::set_impl(
+                minigraph.opr()->owner_graph(), get_workspace_limit);
         // some output var in minigraph.opr()->output() may not appear in
         // minigraph.opr()->usable_output(), but execution may use the attrs for those
         // output var, so we infer attrs for all outputs, but only return
         // LogicalTensorDesc for minigraph.opr()->usable_output()
-        ::mgb::opr::intl::WorkspaceLimitHook::set_impl(
-                minigraph.opr()->owner_graph(), get_workspace_limit);
+        SmallVector<LogicalTensorDesc> output_descs;
         for (size_t i = 0; i < minigraph.opr()->output().size(); ++i) {
             auto* shape = sess.infer(sess.output_data[i].shape_infer, true);
             mgb_assert(shape);
@@ -825,15 +826,9 @@ public:
             mgb_assert(
                     ovar->shape().ndim ||
                     ovar->contain_flag(VarNode::Flag::NO_SYS_MEM_ALLOC));
-            descs.push_back({{ovar->shape(), ovar->dtype()}, ovar->comp_node()});
+            output_descs.push_back({{ovar->shape(), ovar->dtype()}, ovar->comp_node()});
         }
-        return descs;
-    }
 
-    SmallVector<TensorPtr> apply_on_physical_tensor(
-            const OpDef& def, SmallVector<TensorPtr> inputs) {
-        auto raw_inputs = to_raw_ptr_array(inputs);
-        auto output_descs = infer_output_attrs(def, raw_inputs);
         SmallVector<TensorPtr> outputs(output_descs.size(), {});
         for (size_t i = 0; i < outputs.size(); i++) {
             outputs[i] =
@@ -853,11 +848,8 @@ public:
                 }
             }
         }
-        auto& minigraph = get_cached_minigraph(def, raw_inputs);
-        auto _ = scoped_attach(&minigraph);
         // some opr (e.g. Subtensor) may invoke infer_value during execution,
         // so we need to create an inference session here
-        auto sess = minigraph.infer_session(raw_inputs);
         minigraph.execute(raw_inputs, raw_outputs, m_env);
         for (auto&& cn : used_cns) {
             for (auto&& in : inputs) {

diff --git a/imperative/src/impl/proxy_graph/proxy_graph.cpp b/imperative/src/impl/proxy_graph/proxy_graph.cpp
index 07a47d0d..2b26905d 100644
--- a/imperative/src/impl/proxy_graph/proxy_graph.cpp
+++ b/imperative/src/impl/proxy_graph/proxy_graph.cpp
@@ -10,11 +10,6 @@
  */
 
 #include "./mini_graph.h"
-#if 0
-// ../proxy_graph.h is deprecated, leave here for debug purpose
-// uncomment #if 0 macro to debug
-#include "../proxy_graph.h"
-#endif
 
 namespace mgb::imperative::proxy_graph {
 MGB_DYN_TYPE_OBJ_FINAL_IMPL(ProxyGraph::InputPlaceholder);
@@ -28,18 +23,6 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
         const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) {
     auto ret = proxy_graph::ProxyGraphTypeI::inst().infer_output_attrs_fallible(
             def, inputs);
-#if 0
-    // delete me after the new implementation is stable
-    auto ref = ProxyGraph::get_default_graph()->infer_output_attrs_fallible(def, inputs);
-    auto& [a, _1] = ret;
-    auto& [b, _2] = ref;
-    if (a.size() != b.size()) mgb_trap();
-    for (size_t i = 0; i < a.size(); ++i) {
-        if (a[i].layout.dtype != b[i].layout.dtype) mgb_trap();
-        if (a[i].comp_node != b[i].comp_node) mgb_trap();
-        if (!a[i].layout.eq_shape(b[i].layout)) mgb_trap();
-    }
-#endif
     return ret;
 }

diff --git a/imperative/src/impl/proxy_graph_detail.cpp b/imperative/src/impl/proxy_graph_detail.cpp
index 07d0d584..f15be26f 100644
--- a/imperative/src/impl/proxy_graph_detail.cpp
+++ b/imperative/src/impl/proxy_graph_detail.cpp
@@ -17,83 +17,6 @@ namespace mgb {
 namespace imperative {
 namespace proxy_graph_detail {
 
-// those functions are reimplemented with opr cache
-// in ./proxy_graph/mini_graph.h
-#if 0
-namespace {
-SmallVector<Tensor*> to_raw_ptr_array(
-        const SmallVector<TensorPtr>& inputs, bool ensure_storage = true) {
-    SmallVector<Tensor*> ret;
-    for (auto&& i : inputs) {
-        mgb_assert(i);
-        ret.push_back(i.get());
-        if (ensure_storage) {
-            // apply lazy allocation
-            i->blob()->storage();
-        }
-    }
-    return ret;
-}
-
-SmallVector<LogicalTensorDesc> infer_output_attrs(
-        const OpDef& def, const SmallVector<TensorPtr>& inputs) {
-    auto&& graph = ProxyGraph::get_default_graph();
-    return graph->infer_output_attrs(def, to_raw_ptr_array(inputs));
-}
-} // anonymous namespace
-
-void exec(
-        const OpDef& def, const SmallVector<TensorPtr>& inputs,
-        const SmallVector<TensorPtr>& outputs,
-        const SmallVector<TensorPtr>& workspaces) {
-    auto&& graph = ProxyGraph::get_default_graph();
-    auto raw_inputs = to_raw_ptr_array(inputs), raw_outputs = to_raw_ptr_array(outputs),
-         raw_workspaces = to_raw_ptr_array(workspaces);
-    CompNode::UnorderedSet used_cns;
-    for (auto&& out : raw_outputs) {
-        auto cn = out->comp_node();
-        if (used_cns.insert(cn).second) {
-            for (auto&& in : inputs) {
-                if (in->comp_node() != cn) {
-                    auto&& e = in->get_or_create_event();
-                    e->device_wait_by(cn);
-                }
-            }
-        }
-    }
-    graph->invoke_op(def, raw_inputs, raw_outputs, raw_workspaces);
-    for (auto&& cn : used_cns) {
-        for (auto&& in : inputs) {
-            if (in->comp_node() != cn) {
-                in->add_release_callback(cn);
-            }
-        }
-    }
-}
-
-SmallVector<TensorPtr> apply_on_physical_tensor(
-        const OpDef& def, SmallVector<TensorPtr> inputs) {
-    auto output_descs = infer_output_attrs(def, inputs);
-    SmallVector<TensorPtr> outputs(output_descs.size(), {});
-    for (size_t i = 0; i < outputs.size(); i++) {
-        outputs[i] = Tensor::make(output_descs[i].layout, output_descs[i].comp_node);
-    }
-    exec(def, inputs, outputs, {});
-    auto async_error = ProxyGraph::get_async_error();
-    if (async_error) {
-        throw *async_error;
-    }
-    return outputs;
-}
-
-
-std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
-        const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) {
-    auto&& graph = ProxyGraph::get_default_graph();
-    return graph->infer_output_attrs_fallible(def, inputs);
-}
-#endif
-
 EncodedSubgraph make_backward_graph(
         const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs,
         const SmallVector<bool>& input_requires_grad,