From b8f810ee31bdd6139e977e5cc4fcfc1c7857f7c8 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Mon, 25 Oct 2021 19:57:49 +0800
Subject: [PATCH] feat(megbrain): add mc20 runtime opr

GitOrigin-RevId: 2ee07b213bbdf465621842b072f3b450165c286a
---
 src/core/impl/comp_node/comp_node.cpp          |  10 +-
 src/opr/impl/mc20_runtime_op.cpp               | 379 +++++++++++++++++++++++++
 src/opr/impl/mc20_runtime_op.oprdecl           |  17 ++
 src/opr/impl/mc20_runtime_op.sereg.h           |  72 +++++
 src/opr/include/megbrain/opr/mc20_runtime_op.h |  83 ++++++
 5 files changed, 556 insertions(+), 5 deletions(-)
 create mode 100644 src/opr/impl/mc20_runtime_op.cpp
 create mode 100644 src/opr/impl/mc20_runtime_op.oprdecl
 create mode 100644 src/opr/impl/mc20_runtime_op.sereg.h
 create mode 100644 src/opr/include/megbrain/opr/mc20_runtime_op.h

diff --git a/src/core/impl/comp_node/comp_node.cpp b/src/core/impl/comp_node/comp_node.cpp
index c37c6247..af2d13df 100644
--- a/src/core/impl/comp_node/comp_node.cpp
+++ b/src/core/impl/comp_node/comp_node.cpp
@@ -173,11 +173,11 @@ CompNode::Locator CompNode::Locator::parse(const std::string& id) {
         dev_type = DeviceType::CAMBRICON;
         ptr += 9;
     } else if (ptr[0] == 'm') {
-            if (strncmp(ptr, "multithread", 11)) {
-                err();
-            }
-            dev_type = DeviceType::MULTITHREAD;
-            ptr += 11;
+        if (strncmp(ptr, "multithread", 11)) {
+            err();
+        }
+        dev_type = DeviceType::MULTITHREAD;
+        ptr += 11;
     } else {
         if (ptr[1] != 'p' || ptr[2] != 'u') {
diff --git a/src/opr/impl/mc20_runtime_op.cpp b/src/opr/impl/mc20_runtime_op.cpp
new file mode 100644
index 00000000..79ce214f
--- /dev/null
+++ b/src/opr/impl/mc20_runtime_op.cpp
@@ -0,0 +1,379 @@
+/**
+ * \file src/opr/impl/mc20_runtime_op.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+
+#include "megbrain/opr/mc20_runtime_op.h"
+#include "megbrain/common.h"
+#include "megbrain/graph/event.h"
+#include "megdnn/dtype.h"
+
+#include <memory>
+#include <type_traits>
+
+#if MGB_MC20
+
+using namespace mgb;
+using namespace opr;
+
+namespace {
+TensorShape mc20_shape_to_mgb_shape(AX_NPU_SDK_EX_TENSOR_META_T tensor_meta) {
+    TensorShape ret;
+    ret.ndim = tensor_meta.nShapeNDim;
+    for (size_t i = 0; i < ret.ndim; ++i) {
+        ret[i] = tensor_meta.pShape[i];
+    }
+    return ret;
+}
+DType mc20_dtype_to_mgb_dtype(AX_NPU_SDK_EX_ADV_TENSOR_DTYPE data_type) {
+    switch (data_type) {
+        case AX_NPU_TDT_UINT8:
+            return dtype::Uint8();
+        case AX_NPU_TDT_FLOAT32:
+            return dtype::Float32();
+        case AX_NPU_TDT_INT16:
+            return dtype::Int16();
+        case AX_NPU_TDT_INT32:
+            return dtype::Int32();
+        default:
+            mgb_throw(
+                    MegBrainError, "MC20DataType %d is not supported by MegBrain.",
+                    static_cast<int>(data_type));
+    }
+}
+
+}  // namespace
+
+constexpr AX_NPU_SDK_EX_HANDLE_T MC20RuntimeOpr::INVALID_MODEL_HANDLE;
+
+/* ====================== MC20RuntimeOpr ==================== */
+MGB_DYN_TYPE_OBJ_FINAL_IMPL(MC20RuntimeOpr);
+MC20RuntimeOpr::MC20RuntimeOpr(
+        SharedBuffer buf, AX_NPU_SDK_EX_HANDLE_T model_handle,
+        const VarNodeArray& inputs, const OperatorNodeConfig& config)
+        : Super(inputs[0]->owner_graph(), config, "mc20_runtime", inputs),
+          m_buffer{std::move(buf)},
+          m_model_handle(model_handle) {
+    mgb_assert(
+            inputs[0]->comp_node().device_type() == CompNode::DeviceType::MC20,
+            "MC20RuntimeOpr can only be used on mc20 comp node; "
+            "got %s",
+            inputs[0]->comp_node().to_string().c_str());
+
+    for (auto i : inputs) {
+        add_input({i});
+    }
+    if (m_model_handle == INVALID_MODEL_HANDLE) {
+        MGB_MC20_CHECK(AX_NPU_SDK_EX_Create_handle(
+                &m_model_handle, m_buffer.data(), m_buffer.size()));
+        m_is_model_holder = true;
+    }
+
+    const AX_NPU_SDK_EX_ADV_IO_INFO_T* io_info =
+            AX_NPU_SDK_EX_ADV_Get_io_info(m_model_handle);
+
+    size_t nr_outputs = io_info->nOutputSize;
+    bool has_workspace = false;
+    if (nr_outputs == 1) {
+        const auto& tensor_meta = *(io_info->pOutputs[0].pTensorMeta);
+        add_output(std::string(reinterpret_cast<const char*>(tensor_meta.pName)));
+        if (tensor_meta.eMemoryType == AX_NPU_MT_VIRTUAL) {
+            mgb_assert(tensor_meta.nInnerSize > 0);
+            has_workspace = true;
+        }
+
+    } else {
+        for (size_t i = 0; i < nr_outputs; ++i) {
+            const auto& tensor_meta = *(io_info->pOutputs[i].pTensorMeta);
+            add_output(std::string(reinterpret_cast<const char*>(tensor_meta.pName)));
+            if (tensor_meta.eMemoryType == AX_NPU_MT_VIRTUAL) {
+                mgb_assert(tensor_meta.nInnerSize > 0);
+                has_workspace = true;
+            }
+        }
+    }
+    mgb_assert(has_workspace, "Currently only support model with cpu tail");
+
+    //! \warning There is no interface in MC20 to query the batch size of a
+    //! model. MC20 supports multi-batch by changing an n-batch input into n
+    //! 1-batch inputs.
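+    //!
+    //! For example (numbers are illustrative only): a .neu model compiled from
+    //! a single-input network at batch 4 exposes 4 one-batch inputs, so with
+    //! one megbrain input var m_model_batch below becomes 4 and execute_mc20()
+    //! walks the graph batch in strides of 4.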
+    mgb_assert(
+            io_info->nInputSize % inputs.size() == 0,
+            "The number of inputs in the neu model should be a multiple of "
+            "the number of inputs in megbrain, but got %zu(neu model) vs "
+            "%zu(mgb model)",
+            io_info->nInputSize, inputs.size());
+    m_model_batch = io_info->nInputSize / inputs.size();
+
+    add_equivalence_component<ScalarHash<const void*>>(m_buffer.data());
+    cg::add_workspace_output(this);
+}
+
+MC20RuntimeOpr::~MC20RuntimeOpr() {
+    if (m_is_model_holder) {
+        MGB_MC20_CHECK(AX_NPU_SDK_EX_Destroy_handle(m_model_handle));
+    }
+}
+
+void MC20RuntimeOpr::execute_mc20() {
+    auto&& mc20_env = CompNodeEnv::from_comp_node(input(0)->comp_node()).mc20_env();
+    mc20_env.activate();
+
+    const AX_NPU_SDK_EX_ADV_IO_INFO_T* io_info =
+            AX_NPU_SDK_EX_ADV_Get_io_info(m_model_handle);
+
+    AX_NPU_SDK_EX_IO_T npu_io;
+    memset(&npu_io, 0, sizeof(npu_io));
+    size_t batch_size = input(0)->dev_tensor().layout().shape[0];
+    for (size_t batch_idx = 0; batch_idx < batch_size; batch_idx += m_model_batch) {
+        //! prepare input
+        npu_io.nInputSize = io_info->nInputSize;
+        // allocate SDK io buffers; the element type is taken from the pInputs field
+        auto inputs = std::make_unique<std::remove_pointer_t<decltype(npu_io.pInputs)>[]>(
+                npu_io.nInputSize);
+        npu_io.pInputs = inputs.get();
+        for (size_t i = 0; i < npu_io.nInputSize; i++) {
+            // get input addr info
+            size_t inp_idx = i / m_model_batch;
+            AX_VOID* p_virtual_addr = input(inp_idx)->dev_tensor().raw_ptr();
+            AX_U64 phy_addr = MC20MemoryManager::Instance().get_phyaddr(p_virtual_addr);
+            auto nr_bytes_per_batch =
+                    input(inp_idx)->layout().span().dist_byte() / batch_size;
+            // add batch offset
+            p_virtual_addr = reinterpret_cast<AX_VOID*>(
+                    reinterpret_cast<uint8_t*>(p_virtual_addr) +
+                    nr_bytes_per_batch * (batch_idx + i % m_model_batch));
+            phy_addr += nr_bytes_per_batch * (batch_idx + i % m_model_batch);
+
+            MGB_MC20_CHECK(AX_NPU_SDK_EX_ADV_Make_io_buffer(
+                    phy_addr, p_virtual_addr, nr_bytes_per_batch, phy_addr,
+                    p_virtual_addr, nr_bytes_per_batch, &npu_io.pInputs[i]));
+        }
+
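+        // Note on the outputs prepared next (sketch of the layout): cpu-tail
+        // ("virtual") outputs are redirected into one contiguous inner buffer
+        // carved out of the trailing workspace var; `offset` walks that
+        // buffer, so e.g. two virtual outputs with nInnerSize 1024 and 2048
+        // would occupy [0, 1024) and [1024, 3072) (sizes are illustrative).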
+        //! prepare output
+        npu_io.nOutputSize = io_info->nOutputSize;
+        auto outputs = std::make_unique<std::remove_pointer_t<decltype(npu_io.pOutputs)>[]>(
+                npu_io.nOutputSize);
+        npu_io.pOutputs = outputs.get();
+        AX_U32 offset = 0;
+        AX_VOID* inner_virtual_addr_start = nullptr;
+        AX_U64 inner_phy_addr_start = 0;
+        // get inner addr from workspace
+        inner_virtual_addr_start = output(npu_io.nOutputSize)->dev_tensor().raw_ptr();
+        inner_phy_addr_start =
+                MC20MemoryManager::Instance().get_phyaddr(inner_virtual_addr_start);
+        for (size_t i = 0; i < npu_io.nOutputSize; i++) {
+            // get output addr info
+            AX_VOID* p_virtual_addr = output(i)->dev_tensor().raw_ptr();
+            AX_U64 phy_addr = 0;
+            auto nr_bytes_per_batch =
+                    output(i)->layout().span().dist_byte() / batch_size;
+            // add batch offset
+            p_virtual_addr = reinterpret_cast<AX_VOID*>(
+                    reinterpret_cast<uint8_t*>(p_virtual_addr) +
+                    nr_bytes_per_batch * batch_idx);
+            phy_addr += nr_bytes_per_batch * batch_idx;
+
+            const auto& tensor_meta = *(io_info->pOutputs[i].pTensorMeta);
+            if (tensor_meta.eMemoryType == AX_NPU_MT_PHYSICAL) {
+                MGB_MC20_CHECK(AX_NPU_SDK_EX_ADV_Make_io_buffer(
+                        phy_addr, p_virtual_addr, nr_bytes_per_batch, phy_addr,
+                        p_virtual_addr, nr_bytes_per_batch, &npu_io.pOutputs[i]));
+            } else if (tensor_meta.eMemoryType == AX_NPU_MT_VIRTUAL) {
+                auto p_inner_virtual_addr = reinterpret_cast<AX_VOID*>(
+                        reinterpret_cast<uint8_t*>(inner_virtual_addr_start) + offset);
+                auto inner_phy_addr = inner_phy_addr_start + offset;
+                MGB_MC20_CHECK(AX_NPU_SDK_EX_ADV_Make_io_buffer(
+                        phy_addr, p_virtual_addr, nr_bytes_per_batch, inner_phy_addr,
+                        p_inner_virtual_addr, tensor_meta.nInnerSize,
+                        &npu_io.pOutputs[i]));
+
+                offset += tensor_meta.nInnerSize;
+            }
+        }
+
+        MGB_MC20_CHECK(AX_NPU_SDK_EX_Run_task_sync(m_model_handle, &npu_io));
+    }
+}
+
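+//! Comp node placement implemented below: the model outputs are produced by
+//! the cpu tail and handed back to the user, so their vars live on
+//! default_cpu; only the trailing workspace var stays on the MC20 comp node
+//! of the inputs.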
+void MC20RuntimeOpr::init_output_comp_node() {
+    //! set output to cpu compnode if has cpu tail
+    const AX_NPU_SDK_EX_ADV_IO_INFO_T* io_info =
+            AX_NPU_SDK_EX_ADV_Get_io_info(m_model_handle);
+
+    CompNode input_cn;
+    for (auto&& i : input()) {
+        if (!input_cn.valid()) {
+            input_cn = i->comp_node();
+        } else {
+            mgb_assert(
+                    input_cn.mem_node() == i->comp_node().mem_node(),
+                    "opr %s{%s} requires all inputs to be on the same memory "
+                    "node; expect=%s cur_var=%s cur_cn=%s",
+                    this->cname(), this->dyn_typeinfo()->name,
+                    input_cn.to_string().c_str(), i->cname(),
+                    i->comp_node().to_string().c_str());
+        }
+    }
+    for (size_t i = 0; i < io_info->nOutputSize; i++) {
+        //! compnode of the var should be default_cpu as the output will be
+        //! proxied to the user
+        output(i)->comp_node(CompNode::default_cpu());
+    }
+    //! the last output is the workspace, which should stay on the same comp
+    //! node as the inputs
+    output(io_info->nOutputSize)->comp_node(input_cn);
+}
+
+MC20RuntimeOpr::NodeProp* MC20RuntimeOpr::do_make_node_prop() const {
+    auto ret = Super::do_make_node_prop();
+    ret->add_flag(NodeProp::Flag::CROSS_COMP_NODE_MEMORY);
+    return ret;
+}
+
+void MC20RuntimeOpr::do_execute(ExecEnv& env) {
+    CompNode cn = output(0)->comp_node();
+    auto runner = [this, cn]() {
+        this->owner_graph()->event().signal_inplace<cg::event::BeforeKernel>(this, cn);
+        cn.activate();
+        execute_mc20();
+        this->owner_graph()->event().signal_inplace<cg::event::AfterKernel>(this, cn);
+    };
+    env.dispatch_on_comp_node(cn, runner);
+
+    // Send BeforeKernel/AfterKernel event on every different comp_node
+    ThinHashSet<CompNode> st = cg::get_opr_comp_node_set(this);
+    for (auto cn : st) {
+        auto send_event = [this, cn]() {
+            this->owner_graph()->event().signal_inplace<cg::event::BeforeKernel>(
+                    this, cn);
+            this->owner_graph()->event().signal_inplace<cg::event::AfterKernel>(
+                    this, cn);
+        };
+        env.dispatch_on_comp_node(cn, send_event);
+    }
+}
+
+void MC20RuntimeOpr::on_output_comp_node_stream_changed() {
+    mgb_throw(SystemError, "comp node of output should not change");
+}
+
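+//! Output shape rule used below (illustrative example): each output shape
+//! comes from the .neu metadata with its batch dim scaled by the graph batch,
+//! e.g. a model output of {1, 1000} with graph input batch 4 (and
+//! m_model_batch == 1) becomes {4, 1000}; the trailing workspace output is a
+//! flat buffer sized as the sum of nInnerSize over all cpu-tail outputs.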
+void MC20RuntimeOpr::get_output_var_shape(
+        const TensorShapeArray& inp_shape, TensorShapeArray& out_shape) const {
+    const AX_NPU_SDK_EX_ADV_IO_INFO_T* io_info =
+            AX_NPU_SDK_EX_ADV_Get_io_info(m_model_handle);
+    size_t nr_inputs = io_info->nInputSize;
+
+    for (size_t i = 0; i < nr_inputs; ++i) {
+        const auto& tensor_meta = *(io_info->pInputs[i].pTensorMeta);
+        auto model_shape = mc20_shape_to_mgb_shape(tensor_meta);
+        size_t inp_idx = i / m_model_batch;
+        // enable multi-batch
+        mgb_assert(
+                inp_shape[inp_idx][0] % model_shape[0] == 0 &&
+                        (inp_shape[inp_idx][0] / model_shape[0]) % m_model_batch == 0,
+                "input %zu batch is %zu, while model's input batch is %zu", i,
+                inp_shape[inp_idx][0], model_shape[0]);
+        model_shape[0] = inp_shape[inp_idx][0];
+        mgb_assert(
+                model_shape.eq_shape(inp_shape[inp_idx]),
+                "shape mismatch of input %zu, expected: %s got: %s", i,
+                model_shape.to_string().c_str(),
+                inp_shape[inp_idx].to_string().c_str());
+    }
+    size_t input_batch = (io_info->pInputs[0].pTensorMeta)->pShape[0];
+    //! \warning mc20 sdk implements multi-batch by breaking an n-batch input
+    //! up into n 1-batch inputs
+    mgb_assert(input_batch == 1, "input batch: %zu, net's input batch: 1", input_batch);
+    AX_U32 workspace_size = 0;
+    for (size_t i = 0; i < io_info->nOutputSize; ++i) {
+        const auto& tensor_meta = *(io_info->pOutputs[i].pTensorMeta);
+        out_shape[i] = mc20_shape_to_mgb_shape(tensor_meta);
+        // enable multi-batch
+        out_shape[i][0] =
+                out_shape[i][0] * inp_shape[0][0] / input_batch / m_model_batch;
+        if (tensor_meta.eMemoryType == AX_NPU_MT_VIRTUAL) {
+            workspace_size += tensor_meta.nInnerSize;
+        }
+    }
+    out_shape.back() = {workspace_size};
+}
+
+void MC20RuntimeOpr::add_input_layout_constraint() {
+    //! default contiguous
+    for (auto i : input()) {
+        i->add_layout_constraint_contiguous();
+    }
+}
+
+void MC20RuntimeOpr::init_output_dtype() {
+    DType dt_mc20, dt_input;
+    const AX_NPU_SDK_EX_ADV_IO_INFO_T* io_info =
+            AX_NPU_SDK_EX_ADV_Get_io_info(m_model_handle);
+    for (size_t i = 0; i < io_info->nInputSize; ++i) {
+        dt_mc20 = mc20_dtype_to_mgb_dtype(io_info->pInputs[i].eDType);
+        size_t inp_idx = i / m_model_batch;
+        dt_input = input(inp_idx)->dtype();
+        mgb_assert(
+                dt_mc20.valid() && dt_input.valid() &&
+                        dt_mc20.enumv() == dt_input.enumv(),
+                "dtype mismatch of input %zu: expected %s, "
+                "got %s",
+                i, dt_mc20.name(), dt_input.name());
+    }
+
+    for (size_t i = 0; i < io_info->nOutputSize; ++i) {
+        dt_mc20 = mc20_dtype_to_mgb_dtype(io_info->pOutputs[i].eDType);
+        mgb_assert(
+                dt_mc20.valid(),
+                "output dtype checking failed: invalid dtype returned.");
+        if (!output(i)->dtype().valid())
+            output(i)->dtype(dt_mc20);
+    }
+}
+
+SymbolVarArray MC20RuntimeOpr::make(
+        SharedBuffer buf, const SymbolVarArray& src, const OperatorNodeConfig& config) {
+    VarNodeArray var_node_array = cg::to_var_node_array(src);
+    auto mc20_runtime_opr = std::make_unique<MC20RuntimeOpr>(
+            std::move(buf), INVALID_MODEL_HANDLE, var_node_array, config);
+    auto ret = cg::to_symbol_var_array(src[0].node()
+                                               ->owner_graph()
+                                               ->insert_opr(std::move(mc20_runtime_opr))
+                                               ->output());
+    ret.pop_back();  // remove workspace
+    return ret;
+}
+
+SymbolVarArray MC20RuntimeOpr::make(
+        const void* buf, size_t size, const SymbolVarArray& src,
+        const OperatorNodeConfig& config) {
+    mgb_throw_if(
+            !CompNode::get_device_count(CompNode::DeviceType::MC20), SystemError,
+            "cannot create MC20RuntimeOpr when mc20 is not "
+            "available");
+    std::shared_ptr<uint8_t> shptr{new uint8_t[size], [](uint8_t* p) { delete[] p; }};
+    memcpy(shptr.get(), buf, size);
+    SharedBuffer buffer{std::move(shptr), size};
+    return make(std::move(buffer), src, config);
+}
+
+SymbolVarArray MC20RuntimeOpr::make(
+        SharedBuffer buf, AX_NPU_SDK_EX_HANDLE_T model_handle,
+        const SymbolVarArray& src, const OperatorNodeConfig& config) {
+    VarNodeArray var_node_array = cg::to_var_node_array(src);
+    auto mc20_runtime_opr = std::make_unique<MC20RuntimeOpr>(
+            std::move(buf), model_handle, var_node_array, config);
+    auto ret = cg::to_symbol_var_array(src[0].node()
+                                               ->owner_graph()
+                                               ->insert_opr(std::move(mc20_runtime_opr))
+                                               ->output());
+    ret.pop_back();  // remove workspace
+    return ret;
+}
+
+#endif  // MGB_MC20
+
+// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
diff --git a/src/opr/impl/mc20_runtime_op.oprdecl b/src/opr/impl/mc20_runtime_op.oprdecl
new file mode 100644
index 00000000..3cc4c366
--- /dev/null
+++ b/src/opr/impl/mc20_runtime_op.oprdecl
@@ -0,0 +1,17 @@
+decl_raw_opr(
+    'mc20_runtime',
+    desc='create an operator that loads and runs an mc20 model',
+    inputs=[
+        Doc('inputs', 'input vars', 'list of :class:`.SymbolVar`'),
+        Doc('data_bytes', 'serialized mc20 model'),
+    ],
+    body=[
+        'assert isinstance(data_bytes, bytes), '
+        '"data must be bytes; got {}".format(type(data_bytes))',
+        'output = _mgb._Opr.mc20_runtime(inputs, data_bytes, config)',
+        'cvt_result_kwargs["explode_single"] = False',
+    ],
+)
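+
+# Usage sketch (the file name and the input var `x` below are hypothetical):
+#   data_bytes = open('model.neu', 'rb').read()
+#   outputs = mc20_runtime([x], data_bytes)  # list of SymbolVar, one per model output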
+
+# vim: ft=python
+
diff --git a/src/opr/impl/mc20_runtime_op.sereg.h b/src/opr/impl/mc20_runtime_op.sereg.h
new file mode 100644
index 00000000..dbedb692
--- /dev/null
+++ b/src/opr/impl/mc20_runtime_op.sereg.h
@@ -0,0 +1,72 @@
+/**
+ * \file src/opr/impl/mc20_runtime_op.sereg.h
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+
+#include "megbrain/opr/mc20_runtime_op.h"
+#include "megbrain/serialization/sereg.h"
+
+#if MGB_MC20
+namespace mgb {
+using MC20RuntimeOpr = opr::MC20RuntimeOpr;
+namespace serialization {
+
+template <>
+struct OprLoadDumpImpl<MC20RuntimeOpr, 0> {
+    static void dump(OprDumpContext& ctx, const cg::OperatorNodeBase& opr_) {
+        auto&& opr = opr_.cast_final_safe<MC20RuntimeOpr>();
+        auto&& buf = opr.buffer();
+        auto&& name = opr.name();
+        ctx.dump_buf_with_len(buf.data(), buf.size());
+        ctx.dump_buf_with_len(name.c_str(), name.size());
+    }
+
+    static cg::OperatorNodeBase* load(
+            OprLoadContext& ctx, const cg::VarNodeArray& inputs,
+            const OperatorNodeConfig& config) {
+        inputs.at(0)->comp_node().activate();
+        auto buf = ctx.load_shared_buf_with_len();
+        auto name = ctx.load_shared_buf_with_len();
+        std::string c_name(reinterpret_cast<const char*>(name.data()), name.size());
+        OperatorNodeConfig& c_config = const_cast<OperatorNodeConfig&>(config);
+        c_config.name(c_name);
+        return opr::MC20RuntimeOpr::make(
+                       std::move(buf), cg::to_symbol_var_array(inputs), c_config)
+                .at(0)
+                .node()
+                ->owner_opr();
+    }
+};
+
+}  // namespace serialization
+
+namespace opr {
+cg::OperatorNodeBase* opr_shallow_copy_mc20_runtime_opr(
+        const serialization::OprShallowCopyContext& ctx,
+        const cg::OperatorNodeBase& opr_, const VarNodeArray& inputs,
+        const OperatorNodeConfig& config) {
+    MGB_MARK_USED_VAR(ctx);
+    auto&& opr = opr_.cast_final_safe<MC20RuntimeOpr>();
+    return MC20RuntimeOpr::make(
+                   opr.buffer(), opr.model_handle(), cg::to_symbol_var_array(inputs),
+                   config)
+            .at(0)
+            .node()
+            ->owner_opr();
+}
+
+MGB_SEREG_OPR(MC20RuntimeOpr, 0);
+MGB_REG_OPR_SHALLOW_COPY(MC20RuntimeOpr, opr_shallow_copy_mc20_runtime_opr);
+}  // namespace opr
+}  // namespace mgb
+
+#endif
+
+// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
diff --git a/src/opr/include/megbrain/opr/mc20_runtime_op.h b/src/opr/include/megbrain/opr/mc20_runtime_op.h
new file mode 100644
index 00000000..fba0f454
--- /dev/null
+++ b/src/opr/include/megbrain/opr/mc20_runtime_op.h
@@ -0,0 +1,83 @@
+/**
+ * \file src/opr/include/megbrain/opr/mc20_runtime_op.h
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+
+#pragma once
+
+#include
+#include "megbrain/comp_node_env.h"
+#include "megbrain/graph.h"
+#include "megbrain/graph/operator_node.h"
+#include "megbrain/serialization/file.h"
+#include "megdnn/thin/function.h"
+
+#if MGB_MC20
+#include "megbrain/mc20/mc20_memory_manager.h"
+
+namespace mgb {
+namespace opr {
+
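+//! Runtime operator that embeds a serialized MC20 (.neu) model in a megbrain
+//! graph. A minimal usage sketch (`model_buf` and the input var `x` are
+//! hypothetical; the trailing workspace var is stripped by make(), so one
+//! SymbolVar is returned per model output):
+//!
+//! \code
+//!     SymbolVarArray inps{x};  // x must live on an mc20 comp node
+//!     auto outs = MC20RuntimeOpr::make(model_buf.data(), model_buf.size(), inps);
+//!     SymbolVar first_out = outs[0];
+//! \endcode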
+MGB_DEFINE_OPR_CLASS(
+        MC20RuntimeOpr, cg::OutshapePureByInshapeOpr) // {
+public:
+    using SharedBuffer = mgb::serialization::SharedBuffer;
+
+    void do_execute(ExecEnv& env) override;
+    void get_output_var_shape(
+            const TensorShapeArray& inp_shape,
+            TensorShapeArray& out_shape) const override;
+    void add_input_layout_constraint() override;
+    void init_output_dtype() override;
+    void init_output_comp_node() override;
+    void on_output_comp_node_stream_changed() override;
+
+    /**
+     * \brief construct a MC20RuntimeOpr from a serialized model buffer
+     */
+    MC20RuntimeOpr(
+            SharedBuffer buf, AX_NPU_SDK_EX_HANDLE_T model_handle,
+            const VarNodeArray& inputs, const OperatorNodeConfig& config);
+    ~MC20RuntimeOpr();
+
+    const SharedBuffer& buffer() const { return m_buffer; }
+
+    AX_NPU_SDK_EX_HANDLE_T model_handle() const { return m_model_handle; }
+
+    static SymbolVarArray make(
+            SharedBuffer buf, const SymbolVarArray& src,
+            const OperatorNodeConfig& config = {});
+
+    static SymbolVarArray make(
+            const void* buf, size_t size, const SymbolVarArray& src,
+            const OperatorNodeConfig& config = {});
+
+    static SymbolVarArray make(
+            SharedBuffer buf, AX_NPU_SDK_EX_HANDLE_T model_handle,
+            const SymbolVarArray& src, const OperatorNodeConfig& config = {});
+
+private:
+    NodeProp* do_make_node_prop() const override;
+
+    void execute_mc20();
+    size_t m_model_batch;
+    SharedBuffer m_buffer;
+    constexpr static AX_NPU_SDK_EX_HANDLE_T INVALID_MODEL_HANDLE = nullptr;
+    AX_NPU_SDK_EX_HANDLE_T m_model_handle = INVALID_MODEL_HANDLE;
+    //! if true, the model handle is owned and released in the destructor
+    bool m_is_model_holder = false;
+};
+
+}  // namespace opr
+}  // namespace mgb
+
+#endif  // MGB_MC20
+
+// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}