From: @xchu42
Reviewed-by: @wqtshg, @ji_chen
Signed-off-by: @lbisdaddy
Tag: v1.2.0
@@ -399,41 +399,6 @@ static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchor
   return SUCCESS;
 }
-static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) {
-  if (graph->GetGraphUnknownFlag()) {
-    GELOGI("Graph %s is unknown graph, ignore gen_task for constant.", graph->GetName().c_str());
-    return SUCCESS;
-  }
-  for (auto &node : graph->GetDirectNode()) {
-    // CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT
-    auto op_desc = node->GetOpDesc();
-    if (op_desc == nullptr) {
-      continue;
-    }
-    auto op_type = op_desc->GetType();
-    if (op_type == NETOUTPUT) {
-      for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) {
-        const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
-        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
-        NodePtr in_node = peer_out_anchor->GetOwnerNode();
-        GE_CHECK_NOTNULL(in_node);
-        std::string in_node_op_type = in_node->GetType();
-        if (in_node_op_type == CONSTANT) {
-          GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str());
-          std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy";
-          if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) {
-            GELOGE(FAILED, "Insert memcpy between %s and %s failed.",
-                   in_node->GetName().c_str(), node->GetName().c_str());
-            return FAILED;
-          }
-        }
-      }
-    }
-  }
-  return SUCCESS;
-}
 Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) {
   bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag();
   com_graph->SetGraphUnknownFlag(false);
@@ -516,9 +481,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
         !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) {
       continue;
     }
-    GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed.");
     if (sub_graph->GetGraphUnknownFlag()) {
       // unknown shape build flow
       GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id),
@@ -68,7 +68,7 @@ struct GraphExecutionContext {
   DumpProperties dump_properties;
   bool trace_enabled = false;
   bool dump_enabled = false;
-  std::atomic_bool is_eos_;
+  std::atomic_bool is_eos_{false};
   long profiling_level = 0;
   long iteration = 0;
   void *global_step = nullptr;
@@ -33,9 +33,6 @@ HybridModelExecutor::~HybridModelExecutor() {
   if (context_.rt_gen_context != nullptr) {
     (void) rtCtxDestroy(context_.rt_gen_context);
   }
-  if (context_.global_step != nullptr) {
-    (void) rtFree(context_.global_step);
-  }
 }
 Status HybridModelExecutor::Init() {
@@ -49,9 +46,10 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
   GELOGD("Start to execute model.");
   auto root_graph_item = model_->GetRootGraphItem();
   GE_CHECK_NOTNULL(root_graph_item);
-  GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration,
-                              sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream));
+  if (context_.global_step != nullptr) {
+    GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration,
+                                sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream));
+  }
   SubgraphExecutor executor(model_->GetRootGraphItem(), &context_);
   auto ret = ExecuteGraphInternal(executor, args);
   Cleanup();
@@ -102,8 +100,8 @@ Status HybridModelExecutor::InitExecutionContext() {
   GE_CHK_RT_RET(rtCtxGetCurrent(&context_.rt_context));
   GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0));
   GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context));
-  GE_CHK_RT_RET(rtMalloc(&context_.global_step, sizeof(uint64_t), RT_MEMORY_HBM));
+  context_.global_step = model_->GetGlobalStep();
   context_.stream = stream_;
   context_.model = model_;
   context_.is_eos_ = false;
@@ -136,6 +134,16 @@ Status HybridModelExecutor::ResetExecutionContext(GraphExecutionContext &context
   string ctx_id = std::to_string(context.context_id);
   RuntimeInferenceContext::DestroyContext(ctx_id);
   GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext");
+  RuntimeInferenceContext *ctx = nullptr;
+  GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context");
+  for (auto &host_tensor : context.model->GetHostTensors()) {
+    auto node_id = host_tensor.first;
+    for (const auto &output_idx_and_tensor : host_tensor.second) {
+      auto output_idx = output_idx_and_tensor.first;
+      GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx);
+      ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone());
+    }
+  }
   return SUCCESS;
 }
 } // namespace hybrid
@@ -38,6 +38,16 @@ Status StageExecutor::ResetExecutionContext(GraphExecutionContext &context) {
   string ctx_id = std::to_string(context.context_id);
   RuntimeInferenceContext::DestroyContext(ctx_id);
   GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext");
+  RuntimeInferenceContext *ctx = nullptr;
+  GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context");
+  for (auto &host_tensor : context.model->GetHostTensors()) {
+    auto node_id = host_tensor.first;
+    for (const auto &output_idx_and_tensor : host_tensor.second) {
+      auto output_idx = output_idx_and_tensor.first;
+      GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx);
+      ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone());
+    }
+  }
   return SUCCESS;
 }
@@ -357,5 +357,25 @@ TensorValue *HybridModel::GetTensor(const NodePtr &node) const {
   return GetVariable(node->GetName());
 }
+const map<int64_t, std::vector<std::pair<int, Tensor>>> &HybridModel::GetHostTensors() const {
+  return host_tensors_;
+}
+void *HybridModel::GetGlobalStep() const {
+  if (global_step_ == nullptr) {
+    return nullptr;
+  }
+  return global_step_->GetData();
+}
+TensorBuffer *HybridModel::GetModelWeight(const string &subgraph_name) const {
+  auto it = weight_buffer_map_.find(subgraph_name);
+  if (it == weight_buffer_map_.end()) {
+    GELOGD("Model weight not found, subgraph name = %s", subgraph_name.c_str());
+    return nullptr;
+  }
+  return it->second.get();
+}
 } // namespace hybrid
 } // namespace ge
@@ -45,6 +45,8 @@ class HybridModel {
     return root_runtime_param_.session_id;
   }
+  void *GetGlobalStep() const;
   GeModelPtr GetGeModel(const NodePtr &node) const;
   NodeItem *MutableNodeItem(const NodePtr &node);
@@ -91,6 +93,10 @@ class HybridModel {
   TensorValue* GetTensor(const NodePtr &node) const;
+  TensorBuffer* GetModelWeight(const std::string &subgraph_name) const;
+  const std::map<int64_t, std::vector<std::pair<int, Tensor>>> &GetHostTensors() const;
   const std::vector<domi::TaskDef>* GetTaskDefs(const NodePtr &node) const;
   const GraphItem *GetRootGraphItem() const;
@@ -146,6 +152,7 @@ class HybridModel {
   std::unique_ptr<GraphItem> root_graph_item_;
   std::map<std::string, std::unique_ptr<GraphItem>> subgraph_items_;
   std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_;
+  std::map<int64_t, std::vector<std::pair<int, Tensor>>> host_tensors_;
   bool is_new_model_desc_ = false; // support aipp
   bool is_single_op_ = false;
@@ -154,10 +161,10 @@ class HybridModel {
   uint32_t device_id_ = 0;
   uint32_t model_id_ = 0;
   uint8_t *var_mem_base_ = nullptr;
-  std::unique_ptr<TensorBuffer> weight_buffer_;
   std::map<string, std::unique_ptr<TensorBuffer>> weight_buffer_map_;
   RuntimeParam root_runtime_param_;
   string om_name_;
+  std::unique_ptr<TensorBuffer> global_step_;
 };
 } // namespace hybrid
 } // namespace ge
@@ -145,6 +145,9 @@ Status HybridModelBuilder::Build() {
   GE_CHK_STATUS_RET(InitConstantOps(), "[%s] Failed to init constant op", GetGraphName());
   GE_CHK_STATUS_RET(InitVariableTensors(), "[%s] Failed to init variables", GetGraphName());
   GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName());
+  GE_CHK_STATUS_RET(OptimizeDependenciesForConstantInputs(),
+                    "[%s] Failed to optimize dependencies for constant inputs",
+                    GetGraphName());
   GELOGI("[%s] Done building hybrid model successfully.", GetGraphName());
   return SUCCESS;
 }
@@ -346,6 +349,7 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s
     auto src_node_item = MutableNodeItem(src_node);
     src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx());
     dependent_for_shape_inference.emplace(src_node);
+    host_input_value_dependencies_[&node_item].emplace_back(peer_out_anchor->GetIdx(), src_node_item);
     GELOGD("[%s] Dependent added from output of [%s:%d]",
            node_item.NodeName().c_str(),
            src_node_item->NodeName().c_str(),
@@ -1494,7 +1498,7 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) {
            src_node->GetName().c_str(),
            src_op_type.c_str());
-    if (src_op_type != CONSTANTOP && src_op_type != VARIABLE) {
+    if (src_op_type != CONSTANTOP && src_op_type != CONSTANT && src_op_type != VARIABLE) {
       continue;
     }
@@ -1503,6 +1507,9 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) {
     GELOGD("Got parent output index = %u", parent_index);
     GE_CHECK_LE(parent_index, INT32_MAX);
     node_item.ref_outputs.emplace(static_cast<int>(parent_index), src_node);
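+    // Record outputs that reference constants so OptimizeDependenciesForConstantInputs can
+    // drop runtime value dependencies on them later.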
+    if (src_op_type == CONSTANTOP || src_op_type == CONSTANT) {
+      known_subgraph_constant_output_refs_[&node_item].emplace(parent_index, src_node);
+    }
   }
   // Data nodes marked with REF_VAR_SRC_VAR_NAME
@@ -1568,6 +1575,10 @@ Status HybridModelBuilder::InitModelMem() {
   }
   runtime_param_.var_base = hybrid_model_.var_mem_base_;
+  auto allocator = NpuMemoryAllocator::GetAllocator();
+  GE_CHECK_NOTNULL(allocator);
+  hybrid_model_.global_step_ = TensorBuffer::Create(allocator, sizeof(int64_t));
+  GE_CHECK_NOTNULL(hybrid_model_.global_step_);
   return SUCCESS;
 }
@@ -2127,5 +2138,88 @@ Status HybridModelBuilder::ParseDependentByParallelGroup() {
   }
   return SUCCESS;
 }
+Status HybridModelBuilder::OptimizeDependenciesForConstantInputs() {
+  std::map<NodePtr, std::set<uint32_t>> converted;
+  for (auto &it : host_input_value_dependencies_) {
+    auto node_item = it.first;
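+    // Track how many of this node's host-value dependencies point at each source node; a source
+    // whose count drops to zero is removed from dependents_for_shape_inference below.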
+    std::map<NodeItem *, int> ref_counts;
+    bool changed = false;
+    for (auto output_idx_and_node : it.second) {
+      auto output_idx = output_idx_and_node.first;
+      auto src_node_item = output_idx_and_node.second;
+      ++ref_counts[src_node_item];
+      NodePtr constant_node;
+      if (src_node_item->node_type == CONSTANT || src_node_item->node_type == CONSTANTOP) {
+        constant_node = src_node_item->node;
+        GELOGD("src node [%s] is a constant", src_node_item->NodeName().c_str());
+      } else {
+        auto iter = known_subgraph_constant_output_refs_.find(src_node_item);
+        if (iter != known_subgraph_constant_output_refs_.end()) {
+          constant_node = iter->second[output_idx];
+          if (constant_node != nullptr) {
+            GELOGD("Output[%u] of subgraph [%s] is a constant", output_idx, src_node_item->NodeName().c_str());
+          }
+        }
+      }
+      if (constant_node == nullptr) {
+        GELOGD("Output[%u] of [%s] is not a constant", output_idx, src_node_item->NodeName().c_str());
+        continue;
+      }
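+      // Each constant output is converted to a host tensor at most once, even if several nodes depend on it.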
+      if (converted[constant_node].count(output_idx) == 0) {
+        GE_CHK_STATUS_RET(Convert2HostTensor(constant_node, src_node_item->node_id, output_idx),
+                          "[%s] Failed to convert constant to host tensor", constant_node->GetName().c_str());
+        converted[constant_node].emplace(output_idx);
+      }
+      src_node_item->to_const_output_id_list.erase(output_idx);
+      --ref_counts[src_node_item];
+      changed = true;
+    }
+    if (changed) {
+      std::vector<NodePtr> depends_to_keep;
+      for (auto &ref_count_it : ref_counts) {
+        if (ref_count_it.second == 0) {
+          GELOGD("[%s] no longer depends on [%s] for shape inference",
+                 node_item->NodeName().c_str(),
+                 ref_count_it.first->NodeName().c_str());
+        } else {
+          depends_to_keep.emplace_back(ref_count_it.first->node);
+        }
+      }
+      node_item->dependents_for_shape_inference.swap(depends_to_keep);
+    }
+  }
+  return SUCCESS;
+}
+Status HybridModelBuilder::Convert2HostTensor(const NodePtr &node, int node_id, uint32_t output_idx) {
+  auto tensor_value = hybrid_model_.GetTensor(node);
+  GE_CHECK_NOTNULL(tensor_value);
+  auto tensor_desc = node->GetOpDesc()->MutableOutputDesc(0);
+  GE_CHECK_NOTNULL(tensor_desc);
+  Tensor tensor(TensorAdapter::GeTensorDesc2TensorDesc(*tensor_desc));
+  int64_t tensor_size = -1;
+  GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorSizeInBytes(*tensor_desc, tensor_size),
+                          "[%s] Failed to get tensor size", node->GetName().c_str());
+  if (tensor_size > 0) {
+    auto copy_size = static_cast<size_t>(tensor_size);
+    GE_CHECK_GE(tensor_value->GetSize(), copy_size);
+    std::vector<uint8_t> buffer(copy_size);
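+    // Copy the constant's value from device memory into the host-side buffer owned by the Tensor.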
+    GE_CHK_RT_RET(rtMemcpy(buffer.data(),
+                           copy_size,
+                           tensor_value->GetData(),
+                           copy_size,
+                           RT_MEMCPY_DEVICE_TO_HOST));
+    tensor.SetData(std::move(buffer));
+    GELOGD("[%s] Copy constant tensor to host successfully, size = %zu", node->GetName().c_str(), copy_size);
+  }
+  hybrid_model_.host_tensors_[node_id].emplace_back(output_idx, std::move(tensor));
+  return SUCCESS;
+}
 } // namespace hybrid
 } // namespace ge
@@ -91,6 +91,8 @@ class HybridModelBuilder {
   Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
   Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
   Status GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, vector<domi::TaskDef> &task_def_list);
+  Status OptimizeDependenciesForConstantInputs();
+  Status Convert2HostTensor(const NodePtr &node, int node_id, uint32_t output_idx);
   const char* GetGraphName() const {
     return hybrid_model_.model_name_.c_str();
@@ -110,6 +112,12 @@ class HybridModelBuilder {
   RuntimeParam &runtime_param_;
   VarManager *var_manager_ = nullptr;
+  // map<known_node_item, map<output_idx, constant_node>>
+  std::map<NodeItem *, std::map<uint32_t, NodePtr>> known_subgraph_constant_output_refs_;
+  // map<dst_node_item, vector<output_idx, src_node_item>>
+  std::map<NodeItem *, std::vector<std::pair<uint32_t, NodeItem *>>> host_input_value_dependencies_;
 };
 } // namespace hybrid
 } // namespace ge
@@ -18,6 +18,7 @@
 #include "cce/aicpu_engine_struct.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/fmk_error_codes.h"
+#include "common/dump/dump_manager.h"
 #include "common/ge/ge_util.h"
 #include "graph/attr_value.h"
 #include "graph/debug/ge_attr_define.h"
@@ -110,15 +111,6 @@ Status KnownNodeTask::Init(TaskContext &context) {
     GELOGI("KnownNodeTask::Init mem base is %p, size %lu.",
            davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size);
   }
-  if (!load_flag_) {
-    auto dump_properties = context.GetDumpProperties();
-    if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
-      davinci_model_->SetDumpProperties(dump_properties);
-      void *global_step = context.GetExecutionContext()->global_step;
-      davinci_model_->SetKnownShapeGlobalStep(global_step);
-    }
-    load_flag_ = true;
-  }
   GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(),
                                                                     davinci_model_->Id(), davinci_model_->SubModelId()),
                     "KnownNodeTask::Init destroy aicpu kernel failed.");
@@ -126,20 +118,35 @@ Status KnownNodeTask::Init(TaskContext &context) {
   return SUCCESS;
 }
-Status KnownNodeTask::InitDavinciModel() {
-  GELOGD("[Init][Model] start");
+Status KnownNodeTask::InitDavinciModel(const HybridModel &model, TensorBuffer *weight_buffer) {
+  GELOGD("[Init][DavinciModel] start");
   davinci_model_->InitRuntimeParams();
   GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), "init variable mem failed");
   int32_t device_id = 0;
   GE_CHK_RT_RET(rtGetDevice(&device_id));
   davinci_model_->SetDeviceId(static_cast<uint32_t>(device_id));
-  GE_CHK_STATUS_RET(DoInitDavinciModel(), "[Init][Model] Failed to init davinci model.");
+  auto dump_properties = DumpManager::GetInstance().GetDumpProperties(model.GetSessionId());
+  if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
+    davinci_model_->SetDumpProperties(dump_properties);
+    void *global_step = model.GetGlobalStep();
+    davinci_model_->SetKnownShapeGlobalStep(global_step);
+  }
+  void *weight = nullptr;
+  size_t weight_size = 0;
+  if (weight_buffer != nullptr) {
+    weight = weight_buffer->GetData();
+    weight_size = weight_buffer->GetSize();
+  }
+  GELOGD("Start to init davinci model, weight size = %zu", weight_size);
+  GE_CHK_STATUS_RET(DoInitDavinciModel(weight, weight_size), "[Init][Model] Failed to init davinci model.");
   GELOGD("[Init][Model] success");
   return SUCCESS;
 }
-Status KnownNodeTask::DoInitDavinciModel() {
-  return davinci_model_->Init();
+Status KnownNodeTask::DoInitDavinciModel(void *weight, size_t weight_size) {
+  return davinci_model_->Init(nullptr, 0, weight, weight_size);
 }
 Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const {
@@ -165,6 +172,10 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node
   const GeModelPtr ge_model = model.GetGeModel(node);
   GE_CHECK_NOTNULL(ge_model);
+  AscendString graph_name;
+  GE_CHK_GRAPH_STATUS_RET(ge_model->GetGraph().GetName(graph_name), "Failed to get graph name");
+  auto weight_buffer = model.GetModelWeight(graph_name.GetString());
   std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, nullptr);
   GE_CHECK_NOTNULL(davinci_model);
@@ -181,7 +192,7 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node
   auto known_node_task = MakeShared<KnownNodeTask>(davinci_model);
   GE_CHECK_NOTNULL(known_node_task);
-  GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel());
+  GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel(model, weight_buffer));
   GELOGI("[%s] KnownNodeExecutor::LoadTask success.", node->GetName().c_str());
   task = std::move(known_node_task);
   return SUCCESS;
@@ -36,13 +36,12 @@ class KnownNodeTask : public NodeTask {
   Status UpdateArgs(TaskContext &context) override;
   Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;
   Status Init(TaskContext &context) override;
-  Status InitDavinciModel();
+  Status InitDavinciModel(const HybridModel &model, TensorBuffer *weight_buffer);
  protected:
-  virtual Status DoInitDavinciModel();
+  virtual Status DoInitDavinciModel(void *weight, size_t weight_size);
  private:
   std::shared_ptr<DavinciModel> davinci_model_ = nullptr;
-  bool load_flag_ = false;
 };
 class KnownNodeExecutor : public NodeExecutor {