From: @ni100die
Reviewed-by: @xchu42, @ji_chen
Signed-off-by: tags/v1.2.0
@@ -434,7 +434,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
                       "Assign node %s continuous input memory failed.", node->GetName().c_str())
   }
   for (auto pair : memory_offset_) {
-    GELOGD("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first,
+    GELOGD("After reassign continuous memory, memory type = %ld, mem_offset = %zu.", pair.first,
            pair.second.mem_offset_);
   }
   return ge::SUCCESS;
@@ -512,11 +512,11 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
       auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx());
       output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first);
       peer_op_desc->SetOutputOffset(output_list);
-      GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld", node->GetName().c_str(),
+      GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld.", node->GetName().c_str(),
             out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(),
             output_list_this.at(out2ins.begin()->first), peer_output_offset);
     } else {
-      GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu", node->GetName().c_str(),
+      GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu.", node->GetName().c_str(),
             out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size());
     }
     // first input is beginning offset
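Note: the hunk above is the ref-input path of continuous input assignment: when an output of the node is a reference of one of its inputs, the producer's output offset is overwritten so both anchors address the same memory, and the old offset is kept only for the log. A minimal standalone sketch of that bookkeeping (plain offset vectors standing in for the OpDesc output-offset lists; all names here are illustrative, not the real GE API):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Offsets of each output of an op, as in an OpDesc output-offset list.
    using OffsetList = std::vector<int64_t>;

    // When output `this_out_idx` refs an input fed by `peer_out_idx` of the
    // producer, rewrite the producer's offset so both sides alias the same
    // address; the replaced offset is reported, as in the GELOGI above.
    void PropagateRefOffset(OffsetList &peer_outputs, int peer_out_idx,
                            const OffsetList &this_outputs, int this_out_idx) {
      int64_t old_offset = peer_outputs.at(peer_out_idx);
      peer_outputs.at(peer_out_idx) = this_outputs.at(this_out_idx);
      std::printf("use output offset %ld update %ld.\n",
                  static_cast<long>(this_outputs.at(this_out_idx)),
                  static_cast<long>(old_offset));
    }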
@@ -542,7 +542,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
     }
     GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] "
-           "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
+           "size[%zu] realsize[%ld] nopadding size[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
            peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(),
            output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type,
            is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding);
@@ -1549,7 +1549,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
     auto continuous_type = iter->second;
     bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
     if (continuous_input) {
-      GELOGI("node %s 's precursor node %s need assign continuous input memory, store node firstly.",
+      GELOGI("Node %s 's precursor node %s need assign continuous input memory, store node firstly.",
             input_continuous_node->GetName().c_str(), in_node->GetName().c_str());
       return false;
     }
@@ -1559,7 +1559,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
     node_2_continuous_type.emplace(out_node, continuous_type);
     bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
     if (continuous_input) {
-      GELOGI("node %s 's succeed node %s need assign continuous input memory, store node firstly.",
+      GELOGI("Node %s 's succeed node %s need assign continuous input memory, store node firstly.",
             input_continuous_node->GetName().c_str(), out_node->GetName().c_str());
       return false;
     }
@@ -366,8 +366,11 @@ void ModelBuilder::InitL1FusionOption() {
   string buffer_optimize = "off_optimize";
   graphStatus ret = ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize);
   if (ret == GRAPH_SUCCESS) {
-    is_l1_fusion_enable_ = (buffer_optimize == "l1_optimize");
-    GELOGD("The value of %s is %s.", BUFFER_OPTIMIZE.c_str(), buffer_optimize.c_str());
+    bool off_superkernel = false;
+    (void)AttrUtils::GetBool(compute_graph_, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel);
+    is_l1_fusion_enable_ = ((buffer_optimize == "l1_optimize") && (!off_superkernel));
+    GELOGI("Compute graph %s the value of %s is %s, superkernel flag %d.", compute_graph_->GetName().c_str(),
+           BUFFER_OPTIMIZE.c_str(), buffer_optimize.c_str(), is_l1_fusion_enable_);
   } else {
     GELOGW("The value of %s is empty.", kEnableL1Fusion.c_str());
   }
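Note: the behavioural change here is that the BUFFER_OPTIMIZE option no longer decides L1 fusion by itself; a per-graph ATTR_NAME_OFF_SUPERKERNEL_ATTR attribute can now veto it. A minimal sketch of the new predicate (free function with assumed names, not the actual ModelBuilder member):

    #include <string>

    // L1 fusion is requested via the build option, but a graph carrying the
    // "superkernel off" attribute opts out even when the option is set.
    bool DecideL1Fusion(const std::string &buffer_optimize, bool off_superkernel) {
      return (buffer_optimize == "l1_optimize") && !off_superkernel;
    }

    // e.g. DecideL1Fusion("l1_optimize", /*off_superkernel=*/true) == false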
@@ -709,7 +712,7 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) {
   GE_TIMESTAMP_START(SetInputOutputOffset);
   SetInputOutputOffsetPass input_output_offset;
   GE_CHK_STATUS_RET(input_output_offset.Run(compute_graph_), "Set input output offset failed.");
-  GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run.");
+  GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run");

   // Compile single op in graph build stage
   GE_TIMESTAMP_START(CompileSingleOp);
@@ -532,20 +532,20 @@ Status DavinciModel::DoTaskSink() {
   GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed.");

   if (known_node_) {
-    GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed.");
+    GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node's args failed");
   }

-  GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed.");
+  GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed");

-  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");
+  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed");

-  GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed.");
+  GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed");

-  GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");
+  GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed");

-  GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed.");
+  GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed");

-  GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed.");
+  GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed");

   GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_));
@@ -3090,6 +3090,15 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra
       sub_graph->SetSessionID(session_id);
       sub_graph->SetGraphID(graph_node->GetGraphId());
     }
+    bool off_superkernel = false;
+    if (AttrUtils::GetBool(compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) {
+      GELOGI("Compute graph %s get superkernel flag %d.", compute_graph->GetName().c_str(), off_superkernel);
+      if (!AttrUtils::SetBool(merged_compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) {
+        GELOGE(FAILED, "Compute graph %s set superkernel flag %d failed", merged_compute_graph->GetName().c_str(),
+               off_superkernel);
+        return FAILED;
+      }
+    }
     GE_TIMESTAMP_EVENT_END(MergeSubgraph, "OptimizeSubgraph::MergeSubGraph");
     GE_DUMP(merged_compute_graph, "mergedComputeGraph");
     compute_graph = merged_compute_graph;
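Note: this added block is what feeds the InitL1FusionOption change above: OptimizeSubgraph replaces compute_graph with merged_compute_graph, so an attribute set on the original graph would be lost unless copied across the merge. A toy sketch of the copy-if-present pattern (toy Graph/attribute types and a placeholder key, not ge::AttrUtils or the real attribute string):

    #include <map>
    #include <string>

    // Placeholder for ATTR_NAME_OFF_SUPERKERNEL_ATTR; the real key lives in GE.
    static const std::string kOffSuperkernelAttr = "off_superkernel";

    struct Graph {
      std::map<std::string, bool> bool_attrs;  // stand-in for GE graph attributes
    };

    // Copy the flag only when it was explicitly set on the source graph, so a
    // merged graph without the attribute keeps the default superkernel behaviour.
    void CopyOffSuperkernelFlag(const Graph &src, Graph &dst) {
      auto it = src.bool_attrs.find(kOffSuperkernelAttr);
      if (it != src.bool_attrs.end()) {
        dst.bool_attrs[kOffSuperkernelAttr] = it->second;
      }
    }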
@@ -111,8 +111,9 @@ void DynamicStitchKernel::ComputeMergedShape(const vector<ConstGeTensorPtr> &inp
   int32_t merged_first_dim = 0;
   int64_t indices_shape_size = 0;
   for (int i = 0; i < n_; i++) {
-    indices_shape_size = input[i]->GetTensorDesc().GetShape().GetShapeSize();
-    indices_shape_size = indices_shape_size == 0 ? 1 : indices_shape_size;
+    // shape is [] means scalar
+    indices_shape_size =
+        input[i]->GetTensorDesc().GetShape().GetDims().empty() ? 1 : input[i]->GetTensorDesc().GetShape().GetShapeSize();
     const int32_t *input_indices = reinterpret_cast<const int32_t *>(input[i]->GetData().data());
     for (int64_t j = 0; j < indices_shape_size; j++) {
       merged_first_dim = std::max(merged_first_dim, input_indices[j]);
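Note: GetShapeSize() is the product of the dims, so it reports 0 both for a scalar (shape []) and for a genuinely empty tensor (e.g. shape [0]). The old code mapped every 0 to 1, which made the inner loop read one garbage index from an empty tensor; the fix treats only the empty-dims (scalar) case as one element. A self-contained sketch of the corrected rule (plain dim vector instead of ge::GeShape):

    #include <cstdint>
    #include <vector>

    // Element count of an indices tensor: shape [] is a scalar holding one
    // index; any zero dim means there are genuinely no indices to scan.
    int64_t IndicesElementCount(const std::vector<int64_t> &dims) {
      if (dims.empty()) {
        return 1;  // scalar
      }
      int64_t count = 1;
      for (int64_t d : dims) {
        count *= d;  // a 0 dim keeps the count at 0
      }
      return count;
    }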