@@ -18,14 +18,11 @@ | |||||
#include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
#include "common/helper/model_helper.h" | #include "common/helper/model_helper.h" | ||||
#include "common/opskernel/ops_kernel_info_types.h" | #include "common/opskernel/ops_kernel_info_types.h" | ||||
#include "graph/build/logical_stream_allocator.h" | |||||
#include "graph/build/run_context.h" | #include "graph/build/run_context.h" | ||||
#include "graph/build/stream_graph_optimizer.h" | #include "graph/build/stream_graph_optimizer.h" | ||||
#include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
#include "graph/passes/mark_same_addr_pass.h" | |||||
#include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
#include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
#include "graph/common/ge_call_wrapper.h" | |||||
#include "init/gelib.h" | #include "init/gelib.h" | ||||
#include "model/ge_model.h" | #include "model/ge_model.h" | ||||
@@ -37,21 +34,6 @@ const int32_t kInvalidPerfLevel = -1; | |||||
namespace ge { | namespace ge { | ||||
GraphBuilder::GraphBuilder() : build_mode_(BuildMode::GEN_TASK_WITH_FUSION), hcom_parallel_(false) {} | GraphBuilder::GraphBuilder() : build_mode_(BuildMode::GEN_TASK_WITH_FUSION), hcom_parallel_(false) {} | ||||
Status GraphBuilder::MarkGraph(ComputeGraphPtr &graph) { | |||||
GE_CHECK_NOTNULL(graph); | |||||
bool is_unknown_shape = false; | |||||
for (const auto &node : graph->GetDirectNode()) { | |||||
GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown_shape), | |||||
"Get node[%s] shape status failed!", node->GetName().c_str()); | |||||
if (is_unknown_shape) { | |||||
break; | |||||
} | |||||
} | |||||
graph->SetGraphUnknownFlag(is_unknown_shape); | |||||
GELOGD("mark graph [%s] unknown status success! value is %d", graph->GetName().c_str(), is_unknown_shape); | |||||
return SUCCESS; | |||||
} | |||||
void GraphBuilder::SetOptions(const ge::GraphManagerOptions &options) { | void GraphBuilder::SetOptions(const ge::GraphManagerOptions &options) { | ||||
stream_max_parallel_num_ = options.stream_max_parallel_num; | stream_max_parallel_num_ = options.stream_max_parallel_num; | ||||
hcom_parallel_ = options.hcom_parallel; | hcom_parallel_ = options.hcom_parallel; | ||||
@@ -72,7 +54,7 @@ Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) { | |||||
return GE_CLI_GE_NOT_INITIALIZED; | return GE_CLI_GE_NOT_INITIALIZED; | ||||
} | } | ||||
for (const auto &node_ptr : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
for (const auto &node_ptr : graph->GetAllNodes()) { | |||||
GE_CHECK_NOTNULL(node_ptr->GetOpDesc()); | GE_CHECK_NOTNULL(node_ptr->GetOpDesc()); | ||||
std::string kernel_lib_name = node_ptr->GetOpDesc()->GetOpKernelLibName(); | std::string kernel_lib_name = node_ptr->GetOpDesc()->GetOpKernelLibName(); | ||||
if (kernel_lib_name.empty()) { | if (kernel_lib_name.empty()) { | ||||
@@ -120,7 +102,11 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph | |||||
graph->GetName().c_str()); | graph->GetName().c_str()); | ||||
auto parent_op_desc = parent_node_ptr->GetOpDesc(); | auto parent_op_desc = parent_node_ptr->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(parent_op_desc); | GE_CHECK_NOTNULL(parent_op_desc); | ||||
bool is_unknown_shape = graph->GetGraphUnknownFlag(); | |||||
bool is_unknown_shape = false; | |||||
if (!AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape)) { | |||||
GELOGE(PARAM_INVALID, "Get op %s unknown shape attr failed.", parent_op_desc->GetName().c_str()); | |||||
return PARAM_INVALID; | |||||
} | |||||
if (is_unknown_shape) { | if (is_unknown_shape) { | ||||
GELOGI("Current graph[%s] is unknown, no need to update parent node[%s] output size.", graph->GetName().c_str(), | GELOGI("Current graph[%s] is unknown, no need to update parent node[%s] output size.", graph->GetName().c_str(), | ||||
parent_node_ptr->GetName().c_str()); | parent_node_ptr->GetName().c_str()); | ||||
@@ -135,14 +121,14 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph | |||||
for (const auto &in_data_anchor : node_ptr->GetAllInDataAnchors()) { | for (const auto &in_data_anchor : node_ptr->GetAllInDataAnchors()) { | ||||
auto index = in_data_anchor->GetIdx(); | auto index = in_data_anchor->GetIdx(); | ||||
ge::GeTensorDesc desc_temp = op_desc->GetInputDesc(index); | ge::GeTensorDesc desc_temp = op_desc->GetInputDesc(index); | ||||
int64_t size = 0; | |||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(desc_temp, size) != SUCCESS, GELOGI("Get size failed!")); | |||||
uint32_t parent_index = 0; | uint32_t parent_index = 0; | ||||
if (!AttrUtils::GetInt(desc_temp, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | if (!AttrUtils::GetInt(desc_temp, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | ||||
GELOGI("NetOutput input tensor %d, attr %s not found.", index, ATTR_NAME_PARENT_NODE_INDEX.c_str()); | |||||
continue; | |||||
GELOGE(INTERNAL_ERROR, "NetOutput input tensor %d, attr %s not found.", index, | |||||
ATTR_NAME_PARENT_NODE_INDEX.c_str()); | |||||
return INTERNAL_ERROR; | |||||
} | } | ||||
int64_t size = 0; | |||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(desc_temp, size) != SUCCESS, GELOGI("Get size failed!")); | |||||
ge::GeTensorDesc parent_desc_temp = parent_op_desc->GetOutputDesc(parent_index); | ge::GeTensorDesc parent_desc_temp = parent_op_desc->GetOutputDesc(parent_index); | ||||
ge::TensorUtils::SetSize(parent_desc_temp, size); | ge::TensorUtils::SetSize(parent_desc_temp, size); | ||||
GE_CHK_STATUS_RET(parent_op_desc->UpdateOutputDesc(parent_index, parent_desc_temp)); | GE_CHK_STATUS_RET(parent_op_desc->UpdateOutputDesc(parent_index, parent_desc_temp)); | ||||
@@ -190,7 +176,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, | |||||
auto subgraph_map = graph_partitioner_.GetSubGraphMap(); | auto subgraph_map = graph_partitioner_.GetSubGraphMap(); | ||||
GE_TIMESTAMP_START(BuildSubgraph); | GE_TIMESTAMP_START(BuildSubgraph); | ||||
ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); | |||||
ge::ModelBuilder builder(comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); | |||||
GE_DUMP(comp_graph, "BeforePreBuildModel"); | GE_DUMP(comp_graph, "BeforePreBuildModel"); | ||||
GE_TIMESTAMP_START(PreBuildModel); | GE_TIMESTAMP_START(PreBuildModel); | ||||
GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.", | GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.", | ||||
@@ -243,7 +229,7 @@ Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeMo | |||||
GE_TIMESTAMP_END(CalcOpParam, "GraphBuilder::CalcOpParam"); | GE_TIMESTAMP_END(CalcOpParam, "GraphBuilder::CalcOpParam"); | ||||
GE_DUMP(comp_graph, "AfterCalcOpParam"); | GE_DUMP(comp_graph, "AfterCalcOpParam"); | ||||
Graph2SubGraphInfoList subgraph_map; | Graph2SubGraphInfoList subgraph_map; | ||||
ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); | |||||
ge::ModelBuilder builder(comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); | |||||
ModelPtr model_ptr = MakeShared<ge::Model>(); | ModelPtr model_ptr = MakeShared<ge::Model>(); | ||||
if (model_ptr == nullptr) { | if (model_ptr == nullptr) { | ||||
return MEMALLOC_FAILED; | return MEMALLOC_FAILED; | ||||
@@ -277,38 +263,51 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||||
GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | ||||
uint64_t session_id) { | uint64_t session_id) { | ||||
GELOGI("Start to build BuildForDynamicShape for dynamic shape."); | GELOGI("Start to build BuildForDynamicShape for dynamic shape."); | ||||
// mark unknown shape attr | |||||
for (auto &sub_graph : comp_graph->GetAllSubgraphs()) { | |||||
auto status = MarkGraph(sub_graph); | |||||
if (status != SUCCESS) { | |||||
GELOGE(FAILED, "mark graph failed!"); | |||||
return status; | |||||
} | |||||
} | |||||
// Update Root Graph Data size | |||||
for (auto &node : comp_graph->GetDirectNode()) { | |||||
for (const auto &node : comp_graph->GetDirectNode()) { | |||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
op_desc->SetStreamId(kInvalidStream); | |||||
if (node->GetType() == DATA) { | if (node->GetType() == DATA) { | ||||
GE_CHK_STATUS_RET(CalcDynShapeRootGraphDataSize(op_desc), "Calc dynamic shape root graph data[%s] size failed.", | GE_CHK_STATUS_RET(CalcDynShapeRootGraphDataSize(op_desc), "Calc dynamic shape root graph data[%s] size failed.", | ||||
op_desc->GetName().c_str()); | op_desc->GetName().c_str()); | ||||
} | } | ||||
} | |||||
// | |||||
for (auto &sub_graph : comp_graph->GetAllSubgraphs()) { | |||||
if (sub_graph->GetGraphUnknownFlag()) { | |||||
// unknown shape build flow | |||||
GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), | |||||
"Build for unknown shape graph failed."); | |||||
} else { | |||||
// known shape build flow | |||||
GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id), | |||||
"Build for known shape graph failed."); | |||||
// ATTR_NAME_IS_UNKNOWN_SHAPE is set on "graph partion" stage, but afer fusion , the graph may | |||||
// be changed so here need to renew. For example , the scene followed: | |||||
// (known)partioncall(known) (known)partioncall(known) | |||||
// After fusion | |||||
// | --> | |||||
// (known)Unique(unknown)--->(unknow)Shape(unknown) (known)FuncDef(known) | |||||
// if scene like this , it should be process as known shape graph | |||||
bool is_unknown_shape = false; | |||||
GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown_shape), | |||||
"Get node[%s] shape status failed!", node->GetName().c_str()); | |||||
if (!is_unknown_shape) { | |||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape), return FAILED, | |||||
"Renew node [%s] attr[%s] failed!", node->GetName().c_str(), ATTR_NAME_IS_UNKNOWN_SHAPE.c_str()); | |||||
GELOGD("renew node [%s] attr[%s] success! value is %d", node->GetName().c_str(), | |||||
ATTR_NAME_IS_UNKNOWN_SHAPE.c_str(), is_unknown_shape); | |||||
} | } | ||||
ge_root_model_ptr->SetSubgraphInstanceNameToModel(sub_graph->GetName(), ge_model_ptr); | |||||
} | |||||
vector<string> subgraph_names = op_desc->GetSubgraphInstanceNames(); | |||||
for (auto subgraph_name : subgraph_names) { | |||||
ComputeGraphPtr subgraph = comp_graph->GetSubgraph(subgraph_name); | |||||
bool is_unknown_shape = false; | |||||
if (!AttrUtils::GetBool(op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape)) { | |||||
GELOGE(PARAM_INVALID, "Get op %s unknown shape attr failed.", op_desc->GetName().c_str()); | |||||
return PARAM_INVALID; | |||||
} | |||||
if (is_unknown_shape) { | |||||
// unknown shape build flow | |||||
GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(subgraph, ge_model_ptr, session_id), | |||||
"Build for unknown shape graph failed."); | |||||
} else { | |||||
// known shape build flow | |||||
GE_CHK_STATUS_RET(BuildForKnownShapeGraph(subgraph, subgraph_ptr_list, ge_model_ptr, session_id), | |||||
"Build for known shape graph failed."); | |||||
} | |||||
ge_root_model_ptr->SetSubgraphInstanceNameToModel(subgraph_name, ge_model_ptr); | |||||
} | |||||
} | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -328,9 +327,8 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr | |||||
GELOGE(INTERNAL_ERROR, "Get weight memory size fail."); | GELOGE(INTERNAL_ERROR, "Get weight memory size fail."); | ||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
auto var_manager = VarManager::Instance(session_id); | |||||
auto *get_mem_base = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(var_manager->GetVarMemMaxSize())); | |||||
auto *get_mem_base = | |||||
reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(ge::VarManager::Instance(0)->GetVarMemMaxSize())); | |||||
uint8_t *get_weight_mem_base = get_mem_base; | uint8_t *get_weight_mem_base = get_mem_base; | ||||
if (weight_size > 0) { | if (weight_size > 0) { | ||||
get_weight_mem_base = get_mem_base + memory_size; | get_weight_mem_base = get_mem_base + memory_size; | ||||
@@ -356,8 +354,11 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr | |||||
return ret; | return ret; | ||||
} | } | ||||
GE_DUMP(comp_graph, "AfterOptimizeStreamedSubGraph"); | GE_DUMP(comp_graph, "AfterOptimizeStreamedSubGraph"); | ||||
auto *get_var_mem_base = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(var_manager->GetVarMemLogicBase())); | |||||
uint64_t var_size = (var_manager->GetVarMemSize(RT_MEMORY_HBM) > 0) ? var_manager->GetVarMemMaxSize() : 0; | |||||
auto *get_var_mem_base = | |||||
reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(ge::VarManager::Instance(0)->GetVarMemLogicBase())); | |||||
uint64_t var_size = (ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) > 0) | |||||
? ge::VarManager::Instance(0)->GetVarMemMaxSize() | |||||
: 0; | |||||
TaskGenerator task_generator(get_var_mem_base, var_size); | TaskGenerator task_generator(get_var_mem_base, var_size); | ||||
ret = task_generator.GetTaskInfo(*model_ptr, comp_graph, session_id, run_context.GetRunContext()); | ret = task_generator.GetTaskInfo(*model_ptr, comp_graph, session_id, run_context.GetRunContext()); | ||||
@@ -367,13 +368,6 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr | |||||
Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) { | Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) { | ||||
// set input_desc.size = src_node.output_desc.size | // set input_desc.size = src_node.output_desc.size | ||||
if (node_ptr->GetType() == DATA) { | if (node_ptr->GetType() == DATA) { | ||||
bool is_unknown_shape = false; | |||||
GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node_ptr, is_unknown_shape), | |||||
"Get data node[%s] shape status failed!", node_ptr->GetName().c_str()); | |||||
if (is_unknown_shape) { | |||||
GELOGD("data node: %s is unknown shape, do not set input size!", node_ptr->GetName().c_str()); | |||||
return SUCCESS; | |||||
} | |||||
if (UpdateDataInputSize(node_ptr) != SUCCESS) { | if (UpdateDataInputSize(node_ptr) != SUCCESS) { | ||||
GELOGE(FAILED, "Update data input size failed."); | GELOGE(FAILED, "Update data input size failed."); | ||||
return FAILED; | return FAILED; | ||||
@@ -404,7 +398,7 @@ Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) { | |||||
GE_CHECK_NOTNULL(input_desc); | GE_CHECK_NOTNULL(input_desc); | ||||
ge::TensorUtils::SetSize(const_cast<GeTensorDesc &>(*input_desc), size); | ge::TensorUtils::SetSize(const_cast<GeTensorDesc &>(*input_desc), size); | ||||
GE_CHK_STATUS_RET(node_op_desc->UpdateInputDesc(in_data_anchor->GetIdx(), *input_desc)); | GE_CHK_STATUS_RET(node_op_desc->UpdateInputDesc(in_data_anchor->GetIdx(), *input_desc)); | ||||
GELOGD("%s input desc, dim_size: %zu, mem_size: %ld, format: %s, type: %s.", node_ptr->GetName().c_str(), | |||||
GELOGD("%s input desc, dim_size: %zu, mem_size: %u, format: %s, type: %s.", node_ptr->GetName().c_str(), | |||||
input_desc->GetShape().GetDimNum(), size, TypeUtils::FormatToSerialString(input_desc->GetFormat()).c_str(), | input_desc->GetShape().GetDimNum(), size, TypeUtils::FormatToSerialString(input_desc->GetFormat()).c_str(), | ||||
TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str()); | TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str()); | ||||
} | } | ||||
@@ -67,7 +67,6 @@ class GraphBuilder { | |||||
GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | ||||
Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | ||||
uint64_t session_id = INVALID_SESSION_ID); | uint64_t session_id = INVALID_SESSION_ID); | ||||
Status MarkGraph(ComputeGraphPtr &graph); | |||||
int build_mode_; | int build_mode_; | ||||
std::map<std::string, int> stream_max_parallel_num_; | std::map<std::string, int> stream_max_parallel_num_; | ||||
@@ -24,6 +24,7 @@ | |||||
#include "graph/label/label_maker.h" | #include "graph/label/label_maker.h" | ||||
namespace ge { | namespace ge { | ||||
LabelAllocator::LabelAllocator(const ComputeGraphPtr &graph) : compute_graph_(graph) {} | LabelAllocator::LabelAllocator(const ComputeGraphPtr &graph) : compute_graph_(graph) {} | ||||
Status LabelAllocator::AssignFunctionalLabels(uint32_t &label_index) { | Status LabelAllocator::AssignFunctionalLabels(uint32_t &label_index) { | ||||
@@ -75,4 +76,5 @@ bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::set<Node | |||||
(void)functional_nodes.insert(parent); // unique functional node. | (void)functional_nodes.insert(parent); // unique functional node. | ||||
return true; | return true; | ||||
} | } | ||||
} // namespace ge | } // namespace ge |
@@ -22,7 +22,6 @@ | |||||
#include "framework/common/types.h" | #include "framework/common/types.h" | ||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
#include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
#include "graph/common/ge_call_wrapper.h" | |||||
using std::map; | using std::map; | ||||
using std::queue; | using std::queue; | ||||
@@ -30,6 +29,12 @@ using std::set; | |||||
using std::string; | using std::string; | ||||
using std::vector; | using std::vector; | ||||
namespace { | |||||
const char *const kAICPUEngineName = "DNN_VM_AICPU"; | |||||
const char *const kAttrNameParentOpType = "parentOpType"; | |||||
const size_t kHeadNodeMaxNum = 820; // calculated by 1024 * 0.8 | |||||
} // namespace | |||||
namespace ge { | namespace ge { | ||||
LogicalStreamPass::LogicalStreamPass(const string &name) : name_(name) {} | LogicalStreamPass::LogicalStreamPass(const string &name) : name_(name) {} | ||||
@@ -49,6 +54,24 @@ bool LogicalStreamPass::HasAssignedStream(const Subgraph &subgraph) const { | |||||
return subgraph.stream_id != kInvalidStream; | return subgraph.stream_id != kInvalidStream; | ||||
} | } | ||||
bool LogicalStreamPass::HasNonConstInputNode(const Subgraph &subgraph) const { | |||||
const SubGraphInfo &subgraph_info = subgraph.subgraph_info; | |||||
const auto &pld_to_end_map = subgraph_info.GetPld2EndMap(); | |||||
for (const auto &pld_to_end : pld_to_end_map) { | |||||
const NodePtr &placeholder = pld_to_end.first; | |||||
if (placeholder != nullptr) { | |||||
string parent_op_type; | |||||
if (AttrUtils::GetStr(placeholder->GetOpDesc(), kAttrNameParentOpType, parent_op_type)) { | |||||
if ((parent_op_type != CONSTANT) && (parent_op_type != CONSTANTOP)) { | |||||
return true; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
return false; | |||||
} | |||||
Status AssignByLabelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) { | Status AssignByLabelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) { | ||||
bool changed = false; | bool changed = false; | ||||
int64_t &next_stream = context.next_stream; | int64_t &next_stream = context.next_stream; | ||||
@@ -110,6 +133,21 @@ Status IndependentStreamPass::Run(ComputeGraphPtr graph, const vector<SubgraphPt | |||||
Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) { | Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) { | ||||
bool changed = false; | bool changed = false; | ||||
if (IsHeadNodeExceeded(subgraphs)) { | |||||
int64_t &next_stream = context.next_stream; | |||||
for (const SubgraphPtr &subgraph : subgraphs) { | |||||
if (!HasAssignedStream(*subgraph)) { | |||||
subgraph->stream_id = next_stream; | |||||
changed = true; | |||||
} | |||||
} | |||||
if (changed) { | |||||
++next_stream; | |||||
return SUCCESS; | |||||
} | |||||
return NOT_CHANGED; | |||||
} | |||||
map<NodePtr, SubgraphPtr> end_subgraph_map; | map<NodePtr, SubgraphPtr> end_subgraph_map; | ||||
map<NodePtr, SubgraphPtr> pld_subgraph_map; | map<NodePtr, SubgraphPtr> pld_subgraph_map; | ||||
InitEndSubgraphMap(subgraphs, end_subgraph_map); | InitEndSubgraphMap(subgraphs, end_subgraph_map); | ||||
@@ -152,6 +190,24 @@ Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphP | |||||
return changed ? SUCCESS : NOT_CHANGED; | return changed ? SUCCESS : NOT_CHANGED; | ||||
} | } | ||||
bool AssignByDependencyPass::IsHeadNodeExceeded(const vector<SubgraphPtr> &subgraphs) const { | |||||
size_t aicpu_node_num = 0; | |||||
for (const SubgraphPtr &subgraph : subgraphs) { | |||||
if (subgraph->engine_conf.id == kAICPUEngineName && !HasNonConstInputNode(*subgraph)) { | |||||
const SubGraphInfo &subgraph_info = subgraph->subgraph_info; | |||||
auto compute_graph = subgraph_info.GetSubGraph(); | |||||
aicpu_node_num += compute_graph->GetDirectNode().size() - subgraph_info.GetPld2EndMap().size() - | |||||
subgraph_info.GetEnd2PldMap().size(); | |||||
if (aicpu_node_num > kHeadNodeMaxNum) { | |||||
GELOGI("aicpu_node_num, %zu", aicpu_node_num); | |||||
return true; | |||||
} | |||||
} | |||||
} | |||||
return false; | |||||
} | |||||
void AssignByDependencyPass::InitEndSubgraphMap(const vector<SubgraphPtr> &subgraphs, | void AssignByDependencyPass::InitEndSubgraphMap(const vector<SubgraphPtr> &subgraphs, | ||||
map<NodePtr, SubgraphPtr> &end_subgraph_map) { | map<NodePtr, SubgraphPtr> &end_subgraph_map) { | ||||
for (const auto &subgraph : subgraphs) { | for (const auto &subgraph : subgraphs) { | ||||
@@ -671,7 +727,7 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra | |||||
int64_t stream_num = context_.next_stream; | int64_t stream_num = context_.next_stream; | ||||
vector<bool> stream_has_node(stream_num); | vector<bool> stream_has_node(stream_num); | ||||
for (const NodePtr &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
for (const NodePtr &node : graph->GetAllNodes()) { | |||||
if (node != nullptr) { | if (node != nullptr) { | ||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
if (op_desc != nullptr) { | if (op_desc != nullptr) { | ||||
@@ -692,7 +748,7 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra | |||||
} | } | ||||
} | } | ||||
for (const NodePtr &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
for (const NodePtr &node : graph->GetAllNodes()) { | |||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
if (op_desc != nullptr) { | if (op_desc != nullptr) { | ||||
int64_t stream_id = op_desc->GetStreamId(); | int64_t stream_id = op_desc->GetStreamId(); | ||||
@@ -81,6 +81,9 @@ class LogicalStreamPass { | |||||
bool HasStreamLabel(const Subgraph &subgraph) const; | bool HasStreamLabel(const Subgraph &subgraph) const; | ||||
bool HasAssignedStream(const Subgraph &subgraph) const; | bool HasAssignedStream(const Subgraph &subgraph) const; | ||||
// Determine if the input of the subgraph is a constant. | |||||
bool HasNonConstInputNode(const Subgraph &subgraph) const; | |||||
private: | private: | ||||
std::string name_; | std::string name_; | ||||
}; | }; | ||||
@@ -118,6 +121,7 @@ class AssignByDependencyPass : public LogicalStreamPass { | |||||
void UpdateAssignedSubgraphs(Context &context); | void UpdateAssignedSubgraphs(Context &context); | ||||
void UpdateReusedSubgraphs(); | void UpdateReusedSubgraphs(); | ||||
bool IsHeadNodeExceeded(const std::vector<SubgraphPtr> &subgraphs) const; | |||||
bool CouldReuse(const SubgraphPtr &subgraph, const SubgraphPtr &pred_subgraph, | bool CouldReuse(const SubgraphPtr &subgraph, const SubgraphPtr &pred_subgraph, | ||||
const std::map<NodePtr, SubgraphPtr> &pld_subgraph_map); | const std::map<NodePtr, SubgraphPtr> &pld_subgraph_map); | ||||
@@ -18,7 +18,6 @@ | |||||
#include <algorithm> | #include <algorithm> | ||||
#include <sstream> | #include <sstream> | ||||
#include "external/ge/ge_api_types.h" | |||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "graph/anchor.h" | #include "graph/anchor.h" | ||||
#include "graph/buffer.h" | #include "graph/buffer.h" | ||||
@@ -40,6 +39,7 @@ namespace { | |||||
const char *const kAttrNameWorkspaceReuseFlag = "workspace_reuse_flag"; | const char *const kAttrNameWorkspaceReuseFlag = "workspace_reuse_flag"; | ||||
const char *const kL2FusionDynamicConvergeOp = "l2fusion_dynamic_converge_op"; | const char *const kL2FusionDynamicConvergeOp = "l2fusion_dynamic_converge_op"; | ||||
const char *const kOpNoReuseMem = "no_reuse_mem_flag"; | const char *const kOpNoReuseMem = "no_reuse_mem_flag"; | ||||
const char *const kDisableReuseMemory = "ge.exec.disableReuseMemory"; | |||||
const char *const OP_NO_REUSE_MEM = "OP_NO_REUSE_MEM"; | const char *const OP_NO_REUSE_MEM = "OP_NO_REUSE_MEM"; | ||||
const int kReuseMaxCount = 10; | const int kReuseMaxCount = 10; | ||||
const int kReuseMaxOpNum = 10; | const int kReuseMaxOpNum = 10; | ||||
@@ -133,20 +133,21 @@ bool MemoryBlock::IsSameLabel(std::string &first_batch_label) { | |||||
} | } | ||||
bool CanNotLifeReuse(MemoryBlock *block) { | bool CanNotLifeReuse(MemoryBlock *block) { | ||||
if ((block == nullptr) || !block->reuse_mem_ || block->deleted_block_ || block->continuous_block_) { | |||||
if (block == nullptr || !block->reuse_mem_ || block->deleted_block_ || block->continuous_block_ || | |||||
block->GetLifeEnd() == kMaxLifeTime) { | |||||
return true; | return true; | ||||
} | } | ||||
return false; | return false; | ||||
} | } | ||||
void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life) { | |||||
void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block) { | |||||
if (CanNotLifeReuse(this) || CanNotLifeReuse(block)) { | if (CanNotLifeReuse(this) || CanNotLifeReuse(block)) { | ||||
return; | return; | ||||
} | } | ||||
MemoryBlock *parent = nullptr; | MemoryBlock *parent = nullptr; | ||||
MemoryBlock *child = nullptr; | MemoryBlock *child = nullptr; | ||||
// merge small block to large block | // merge small block to large block | ||||
if (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd()) { | |||||
if ((block->GetLifeBegin() > GetLifeEnd()) && (block->stream_id_ == stream_id_)) { | |||||
if ((child_offset_ + block->block_size_) <= block_size_) { | if ((child_offset_ + block->block_size_) <= block_size_) { | ||||
parent = this; | parent = this; | ||||
child = block; | child = block; | ||||
@@ -180,87 +181,6 @@ size_t MemoryBlock::GetLifeBegin() { | |||||
return life_time; | return life_time; | ||||
} | } | ||||
/// |-stream 1-| |-stream 2-| | |||||
/// |--block1--| |--block---| | |||||
/// |--block2--| |--block---| | |||||
/// |--block3--|\ |--block---| | |||||
/// |--block---| \ |--block---| | |||||
/// |--block---| \|--block---| | |||||
/// |--block---| |--block7--| | |||||
/// |--block---| |--block---| | |||||
/// block7's first node's input node's life begin > block2's life end, block7 can reuse block1~block2 | |||||
size_t MemoryBlock::GetDependLifeBegin(int64_t stream_id, DependStreamLife &total_node_depend_stream_life) { | |||||
AddDependLifeBegin(total_node_depend_stream_life); | |||||
auto it = depend_stream_life_.find(stream_id); | |||||
if (it == depend_stream_life_.end()) { | |||||
return 0; | |||||
} | |||||
return it->second; | |||||
} | |||||
void AddDependLife(const ge::NodePtr &org_node, const ge::NodePtr &node, int64_t stream_id, | |||||
std::map<int64_t, size_t> &depend_stream_life, DependStreamLife &total_node_depend_stream_life) { | |||||
GE_CHECK_NOTNULL_EXEC(node, return ); | |||||
auto node_desc = node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL_EXEC(node_desc, return ); | |||||
auto node_id = node_desc->GetId(); | |||||
auto stream_life = total_node_depend_stream_life.find(node_id); | |||||
if (stream_life != total_node_depend_stream_life.end()) { | |||||
for (auto &it : stream_life->second) { | |||||
if (depend_stream_life.find(it.first) == depend_stream_life.end()) { | |||||
depend_stream_life[it.first] = it.second; | |||||
} | |||||
} | |||||
return; | |||||
} | |||||
for (const auto &in_anchor : node->GetAllInAnchors()) { | |||||
GE_CHECK_NOTNULL_EXEC(in_anchor, continue); | |||||
for (auto peer_out_anchor : in_anchor->GetPeerAnchors()) { | |||||
GE_CHECK_NOTNULL_EXEC(peer_out_anchor, continue); | |||||
auto peer_node = peer_out_anchor->GetOwnerNode(); | |||||
GE_CHECK_NOTNULL_EXEC(peer_node, continue); | |||||
auto peer_node_desc = peer_node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL_EXEC(peer_node_desc, continue); | |||||
auto peer_node_stream_id = peer_node_desc->GetStreamId(); | |||||
if (peer_node_stream_id < 0) { | |||||
continue; | |||||
} | |||||
size_t peer_node_life_time = peer_node_desc->GetId(); | |||||
auto it = depend_stream_life.find(peer_node_stream_id); | |||||
if (it == depend_stream_life.end() || peer_node_life_time > it->second) { | |||||
depend_stream_life[peer_node_stream_id] = peer_node_life_time; | |||||
if (peer_node_stream_id != stream_id) { | |||||
GELOGI("Node:%s stream id:%ld depend node:%s stream id:%ld index[%d] life time[%zu].", | |||||
org_node->GetName().c_str(), stream_id, peer_node_desc->GetName().c_str(), peer_node_stream_id, | |||||
peer_out_anchor->GetIdx(), peer_node_life_time); | |||||
} | |||||
AddDependLife(org_node, peer_node, stream_id, depend_stream_life, total_node_depend_stream_life); | |||||
} | |||||
} | |||||
} | |||||
// save on node to save next calculation | |||||
for (auto &it : depend_stream_life) { | |||||
if (total_node_depend_stream_life[node_id].find(it.first) == total_node_depend_stream_life[node_id].end()) { | |||||
total_node_depend_stream_life[node_id][it.first] = it.second; | |||||
} | |||||
} | |||||
} | |||||
void MemoryBlock::AddDependLifeBegin(DependStreamLife &total_node_depend_stream_life) { | |||||
if (!depend_stream_life_.empty()) { | |||||
return; | |||||
} | |||||
if (!node_type_index_list_.empty()) { | |||||
auto node = node_type_index_list_.front().node; | |||||
if (node != nullptr) { | |||||
AddDependLife(node, node, stream_id_, depend_stream_life_, total_node_depend_stream_life); | |||||
} | |||||
} | |||||
depend_stream_life_[stream_id_] = GetLifeBegin(); | |||||
} | |||||
size_t MemoryBlock::GetLifeEnd() { | size_t MemoryBlock::GetLifeEnd() { | ||||
if (!node_type_index_list_.empty()) { | if (!node_type_index_list_.empty()) { | ||||
return node_type_index_list_.back().life_time_end; | return node_type_index_list_.back().life_time_end; | ||||
@@ -382,7 +302,7 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | |||||
if (iter1 == anchor_to_symbol_.end()) { | if (iter1 == anchor_to_symbol_.end()) { | ||||
continue; | continue; | ||||
} | } | ||||
const std::string &symbol = iter1->second; | |||||
std::string symbol = iter1->second; | |||||
auto iter2 = symbol_size_.find(symbol); | auto iter2 = symbol_size_.find(symbol); | ||||
if (iter2 == symbol_size_.end()) { | if (iter2 == symbol_size_.end()) { | ||||
symbol_size_[symbol] = size; | symbol_size_[symbol] = size; | ||||
@@ -397,7 +317,7 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | |||||
all_memory_size.insert(all_memory_size.end(), temp.begin(), temp.end()); | all_memory_size.insert(all_memory_size.end(), temp.begin(), temp.end()); | ||||
} | } | ||||
GELOGI("The last atomic_addr_clean node id: %ld", atomic_addr_clean_id_); | GELOGI("The last atomic_addr_clean node id: %ld", atomic_addr_clean_id_); | ||||
for (const auto &pair : symbol_size_) { | |||||
for (auto &pair : symbol_size_) { | |||||
all_memory_size.emplace_back(pair.second); | all_memory_size.emplace_back(pair.second); | ||||
} | } | ||||
sort(all_memory_size.begin(), all_memory_size.end()); | sort(all_memory_size.begin(), all_memory_size.end()); | ||||
@@ -507,6 +427,14 @@ bool CanReuseBySize(const map<string, uint64_t> &reusable_block_counts, const Me | |||||
return can_reuse; | return can_reuse; | ||||
} | } | ||||
bool CanReuseByStream(const std::unordered_set<int64_t> &reuse_stream, MemoryBlock &reusable_block) { | |||||
bool can_reuse = false; | |||||
if (reuse_stream.find(reusable_block.stream_id_) != reuse_stream.cend()) { | |||||
can_reuse = true; | |||||
} | |||||
return can_reuse; | |||||
} | |||||
bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | ||||
uint32_t &peer_input_index) { | uint32_t &peer_input_index) { | ||||
if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { | if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { | ||||
@@ -567,11 +495,11 @@ void BlockMemAssigner::InitReuseFlag() { | |||||
ge::CONSTANT, ge::CONSTANTOP}; | ge::CONSTANT, ge::CONSTANTOP}; | ||||
static const std::set<std::string> kPostReuseTypes = {ge::DATA_TYPE, ge::AIPP_DATA_TYPE, ge::ENTER, | static const std::set<std::string> kPostReuseTypes = {ge::DATA_TYPE, ge::AIPP_DATA_TYPE, ge::ENTER, | ||||
ge::REFENTER, ge::NEXTITERATION, ge::REFNEXTITERATION}; | ge::REFENTER, ge::NEXTITERATION, ge::REFNEXTITERATION}; | ||||
for (const auto &pair : symbol_to_anchors_) { | |||||
for (auto &pair : symbol_to_anchors_) { | |||||
std::string symbol = pair.first; | std::string symbol = pair.first; | ||||
bool pre_reuse_flag = true; | bool pre_reuse_flag = true; | ||||
bool post_reuse_flag = true; | bool post_reuse_flag = true; | ||||
for (const auto &node_index_io : pair.second) { | |||||
for (auto &node_index_io : pair.second) { | |||||
if (node_index_io.io_type_ == kIn) { | if (node_index_io.io_type_ == kIn) { | ||||
continue; | continue; | ||||
} | } | ||||
@@ -585,13 +513,13 @@ void BlockMemAssigner::InitReuseFlag() { | |||||
if (node_index_io.node_->GetOutDataNodes().empty()) { | if (node_index_io.node_->GetOutDataNodes().empty()) { | ||||
out_flg = true; | out_flg = true; | ||||
} | } | ||||
for (const auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { | |||||
for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { | |||||
if (IsDirectOutputNode(in_anchor->GetOwnerNode(), in_anchor->GetIdx())) { | if (IsDirectOutputNode(in_anchor->GetOwnerNode(), in_anchor->GetIdx())) { | ||||
out_flg = true; | out_flg = true; | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
const std::string &type = out_anchor->GetOwnerNode()->GetType(); | |||||
std::string type = out_anchor->GetOwnerNode()->GetType(); | |||||
pre_reuse_flag = pre_reuse_flag && !out_flg && (kPreReuseTypes.count(type) == 0); | pre_reuse_flag = pre_reuse_flag && !out_flg && (kPreReuseTypes.count(type) == 0); | ||||
post_reuse_flag = post_reuse_flag && (kPostReuseTypes.count(type) == 0); | post_reuse_flag = post_reuse_flag && (kPostReuseTypes.count(type) == 0); | ||||
if (!pre_reuse_flag && !post_reuse_flag) { | if (!pre_reuse_flag && !post_reuse_flag) { | ||||
@@ -624,7 +552,7 @@ bool BlockMemAssigner::IsPreReuse(const NodePtr &node, uint32_t out_index) const | |||||
return false; | return false; | ||||
} | } | ||||
const std::string &symbol = iter1->second; | |||||
std::string symbol = iter1->second; | |||||
auto iter2 = pre_reuse_flag_.find(symbol); | auto iter2 = pre_reuse_flag_.find(symbol); | ||||
if (iter2 == pre_reuse_flag_.end()) { | if (iter2 == pre_reuse_flag_.end()) { | ||||
return false; | return false; | ||||
@@ -642,7 +570,7 @@ bool BlockMemAssigner::IsPostReuse(const MemoryBlock *mem_block) const { | |||||
if (mem_block == nullptr) { | if (mem_block == nullptr) { | ||||
return false; | return false; | ||||
} | } | ||||
for (const auto &symbol : mem_block->SymbolList()) { | |||||
for (auto &symbol : mem_block->SymbolList()) { | |||||
auto iter = post_reuse_flag_.find(symbol); | auto iter = post_reuse_flag_.find(symbol); | ||||
if (iter == post_reuse_flag_.end()) { | if (iter == post_reuse_flag_.end()) { | ||||
continue; | continue; | ||||
@@ -665,7 +593,8 @@ bool BlockMemAssigner::IsSymbolExist(const NodeIndexIO &node_index_io) { | |||||
if (iter == anchor_to_symbol_.end()) { | if (iter == anchor_to_symbol_.end()) { | ||||
return false; | return false; | ||||
} | } | ||||
return symbol_blocks_.find(iter->second) != symbol_blocks_.end(); | |||||
std::string symbol = iter->second; | |||||
return symbol_blocks_.find(symbol) != symbol_blocks_.end(); | |||||
} | } | ||||
/// | /// | ||||
@@ -674,10 +603,10 @@ bool BlockMemAssigner::IsSymbolExist(const NodeIndexIO &node_index_io) { | |||||
/// @return void | /// @return void | ||||
/// | /// | ||||
void BlockMemAssigner::PrintSymbolMap() { | void BlockMemAssigner::PrintSymbolMap() { | ||||
for (const auto &pair : symbol_to_anchors_) { | |||||
for (auto &pair : symbol_to_anchors_) { | |||||
GELOGD("symbol=%s, max_size=%zu, pre_reuse=%s, post_reuse=%s", pair.first.c_str(), symbol_size_[pair.first], | GELOGD("symbol=%s, max_size=%zu, pre_reuse=%s, post_reuse=%s", pair.first.c_str(), symbol_size_[pair.first], | ||||
pre_reuse_flag_[pair.first] ? "true" : "false", post_reuse_flag_[pair.first] ? "true" : "false"); | pre_reuse_flag_[pair.first] ? "true" : "false", post_reuse_flag_[pair.first] ? "true" : "false"); | ||||
for (const auto &node_index_io : pair.second) { | |||||
for (auto &node_index_io : pair.second) { | |||||
GELOGD("anchor:%s", node_index_io.ToString().c_str()); | GELOGD("anchor:%s", node_index_io.ToString().c_str()); | ||||
} | } | ||||
} | } | ||||
@@ -693,14 +622,15 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
bool is_reuse_memory = false; | bool is_reuse_memory = false; | ||||
string ge_disable_reuse_mem_env = "0"; | string ge_disable_reuse_mem_env = "0"; | ||||
(void)ge::GetContext().GetOption(OPTION_EXEC_DISABLE_REUSED_MEMORY, ge_disable_reuse_mem_env); | |||||
(void)ge::GetContext().GetOption(kDisableReuseMemory, ge_disable_reuse_mem_env); | |||||
if (ge_disable_reuse_mem_env != "1") { | if (ge_disable_reuse_mem_env != "1") { | ||||
bool reuse_mem_flag = !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); | bool reuse_mem_flag = !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); | ||||
is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) && | is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) && | ||||
reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index)); | reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index)); | ||||
auto stream_id = node_op_desc->GetStreamId(); | auto stream_id = node_op_desc->GetStreamId(); | ||||
if (is_reuse_memory) { | |||||
for (auto it = reusable_blocks_[stream_id].begin(); it != reusable_blocks_[stream_id].end(); ++it) { | |||||
auto map_iter = reusable_streams_map_.find(stream_id); | |||||
if (is_reuse_memory && map_iter != reusable_streams_map_.end()) { | |||||
for (auto it = reusable_blocks_.begin(); it != reusable_blocks_.end(); ++it) { | |||||
MemoryBlock *reusable_block = *it; | MemoryBlock *reusable_block = *it; | ||||
if (!IsPostReuse(reusable_block)) { | if (!IsPostReuse(reusable_block)) { | ||||
reusable_block->reuse_mem_ = false; | reusable_block->reuse_mem_ = false; | ||||
@@ -710,7 +640,10 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
// A node can reuse blocks of the same stream and preorder streams | // A node can reuse blocks of the same stream and preorder streams | ||||
auto id = GetAtomicAddrCleanId(); | auto id = GetAtomicAddrCleanId(); | ||||
if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous, id)) { | |||||
if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous, id) && | |||||
CanReuseByStream(map_iter->second, *reusable_block)) { | |||||
GELOGD("Cross stream mem reuse, target stream:%ld, current stream:%ld", reusable_block->stream_id_, | |||||
stream_id); | |||||
reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); | reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); | ||||
if (mem_type == kOutput) { | if (mem_type == kOutput) { | ||||
auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); | auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); | ||||
@@ -721,7 +654,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
reusable_block->continuous_block_ = continuous; | reusable_block->continuous_block_ = continuous; | ||||
reusable_block->ref_count_++; | reusable_block->ref_count_++; | ||||
ReduceReusableBlockCount(*reusable_block, reusable_block_counts_); | ReduceReusableBlockCount(*reusable_block, reusable_block_counts_); | ||||
reusable_blocks_[stream_id].erase(it); | |||||
reusable_blocks_.erase(it); | |||||
return reusable_block; | return reusable_block; | ||||
} | } | ||||
} | } | ||||
@@ -767,7 +700,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
"Get no align size failed"); | "Get no align size failed"); | ||||
if (IsSymbolExist(node_index_io)) { | if (IsSymbolExist(node_index_io)) { | ||||
const std::string &symbol = anchor_to_symbol_[node_index_io.ToString()]; | |||||
std::string symbol = anchor_to_symbol_[node_index_io.ToString()]; | |||||
block = symbol_blocks_[symbol]; | block = symbol_blocks_[symbol]; | ||||
block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); | block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); | ||||
block->ref_count_++; | block->ref_count_++; | ||||
@@ -990,7 +923,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
(void)ge::AttrUtils::GetBool(op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); | (void)ge::AttrUtils::GetBool(op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); | ||||
// Allocate memory for the current node and release node memory of the same size in the workspace | // Allocate memory for the current node and release node memory of the same size in the workspace | ||||
GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | ||||
ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_[stream_id]);) | |||||
ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_);) | |||||
for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | ||||
int64_t size = 0; | int64_t size = 0; | ||||
auto output_op_desc = op_desc->GetOutputDescPtr(i); | auto output_op_desc = op_desc->GetOutputDescPtr(i); | ||||
@@ -1044,7 +977,10 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
/// @return Status result | /// @return Status result | ||||
/// | /// | ||||
void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | ||||
(void)ge::GetContext().GetOption(OPTION_EXEC_DISABLE_REUSED_MEMORY, ge_disable_reuse_mem_env_); | |||||
// Init reusable streams map | |||||
InitReusableStreamMap(); | |||||
(void)ge::GetContext().GetOption(kDisableReuseMemory, ge_disable_reuse_mem_env_); | |||||
GEEVENT("Reuse memory %s", ge_disable_reuse_mem_env_ == "1" ? "close" : "open"); | GEEVENT("Reuse memory %s", ge_disable_reuse_mem_env_ == "1" ? "close" : "open"); | ||||
string op_no_reuse_mem_str; | string op_no_reuse_mem_str; | ||||
const char *op_no_reuse_mem = std::getenv(OP_NO_REUSE_MEM); | const char *op_no_reuse_mem = std::getenv(OP_NO_REUSE_MEM); | ||||
@@ -1097,7 +1033,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block."); | ||||
CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block); | CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block); | ||||
} | } | ||||
ReleaseInputNodeOutMemory(node_out_blocks_, reusable_blocks_[stream_id], n); | |||||
ReleaseInputNodeOutMemory(node_out_blocks_, reusable_blocks_, n); | |||||
} | } | ||||
GELOGD("Assigned memory blocks:"); | GELOGD("Assigned memory blocks:"); | ||||
@@ -1108,7 +1044,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
bool merge_dynamic_batch = false; | bool merge_dynamic_batch = false; | ||||
GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), merge_dynamic_batch = MergeDynamicBatchBlocks();) | GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), merge_dynamic_batch = MergeDynamicBatchBlocks();) | ||||
GE_IF_BOOL_EXEC((!(ge_disable_reuse_mem_env_ == "1") && !merge_dynamic_batch), ReuseBlocksByLifeTime(ranges.size());) | |||||
GE_IF_BOOL_EXEC(!merge_dynamic_batch, ReuseBlocksByLifeTime();) | |||||
AssignContinuousBlocks(); | AssignContinuousBlocks(); | ||||
ResizeMemoryBlocks(); | ResizeMemoryBlocks(); | ||||
@@ -1285,11 +1221,7 @@ void BlockMemAssigner::AssignContinuousBlocks() { | |||||
} | } | ||||
} | } | ||||
void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) { | |||||
// 1 means block size is same so no need to do this | |||||
if (range_size <= 1) { | |||||
return; | |||||
} | |||||
void BlockMemAssigner::ReuseBlocksByLifeTime() { | |||||
for (size_t i = 0; i < memory_blocks_.size(); ++i) { | for (size_t i = 0; i < memory_blocks_.size(); ++i) { | ||||
auto parent = memory_blocks_[i]; | auto parent = memory_blocks_[i]; | ||||
if (parent == nullptr || parent->deleted_block_) { | if (parent == nullptr || parent->deleted_block_) { | ||||
@@ -1299,7 +1231,7 @@ void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) { | |||||
parent->reuse_mem_ = false; | parent->reuse_mem_ = false; | ||||
} | } | ||||
for (size_t j = i + 1; j < memory_blocks_.size(); ++j) { | for (size_t j = i + 1; j < memory_blocks_.size(); ++j) { | ||||
parent->AddLifeReuseBlock(memory_blocks_[j], total_node_depend_stream_life_); | |||||
parent->AddLifeReuseBlock(memory_blocks_[j]); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -1386,10 +1318,10 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, siz | |||||
} | } | ||||
GELOGI( | GELOGI( | ||||
"[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]" | "[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]" | ||||
" noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d] isref[%d].", | |||||
" noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d] isref[%d].", | |||||
graph_name.c_str(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, | graph_name.c_str(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, | ||||
op_desc->GetStreamId(), block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, | op_desc->GetStreamId(), block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, | ||||
block->reuse_mem_, block->continuous_block_, block->deleted_block_, node_type.ref_input); | |||||
node_type.ref_input); | |||||
} | } | ||||
void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) { | void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) { | ||||
@@ -1448,6 +1380,139 @@ Status BlockMemAssigner::Assign() { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
void BlockMemAssigner::InitReusableStreamMap() { | |||||
// save a stream's id and its first Node and last node. | |||||
map<int64_t, pair<NodePtr, NodePtr>> stream_head_tail_node_map; | |||||
// save a stream's id and its directly child stream. | |||||
map<int64_t, unordered_set<int64_t>> stream_dependency_map; | |||||
// save a stream's id and its occupied memory. | |||||
unordered_map<int64_t, int64_t> stream_mem_map; | |||||
// Find streams's first and last node. | |||||
FindHeadAndTailNodesForStream(stream_head_tail_node_map, stream_mem_map); | |||||
// If streamB's first node is the output of streamA's last node, then B depends on A. | |||||
FindDependentStream(stream_head_tail_node_map, stream_dependency_map); | |||||
// If a stream has more than one child stream, select the one that occupies the closest memory | |||||
for (const auto &iter : stream_dependency_map) { | |||||
if (iter.second.empty()) { | |||||
continue; | |||||
} | |||||
int64_t target_size = stream_mem_map[iter.first]; | |||||
int64_t min_size_gap = LONG_MAX; | |||||
int64_t target_reuse_stream_id = 0; | |||||
for (auto id : iter.second) { | |||||
if (labs(stream_mem_map[id] - target_size) < min_size_gap) { | |||||
target_reuse_stream_id = id; | |||||
min_size_gap = labs(stream_mem_map[id] - target_size); | |||||
} | |||||
} | |||||
// If b can reuse a, then b should also be able to reuse all blocks that a can reuse. | |||||
reusable_streams_map_[target_reuse_stream_id].insert(reusable_streams_map_[iter.first].begin(), | |||||
reusable_streams_map_[iter.first].end()); | |||||
} | |||||
} | |||||
void BlockMemAssigner::FindHeadAndTailNodesForStream(map<int64_t, pair<NodePtr, NodePtr>> &stream_head_tail_node_map, | |||||
unordered_map<int64_t, int64_t> &stream_mem_map) { | |||||
for (const auto &n : compute_graph_->GetAllNodes()) { | |||||
GE_IF_BOOL_EXEC(n->GetOpDesc() == nullptr, GELOGW("Op desc is nullptr"); continue); | |||||
auto stream_id = n->GetOpDesc()->GetStreamId(); | |||||
// traverse to find streams's first and last node. | |||||
if (stream_head_tail_node_map.find(stream_id) == stream_head_tail_node_map.end()) { | |||||
stream_head_tail_node_map[stream_id] = std::make_pair(n, n); | |||||
reusable_streams_map_[stream_id].insert(stream_id); // a node can reuse blocks from same stream. | |||||
} else { | |||||
stream_head_tail_node_map[stream_id].second = n; | |||||
} | |||||
// Accumulate the output size of the node in the stream. | |||||
for (size_t i = 0; i < n->GetOpDesc()->GetOutputsSize(); i++) { | |||||
int64_t size = 0; | |||||
if (ge::TensorUtils::GetSize(*n->GetOpDesc()->GetOutputDescPtr(static_cast<uint32_t>(i)), size) != SUCCESS) { | |||||
GELOGW("Get output size failed!"); | |||||
continue; | |||||
} | |||||
stream_mem_map[stream_id] += size; | |||||
} | |||||
// Accumulate the workspace size of the node in the stream. | |||||
for (auto size : n->GetOpDesc()->GetWorkspaceBytes()) { | |||||
stream_mem_map[stream_id] += size; | |||||
} | |||||
} | |||||
} | |||||
void BlockMemAssigner::FindDependentStream(map<int64_t, pair<NodePtr, NodePtr>> &stream_head_tail_node_map, | |||||
map<int64_t, unordered_set<int64_t>> &stream_dependency_map) { | |||||
for (const auto &it1 : stream_head_tail_node_map) { | |||||
for (const auto &it2 : stream_head_tail_node_map) { | |||||
if (it1 == it2) { | |||||
continue; | |||||
} | |||||
NodePtr pre_node = it1.second.second; | |||||
NodePtr post_node = it2.second.first; | |||||
std::vector<NodePtr> out_nodes; | |||||
// Direct link out_node | |||||
for (const auto &out_node : pre_node->GetOutNodes()) { | |||||
if ((out_node->GetOpDesc() == nullptr) || (post_node->GetOpDesc() == nullptr) || | |||||
(pre_node->GetOpDesc() == nullptr)) { | |||||
continue; | |||||
} | |||||
out_nodes.emplace_back(out_node); | |||||
} | |||||
FindDependentStreamBetweenGraphs(pre_node, out_nodes); | |||||
for (auto &out_node : out_nodes) { | |||||
if (out_node->GetOpDesc()->GetId() == post_node->GetOpDesc()->GetId()) { | |||||
stream_dependency_map[pre_node->GetOpDesc()->GetStreamId()].insert(post_node->GetOpDesc()->GetStreamId()); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
} | |||||
/// | |||||
/// @ingroup GE | |||||
/// @brief Find dependent link between parent_graph and sub_graph | |||||
/// @param [in] pre_node | |||||
/// @param [out] out_nodes | |||||
/// @return void | |||||
/// @author | |||||
/// | |||||
void BlockMemAssigner::FindDependentStreamBetweenGraphs(const NodePtr &pre_node, std::vector<NodePtr> &out_nodes) { | |||||
if ((pre_node == nullptr) || (pre_node->GetOpDesc() == nullptr)) { | |||||
return; | |||||
} | |||||
// FunctionOp & subgraph input | |||||
std::vector<std::string> subgraph_names = pre_node->GetOpDesc()->GetSubgraphInstanceNames(); | |||||
for (auto &subgraph_name : subgraph_names) { | |||||
ComputeGraphPtr subgraph = compute_graph_->GetSubgraph(subgraph_name); | |||||
if (subgraph == nullptr) { | |||||
continue; | |||||
} | |||||
for (auto &node : subgraph->GetDirectNode()) { | |||||
OpDescPtr op_desc = node->GetOpDesc(); | |||||
if (op_desc == nullptr) { | |||||
continue; | |||||
} | |||||
if (op_desc->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) { | |||||
out_nodes.emplace_back(node); | |||||
} | |||||
} | |||||
} | |||||
// subgraph output & parent_node output | |||||
if (NodeUtils::IsSubgraphOutput(pre_node)) { | |||||
NodePtr parent_node = pre_node->GetOwnerComputeGraph()->GetParentNode(); | |||||
for (const auto &out_node : parent_node->GetOutNodes()) { | |||||
out_nodes.emplace_back(out_node); | |||||
} | |||||
} | |||||
} | |||||
bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | ||||
return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | ||||
(node_type == HCOMBROADCAST) || (node_type == HCOMALLREDUCE) || (node_type == CONSTANTOP) || | (node_type == HCOMBROADCAST) || (node_type == HCOMALLREDUCE) || (node_type == CONSTANTOP) || | ||||
@@ -34,8 +34,6 @@ | |||||
namespace ge { | namespace ge { | ||||
const size_t kMaxLifeTime = 0xffffffff; | const size_t kMaxLifeTime = 0xffffffff; | ||||
using DependStreamLife = std::map<int64_t, std::map<int64_t, size_t>>; | |||||
enum MemoryType { kOutput, kWorkspace }; | enum MemoryType { kOutput, kWorkspace }; | ||||
struct NodeTypeIndex { | struct NodeTypeIndex { | ||||
@@ -118,7 +116,7 @@ class MemoryBlock { | |||||
bool IsSameLabel(std::string &first_batch_label); | bool IsSameLabel(std::string &first_batch_label); | ||||
void AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &node_depend_stream_life); | |||||
void AddLifeReuseBlock(MemoryBlock *block); | |||||
void SetLifeTimeEnd(size_t time); | void SetLifeTimeEnd(size_t time); | ||||
@@ -126,10 +124,6 @@ class MemoryBlock { | |||||
size_t GetLifeEnd(); | size_t GetLifeEnd(); | ||||
void AddDependLifeBegin(DependStreamLife &node_depend_stream_life); | |||||
size_t GetDependLifeBegin(int64_t stream_id, DependStreamLife &node_depend_stream_life); | |||||
int ref_count_; | int ref_count_; | ||||
int64_t stream_id_; | int64_t stream_id_; | ||||
bool deleted_block_; | bool deleted_block_; | ||||
@@ -202,6 +196,47 @@ class BlockMemAssigner : public MemAssigner { | |||||
/// | /// | ||||
/// @ingroup GE | /// @ingroup GE | ||||
/// @brief Traversing the compute_graph_ to find the reuse relationship between streams | |||||
/// @param [in] reusable_stream_map map to save stream_id and its reusable stream_ids | |||||
/// @return void | |||||
/// @author | |||||
/// | |||||
void InitReusableStreamMap(); | |||||
/// | |||||
/// @ingroup GE | |||||
/// @brief Traversing the compute_graph_ to find the first and last nodeptr of a stream. | |||||
/// @param [in] stream_head_tail_node_map map to save stream_id and its first and last nodeptr. | |||||
/// @param [in] stream_mem_map map to save stream_id and its memory capacity. | |||||
/// @return void | |||||
/// @author | |||||
/// | |||||
void FindHeadAndTailNodesForStream(std::map<int64_t, std::pair<NodePtr, NodePtr>> &stream_head_tail_node_map, | |||||
std::unordered_map<int64_t, int64_t> &stream_mem_map); | |||||
/// | |||||
/// @ingroup GE | |||||
/// @brief Traversing the compute_graph_ to find the reuse relationship between streams. | |||||
/// @param [in] stream_head_tail_node_map map to save stream_id and its first and last nodeptr. | |||||
/// @param [in] stream_dependency_map map to save stream_id and stream_ids depends on it. | |||||
/// @return void | |||||
/// @author | |||||
/// | |||||
void FindDependentStream(std::map<int64_t, std::pair<NodePtr, NodePtr>> &stream_head_tail_node_map, | |||||
std::map<int64_t, std::unordered_set<int64_t>> &stream_dependency_map); | |||||
/// | |||||
/// @ingroup GE | |||||
/// @brief Find dependent link between parent_graph and sub_graph | |||||
/// @param [in] pre_node | |||||
/// @param [out] out_nodes | |||||
/// @return void | |||||
/// @author | |||||
/// | |||||
void FindDependentStreamBetweenGraphs(const NodePtr &pre_node, std::vector<NodePtr> &out_nodes); | |||||
/// | |||||
/// @ingroup GE | |||||
/// @brief Determine whether it is the type of zero memory node. | /// @brief Determine whether it is the type of zero memory node. | ||||
/// @param [in] node type. | /// @param [in] node type. | ||||
/// @return bool true: is zero memory node; false: is not zero memory node | /// @return bool true: is zero memory node; false: is not zero memory node | ||||
@@ -360,9 +395,9 @@ class BlockMemAssigner : public MemAssigner { | |||||
/// @return void | /// @return void | ||||
/// @author | /// @author | ||||
/// | /// | ||||
void ReuseBlocksByLifeTime(size_t range_size); | |||||
void ReuseBlocksByLifeTime(); | |||||
std::unordered_map<int64_t, std::vector<MemoryBlock *>> reusable_blocks_; | |||||
std::vector<MemoryBlock *> reusable_blocks_; | |||||
std::map<std::string, uint64_t> reusable_block_counts_; | std::map<std::string, uint64_t> reusable_block_counts_; | ||||
@@ -376,6 +411,9 @@ class BlockMemAssigner : public MemAssigner { | |||||
std::unordered_map<std::string, uint32_t> node_continuous_input_counts_; | std::unordered_map<std::string, uint32_t> node_continuous_input_counts_; | ||||
// save stream_id and reusable stream_ids | |||||
std::unordered_map<int64_t, std::unordered_set<int64_t>> reusable_streams_map_; | |||||
// reuse memory | // reuse memory | ||||
vector<string> op_no_reuse_mem_vec_; | vector<string> op_no_reuse_mem_vec_; | ||||
@@ -388,8 +426,6 @@ class BlockMemAssigner : public MemAssigner { | |||||
size_t life_time_; | size_t life_time_; | ||||
int64_t atomic_addr_clean_id_ = 0; | int64_t atomic_addr_clean_id_ = 0; | ||||
DependStreamLife total_node_depend_stream_life_; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_GRAPH_BUILD_MEMORY_BLOCK_MEM_ASSIGNER_H_ | #endif // GE_GRAPH_BUILD_MEMORY_BLOCK_MEM_ASSIGNER_H_ |
@@ -222,10 +222,9 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, size_t &mem_offse | |||||
mem_offset = memory_offset_[0].mem_offset_; | mem_offset = memory_offset_[0].mem_offset_; | ||||
auto session_id = compute_graph_->GetSessionID(); | |||||
if (mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) { | |||||
if (mem_offset > VarManager::Instance(0)->GetGraphMemoryMaxSize()) { | |||||
GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", mem_offset, | GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", mem_offset, | ||||
VarManager::Instance(session_id)->GetGraphMemoryMaxSize()); | |||||
VarManager::Instance(0)->GetGraphMemoryMaxSize()); | |||||
return ge::FAILED; | return ge::FAILED; | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -1223,16 +1222,10 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||||
peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), | peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), | ||||
input_list.back()); | input_list.back()); | ||||
} else { | } else { | ||||
int64_t output_offset = output_list.at(peer_out_anchor->GetIdx()); | |||||
if (peer_out_anchor->GetOwnerNode()->GetType() == CONSTANT) { | |||||
GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(input_index); | |||||
GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, output_offset)); | |||||
} | |||||
GELOGI("node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", tmp_op_desc->GetName().c_str(), | GELOGI("node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", tmp_op_desc->GetName().c_str(), | ||||
input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), | input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), | ||||
output_offset); | |||||
input_list.emplace_back(output_offset); | |||||
output_list.at(peer_out_anchor->GetIdx())); | |||||
input_list.emplace_back(output_list.at(peer_out_anchor->GetIdx())); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -299,33 +299,21 @@ Status VarMemAssignUtil::SetOutTransNodeToAssign(const ge::NodePtr &node, const | |||||
Status VarMemAssignUtil::AssignMemory2HasRefAttrNode(ge::ComputeGraphPtr &compute_graph) { | Status VarMemAssignUtil::AssignMemory2HasRefAttrNode(ge::ComputeGraphPtr &compute_graph) { | ||||
for (const ge::NodePtr &n : compute_graph->GetAllNodes()) { | for (const ge::NodePtr &n : compute_graph->GetAllNodes()) { | ||||
string ref_var_src_var_name; | string ref_var_src_var_name; | ||||
auto op_desc = n->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
for (uint32_t idx = 0; idx < op_desc->GetOutputsSize(); idx += 1) { | |||||
const auto out_desc = op_desc->MutableOutputDesc(idx); | |||||
if (ge::AttrUtils::GetStr(out_desc, REF_VAR_SRC_VAR_NAME, ref_var_src_var_name)) { | |||||
GE_CHK_STATUS_RET(AssignData2VarRef(n, ref_var_src_var_name, compute_graph->GetSessionID(), idx)); | |||||
} | |||||
} | |||||
GE_CHECK_NOTNULL(n->GetOpDesc()); | |||||
bool is_ref = ge::AttrUtils::GetStr(n->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_src_var_name); | |||||
GE_IF_BOOL_EXEC(is_ref, | |||||
GE_CHK_STATUS_RET(AssignData2VarRef(n, ref_var_src_var_name, compute_graph->GetSessionID()))); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status VarMemAssignUtil::AssignData2VarRef(const ge::NodePtr &has_ref_attr_node, const string &src_var_name, | Status VarMemAssignUtil::AssignData2VarRef(const ge::NodePtr &has_ref_attr_node, const string &src_var_name, | ||||
uint64_t session_id, uint32_t out_index) { | |||||
// Get ref_var_src_var address | |||||
auto root_graph = GraphUtils::FindRootGraph(has_ref_attr_node->GetOwnerComputeGraph()); | |||||
GE_CHECK_NOTNULL(root_graph); | |||||
ge::NodePtr var_ref_src_var = root_graph->FindNode(src_var_name); | |||||
if (var_ref_src_var == nullptr) { | |||||
for (auto sub_graph : root_graph->GetAllSubgraphs()) { | |||||
auto node_ptr = sub_graph->FindNode(src_var_name); | |||||
if (node_ptr != nullptr) { | |||||
var_ref_src_var = node_ptr; | |||||
break; | |||||
} | |||||
} | |||||
uint64_t session_id) { | |||||
if (!TransOpUtil::IsTransOp(has_ref_attr_node)) { | |||||
return SUCCESS; | |||||
} | } | ||||
// Get ref_var_src_var address | |||||
ge::NodePtr var_ref_src_var = has_ref_attr_node->GetOwnerComputeGraph()->FindNode(src_var_name); | |||||
GE_IF_BOOL_EXEC(var_ref_src_var == nullptr || var_ref_src_var->GetOpDesc() == nullptr, return FAILED); | GE_IF_BOOL_EXEC(var_ref_src_var == nullptr || var_ref_src_var->GetOpDesc() == nullptr, return FAILED); | ||||
GeTensorDesc src_tensor_desc = var_ref_src_var->GetOpDesc()->GetOutputDesc(0); | GeTensorDesc src_tensor_desc = var_ref_src_var->GetOpDesc()->GetOutputDesc(0); | ||||
uint8_t *dev_ptr = nullptr; | uint8_t *dev_ptr = nullptr; | ||||
@@ -334,8 +322,14 @@ Status VarMemAssignUtil::AssignData2VarRef(const ge::NodePtr &has_ref_attr_node, | |||||
vector<int64_t> ref_attr_node_output_list = has_ref_attr_node->GetOpDesc()->GetOutputOffset(); | vector<int64_t> ref_attr_node_output_list = has_ref_attr_node->GetOpDesc()->GetOutputOffset(); | ||||
GE_CHECK_SIZE(ref_attr_node_output_list.size()); | GE_CHECK_SIZE(ref_attr_node_output_list.size()); | ||||
GE_CHK_BOOL_RET_STATUS(out_index < ref_attr_node_output_list.size(), FAILED, | |||||
"out_index %u >= ref_attr_node_output_list.size() %zu", out_index, | |||||
int out_index = 0; | |||||
bool is_get = ge::AttrUtils::GetInt(var_ref_src_var->GetOpDesc(), REF_VAR_PRE_PEER_OUT_INDEX, out_index); | |||||
if (!is_get) { | |||||
GELOGI("%s failed to get attr [REF_VAR_PRE_PEER_OUT_INDEX]", var_ref_src_var->GetName().c_str()); | |||||
} | |||||
GE_CHK_BOOL_RET_STATUS(static_cast<size_t>(out_index) < ref_attr_node_output_list.size(), FAILED, | |||||
"out_index %d >= ref_attr_node_output_list.size() %zu", out_index, | |||||
ref_attr_node_output_list.size()); | ref_attr_node_output_list.size()); | ||||
ref_attr_node_output_list[out_index] = static_cast<int64_t>(reinterpret_cast<uintptr_t>(dev_ptr)); | ref_attr_node_output_list[out_index] = static_cast<int64_t>(reinterpret_cast<uintptr_t>(dev_ptr)); | ||||
@@ -46,8 +46,8 @@ class VarMemAssignUtil { | |||||
static Status DealTransNode(const ge::NodePtr &final_trans_node); | static Status DealTransNode(const ge::NodePtr &final_trans_node); | ||||
static Status DealExportTransNode(const ge::NodePtr &node, const ge::NodePtr &final_trans_node); | static Status DealExportTransNode(const ge::NodePtr &node, const ge::NodePtr &final_trans_node); | ||||
static Status AssignData2VarRef(const ge::NodePtr &variable_ref, const std::string &src_var_name, uint64_t session_id, | |||||
uint32_t out_index); | |||||
static Status AssignData2VarRef(const ge::NodePtr &variable_ref, const std::string &src_var_name, | |||||
uint64_t session_id); | |||||
static Status SetOutTransNodeToAssign(const ge::NodePtr &node, const ge::NodePtr &final_trans_node, size_t index); | static Status SetOutTransNodeToAssign(const ge::NodePtr &node, const ge::NodePtr &final_trans_node, size_t index); | ||||
}; | }; | ||||
@@ -15,10 +15,10 @@ | |||||
*/ | */ | ||||
#include "graph/build/model_builder.h" | #include "graph/build/model_builder.h" | ||||
#include <securectype.h> | |||||
#include <iostream> | #include <iostream> | ||||
#include <set> | #include <set> | ||||
#include <unordered_map> | #include <unordered_map> | ||||
#include <securectype.h> | |||||
#include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "graph/anchor.h" | #include "graph/anchor.h" | ||||
@@ -27,7 +27,6 @@ | |||||
#include "graph/build/label_allocator.h" | #include "graph/build/label_allocator.h" | ||||
#include "graph/build/stream_allocator.h" | #include "graph/build/stream_allocator.h" | ||||
#include "graph/common/omg_util.h" | #include "graph/common/omg_util.h" | ||||
#include "graph/common/ge_call_wrapper.h" | |||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
#include "graph/ge_attr_value.h" | #include "graph/ge_attr_value.h" | ||||
#include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
@@ -86,11 +85,9 @@ bool IsGeLocalOp(const ge::ConstOpDescPtr &op_desc) { | |||||
} // namespace | } // namespace | ||||
namespace ge { | namespace ge { | ||||
ModelBuilder::ModelBuilder(uint64_t session_id, ge::ComputeGraphPtr compute_graph, | |||||
const Graph2SubGraphInfoList &subgraphs, const map<string, int> &stream_max_parallel_num, | |||||
bool hcom_parallel, int mode) | |||||
: session_id_(session_id), | |||||
mem_offset_(0), | |||||
ModelBuilder::ModelBuilder(ge::ComputeGraphPtr compute_graph, const Graph2SubGraphInfoList &subgraphs, | |||||
const map<string, int> &stream_max_parallel_num, bool hcom_parallel, int mode) | |||||
: mem_offset_(0), | |||||
weight_offset_(kWeightsStartOffset), | weight_offset_(kWeightsStartOffset), | ||||
compute_graph_(std::move(compute_graph)), | compute_graph_(std::move(compute_graph)), | ||||
subgraphs_(subgraphs), | subgraphs_(subgraphs), | ||||
@@ -245,7 +242,7 @@ Status ModelBuilder::SetInputOutputDesc() { | |||||
Status ret; | Status ret; | ||||
GELOGI("Start to SetInputOutputDesc."); | GELOGI("Start to SetInputOutputDesc."); | ||||
for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
for (const ge::NodePtr &n : compute_graph_->GetAllNodes()) { | |||||
auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | ||||
@@ -294,7 +291,7 @@ Status ModelBuilder::SetInputOutputDesc() { | |||||
} | } | ||||
void ModelBuilder::AddNodeInputProperty() { | void ModelBuilder::AddNodeInputProperty() { | ||||
for (const ge::NodePtr &node : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | |||||
auto node_op_desc = node->GetOpDesc(); | auto node_op_desc = node->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return ); | GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return ); | ||||
vector<string> src_name_list; | vector<string> src_name_list; | ||||
@@ -321,7 +318,7 @@ void ModelBuilder::AddNodeInputProperty() { | |||||
node_op_desc->SetSrcIndex(src_index_list); | node_op_desc->SetSrcIndex(src_index_list); | ||||
} | } | ||||
for (const ge::NodePtr &node : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | |||||
auto node_op_desc = node->GetOpDesc(); | auto node_op_desc = node->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return ); | GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return ); | ||||
GE_IF_BOOL_EXEC(node_op_desc->GetType() == NETOUTPUT, continue); | GE_IF_BOOL_EXEC(node_op_desc->GetType() == NETOUTPUT, continue); | ||||
@@ -359,7 +356,7 @@ void ModelBuilder::AddNodeInputProperty() { | |||||
Status ModelBuilder::AdjustInputTensorFlag() { | Status ModelBuilder::AdjustInputTensorFlag() { | ||||
GELOGI("Start to AdjustInputTensorFlag."); | GELOGI("Start to AdjustInputTensorFlag."); | ||||
for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
for (const ge::NodePtr &n : compute_graph_->GetAllNodes()) { | |||||
if ((n->GetType() == DATA_TYPE) || (n->GetType() == AIPP_DATA_TYPE)) { | if ((n->GetType() == DATA_TYPE) || (n->GetType() == AIPP_DATA_TYPE)) { | ||||
GELOGD("Data node: %s.", n->GetName().c_str()); | GELOGD("Data node: %s.", n->GetName().c_str()); | ||||
for (const auto &anchor : n->GetAllOutDataAnchors()) { | for (const auto &anchor : n->GetAllOutDataAnchors()) { | ||||
@@ -435,21 +432,6 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { | |||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetBool(&model, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_), | GE_CHK_BOOL_EXEC(ge::AttrUtils::SetBool(&model, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_), | ||||
GELOGE(FAILED, "SetBool of ATTR_NAME_SWITCH_FOR_L1_FUSION failed."); | GELOGE(FAILED, "SetBool of ATTR_NAME_SWITCH_FOR_L1_FUSION failed."); | ||||
return FAILED); | return FAILED); | ||||
const DumpProperties &dump_properties = PropertiesManager::Instance().GetDumpProperties(session_id_); | |||||
bool is_op_debug = dump_properties.IsOpDebugOpen(); | |||||
GELOGI("Get op debug:%d", is_op_debug); | |||||
if (is_op_debug) { | |||||
if (!ge::AttrUtils::SetBool(&model, ATTR_OP_DEBUG_FLAG, is_op_debug)) { | |||||
GELOGE(FAILED, "SetBool of ATTR_OP_DEBUG_FLAG failed."); | |||||
return FAILED; | |||||
} | |||||
uint32_t op_debug_mode = dump_properties.GetOpDebugMode(); | |||||
GELOGI("Get op debug mode:%d", op_debug_mode); | |||||
if (!ge::AttrUtils::SetInt(&model, ATTR_OP_DEBUG_MODE, op_debug_mode)) { | |||||
GELOGE(FAILED, "SetBool of ATTR_OP_DEBUG_MODE failed."); | |||||
return FAILED; | |||||
} | |||||
} | |||||
model.SetName(compute_graph_->GetName()); | model.SetName(compute_graph_->GetName()); | ||||
model.SetGraph(ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph_)); | model.SetGraph(ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph_)); | ||||
@@ -466,7 +448,7 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { | |||||
} | } | ||||
void ModelBuilder::ClearOriginalFormat() { | void ModelBuilder::ClearOriginalFormat() { | ||||
for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
for (const ge::NodePtr &n : compute_graph_->GetAllNodes()) { | |||||
auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
if (node_op_desc != nullptr) { | if (node_op_desc != nullptr) { | ||||
if (node_op_desc->HasAttr(ATTR_NAME_FORMAT)) { | if (node_op_desc->HasAttr(ATTR_NAME_FORMAT)) { | ||||
@@ -505,7 +487,7 @@ Status ModelBuilder::MergeWeights() { | |||||
weight_buffer_ = buffer; | weight_buffer_ = buffer; | ||||
auto base_addr = weight_buffer_.GetData(); | auto base_addr = weight_buffer_.GetData(); | ||||
for (const ge::NodePtr &node : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | |||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | ||||
if (node->GetType() != CONSTANT) { | if (node->GetType() != CONSTANT) { | ||||
@@ -545,8 +527,8 @@ Status ModelBuilder::MergeWeights() { | |||||
weight_data.size()); | weight_data.size()); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
uintptr_t dst_ptr = reinterpret_cast<uintptr_t>(base_addr) + offset; | |||||
uintptr_t src_ptr = reinterpret_cast<uintptr_t>(weight_data.data()); | |||||
uintptr_t dst_ptr = (uintptr_t)base_addr + offset; | |||||
uintptr_t src_ptr = (uintptr_t)weight_data.data(); | |||||
size_t left_size = weight_data.size(); | size_t left_size = weight_data.size(); | ||||
while (left_size > SECUREC_MEM_MAX_LEN) { | while (left_size > SECUREC_MEM_MAX_LEN) { | ||||
auto err = memcpy_s(reinterpret_cast<void *>(dst_ptr), SECUREC_MEM_MAX_LEN, reinterpret_cast<void *>(src_ptr), | auto err = memcpy_s(reinterpret_cast<void *>(dst_ptr), SECUREC_MEM_MAX_LEN, reinterpret_cast<void *>(src_ptr), | ||||
@@ -583,7 +565,7 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { | |||||
// Add TBE Kernels | // Add TBE Kernels | ||||
std::set<std::string> name_set; | std::set<std::string> name_set; | ||||
for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
for (const ge::NodePtr &n : compute_graph_->GetAllNodes()) { | |||||
auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | ||||
TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); | TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); | ||||
@@ -677,7 +659,7 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) { | |||||
// Compile single op in graph build stage | // Compile single op in graph build stage | ||||
GE_TIMESTAMP_START(CompileSingleOp); | GE_TIMESTAMP_START(CompileSingleOp); | ||||
GE_CHK_STATUS_RET(CompileSingleOp(), "ATC builder CompileSingleOp() return fail."); | GE_CHK_STATUS_RET(CompileSingleOp(), "ATC builder CompileSingleOp() return fail."); | ||||
GE_TIMESTAMP_EVENT_END(CompileSingleOp, "GraphBuilder::CompileSingleOp"); | |||||
GE_TIMESTAMP_END(CompileSingleOp, "GraphBuilder::CompileSingleOp"); | |||||
// Refresh real streams and insert event nodes. | // Refresh real streams and insert event nodes. | ||||
GE_TIMESTAMP_START(RefreshRealStream); | GE_TIMESTAMP_START(RefreshRealStream); | ||||
@@ -718,7 +700,7 @@ Status ModelBuilder::CompileSingleOp() { | |||||
GE_TIMESTAMP_CALLNUM_START(BatchCompileOp); | GE_TIMESTAMP_CALLNUM_START(BatchCompileOp); | ||||
std::unordered_map<string, vector<ge::NodePtr>> node_vector_map; | std::unordered_map<string, vector<ge::NodePtr>> node_vector_map; | ||||
for (auto &node : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||||
for (auto &node : compute_graph_->GetAllNodes()) { | |||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
continue; | continue; | ||||
@@ -755,7 +737,7 @@ Status ModelBuilder::CompileSingleOp() { | |||||
GE_CHECK_NOTNULL(kernel_info); | GE_CHECK_NOTNULL(kernel_info); | ||||
GE_TIMESTAMP_RESTART(BatchCompileOp); | GE_TIMESTAMP_RESTART(BatchCompileOp); | ||||
auto ret = kernel_info->CompileOp(node_vector); | auto ret = kernel_info->CompileOp(node_vector); | ||||
GELOGI("[GEPERFTRACE] The node size of compile op of %s is %zu", kernel_lib_name.c_str(), node_vector.size()); | |||||
GEEVENT("[GEPERFTRACE] The node size of compile op of %s is %zu", kernel_lib_name.c_str(), node_vector.size()); | |||||
GE_TIMESTAMP_ADD(BatchCompileOp); | GE_TIMESTAMP_ADD(BatchCompileOp); | ||||
if (ret != ge::SUCCESS) { | if (ret != ge::SUCCESS) { | ||||
GELOGE(ret, "Compile op failed, kernel lib name is %s", kernel_lib_name.c_str()); | GELOGE(ret, "Compile op failed, kernel lib name is %s", kernel_lib_name.c_str()); | ||||
@@ -37,7 +37,7 @@ | |||||
namespace ge { | namespace ge { | ||||
class ModelBuilder { | class ModelBuilder { | ||||
public: | public: | ||||
ModelBuilder(uint64_t session_id, ge::ComputeGraphPtr whole_graph, const Graph2SubGraphInfoList &subgraphs, | |||||
ModelBuilder(ge::ComputeGraphPtr whole_graph, const Graph2SubGraphInfoList &subgraphs, | |||||
const std::map<std::string, int> &stream_max_parallel_num, bool hcom_parallel, | const std::map<std::string, int> &stream_max_parallel_num, bool hcom_parallel, | ||||
int mode = static_cast<int>(domi::BuildMode::GEN_TASK_WITHOUT_FUSION)); | int mode = static_cast<int>(domi::BuildMode::GEN_TASK_WITHOUT_FUSION)); | ||||
@@ -82,8 +82,6 @@ class ModelBuilder { | |||||
Status CompileSingleOp(); | Status CompileSingleOp(); | ||||
uint64_t session_id_; | |||||
size_t mem_offset_; | size_t mem_offset_; | ||||
size_t weight_offset_; | size_t weight_offset_; | ||||
@@ -173,4 +173,5 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra | |||||
} | } | ||||
RunContext &RunContextUtil::GetRunContext() { return run_context_; } | RunContext &RunContextUtil::GetRunContext() { return run_context_; } | ||||
} // namespace ge | } // namespace ge |
@@ -146,6 +146,12 @@ Status StreamAllocator::RefreshRealStream(int64_t &stream_num, int64_t &event_nu | |||||
return status; | return status; | ||||
} | } | ||||
status = AddActiveEntryStream(); | |||||
if (status != SUCCESS) { | |||||
GELOGE(status, "AddActiveEntryStream failed!"); | |||||
return status; | |||||
} | |||||
status = RefreshContinuousEvents(); | status = RefreshContinuousEvents(); | ||||
if (status != SUCCESS) { | if (status != SUCCESS) { | ||||
GELOGE(status, "RefreshContinuousEvents failed!"); | GELOGE(status, "RefreshContinuousEvents failed!"); | ||||
@@ -161,7 +167,7 @@ Status StreamAllocator::RefreshRealStream(int64_t &stream_num, int64_t &event_nu | |||||
DumpEvents(); | DumpEvents(); | ||||
GE_DUMP(whole_graph_, "AfterRefreshRealStream"); | GE_DUMP(whole_graph_, "AfterRefreshRealStream"); | ||||
for (const NodePtr &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
for (const NodePtr &node : whole_graph_->GetAllNodes()) { | |||||
GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
auto stream_id = node->GetOpDesc()->GetStreamId(); | auto stream_id = node->GetOpDesc()->GetStreamId(); | ||||
if (stream_id == kInvalidStream) { | if (stream_id == kInvalidStream) { | ||||
@@ -193,7 +199,7 @@ Status StreamAllocator::AssignSingleStream() { | |||||
} | } | ||||
int64_t task_count = 0; | int64_t task_count = 0; | ||||
for (const NodePtr &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
for (const NodePtr &node : whole_graph_->GetAllNodes()) { | |||||
string op_type = node->GetType(); | string op_type = node->GetType(); | ||||
if (IsHcclOp(op_type)) { | if (IsHcclOp(op_type)) { | ||||
task_count += kTaskNumPerHcclNode; | task_count += kTaskNumPerHcclNode; | ||||
@@ -230,7 +236,7 @@ Status StreamAllocator::AssignSingleStream() { | |||||
} | } | ||||
Status StreamAllocator::SetActiveStreamsByLabel() { | Status StreamAllocator::SetActiveStreamsByLabel() { | ||||
for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
for (const auto &node : whole_graph_->GetAllNodes()) { | |||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
string stream_label; | string stream_label; | ||||
@@ -242,7 +248,7 @@ Status StreamAllocator::SetActiveStreamsByLabel() { | |||||
} | } | ||||
} | } | ||||
for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
for (const auto &node : whole_graph_->GetAllNodes()) { | |||||
GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
vector<string> activated_label_list; | vector<string> activated_label_list; | ||||
if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) || | if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) || | ||||
@@ -320,7 +326,7 @@ Status StreamAllocator::SetActiveStreamsForSubgraphs() { | |||||
// Insert the send/recv event id to the graph | // Insert the send/recv event id to the graph | ||||
Status StreamAllocator::InsertSyncEvents() { | Status StreamAllocator::InsertSyncEvents() { | ||||
for (const auto &cur_node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
for (const auto &cur_node : whole_graph_->GetAllNodes()) { | |||||
// Take the adjacent points, then judge whether need to insert the event | // Take the adjacent points, then judge whether need to insert the event | ||||
for (const OutDataAnchorPtr &anchor : cur_node->GetAllOutDataAnchors()) { | for (const OutDataAnchorPtr &anchor : cur_node->GetAllOutDataAnchors()) { | ||||
for (const InDataAnchorPtr &peer_in_anchor : anchor->GetPeerInDataAnchors()) { | for (const InDataAnchorPtr &peer_in_anchor : anchor->GetPeerInDataAnchors()) { | ||||
@@ -374,11 +380,6 @@ Status StreamAllocator::InsertOneEventInTwoNodes(const NodePtr &cur_node, const | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
if ((cur_node->GetType() == ENTER) || (cur_node->GetType() == REFENTER)) { | |||||
GELOGD("No need to insert event after enter_node %s.", cur_node->GetName().c_str()); | |||||
return SUCCESS; | |||||
} | |||||
if (next_stream_id == kInvalidStream) { | if (next_stream_id == kInvalidStream) { | ||||
GELOGE(FAILED, "Stream id of next_node %s should not be %ld", next_node->GetName().c_str(), kInvalidStream); | GELOGE(FAILED, "Stream id of next_node %s should not be %ld", next_node->GetName().c_str(), kInvalidStream); | ||||
return FAILED; | return FAILED; | ||||
@@ -445,7 +446,7 @@ Status StreamAllocator::InsertEventsForSubgraph() { | |||||
Status StreamAllocator::OptimizeSyncEvents() { | Status StreamAllocator::OptimizeSyncEvents() { | ||||
map<int64_t, vector<NodePtr>> stream_nodes; | map<int64_t, vector<NodePtr>> stream_nodes; | ||||
for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
for (const auto &node : whole_graph_->GetAllNodes()) { | |||||
GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
int64_t stream_id = node->GetOpDesc()->GetStreamId(); | int64_t stream_id = node->GetOpDesc()->GetStreamId(); | ||||
stream_nodes[stream_id].emplace_back(node); | stream_nodes[stream_id].emplace_back(node); | ||||
@@ -670,7 +671,7 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) { | |||||
GE_CHK_STATUS_RET(GetMaxStreamAndTask(false, max_stream_count, max_task_count), | GE_CHK_STATUS_RET(GetMaxStreamAndTask(false, max_stream_count, max_task_count), | ||||
"Get max stream and task count failed."); | "Get max stream and task count failed."); | ||||
for (const auto &cur_node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
for (const auto &cur_node : whole_graph_->GetAllNodes()) { | |||||
GE_CHECK_NOTNULL(cur_node); | GE_CHECK_NOTNULL(cur_node); | ||||
auto op_desc = cur_node->GetOpDesc(); | auto op_desc = cur_node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
@@ -773,23 +774,42 @@ bool StreamAllocator::NeedSpiltNewStream(int64_t stream_node_num, int64_t max_no | |||||
Status StreamAllocator::UpdateActiveStreams(const vector<set<int64_t>> &split_streams) { | Status StreamAllocator::UpdateActiveStreams(const vector<set<int64_t>> &split_streams) { | ||||
UpdateLabelStreams(split_streams); | UpdateLabelStreams(split_streams); | ||||
for (auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
for (auto &node : whole_graph_->GetAllNodes()) { | |||||
if ((node->GetType() == STREAMSWITCH) || (node->GetType() == STREAMSWITCHN)) { | if ((node->GetType() == STREAMSWITCH) || (node->GetType() == STREAMSWITCHN)) { | ||||
if (UpdateActiveStreamsForSwitchNode(node) != SUCCESS) { | |||||
GELOGE(FAILED, "Update active streams for switch node: %s failed.", node->GetName().c_str()); | |||||
if (InsertActiveNodesAfterSwitch(node) != SUCCESS) { | |||||
GELOGE(FAILED, "Insert active nodes after switch node failed."); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
} else { | } else { | ||||
if (UpdateActiveStreamsForActiveNode(split_streams, node) != SUCCESS) { | |||||
GELOGE(FAILED, "Update active streams for active node: %s failed.", node->GetName().c_str()); | |||||
return FAILED; | |||||
vector<uint32_t> active_streams; | |||||
GE_CHECK_NOTNULL(node->GetOpDesc()); | |||||
if (AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { | |||||
vector<uint32_t> new_active_streams = active_streams; | |||||
for (const uint32_t logical_stream : active_streams) { | |||||
if (static_cast<size_t>(logical_stream) >= split_streams.size()) { | |||||
GELOGE(FAILED, "logical stream is out of range."); | |||||
return FAILED; | |||||
} | |||||
const set<int64_t> &new_split_streams = split_streams[logical_stream]; | |||||
if (!new_split_streams.empty()) { | |||||
for (int64_t split_stream : new_split_streams) { | |||||
new_active_streams.emplace_back(static_cast<uint32_t>(split_stream)); | |||||
GELOGI("Add stream %ld to active_stream_list of node %s of graph %s", split_stream, | |||||
node->GetName().c_str(), node->GetOwnerComputeGraph()->GetName().c_str()); | |||||
} | |||||
} | |||||
} | |||||
if (!AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, new_active_streams)) { | |||||
GELOGE(FAILED, "Set active streams for node %s failed.", node->GetName().c_str()); | |||||
return FAILED; | |||||
} | |||||
} | } | ||||
} | } | ||||
} | } | ||||
Status status = UpdateActiveStreamsForSubgraphs(); | Status status = UpdateActiveStreamsForSubgraphs(); | ||||
if (status != SUCCESS) { | if (status != SUCCESS) { | ||||
GELOGE(status, "Update active streams for subgraphs failed!"); | |||||
GELOGE(status, "SetActiveStreamsForSubgraph failed!"); | |||||
return status; | return status; | ||||
} | } | ||||
@@ -820,7 +840,7 @@ void StreamAllocator::UpdateLabelStreams(const vector<set<int64_t>> &split_strea | |||||
} | } | ||||
} | } | ||||
Status StreamAllocator::UpdateActiveStreamsForSwitchNode(NodePtr &switch_node) { | |||||
Status StreamAllocator::InsertActiveNodesAfterSwitch(NodePtr &switch_node) { | |||||
vector<NodePtr> active_nodes; | vector<NodePtr> active_nodes; | ||||
if (InsertActiveNodesAfterSwitch(switch_node, active_nodes) != SUCCESS) { | if (InsertActiveNodesAfterSwitch(switch_node, active_nodes) != SUCCESS) { | ||||
GELOGE(FAILED, "Insert active nodes after node %s failed.", switch_node->GetName().c_str()); | GELOGE(FAILED, "Insert active nodes after node %s failed.", switch_node->GetName().c_str()); | ||||
@@ -886,38 +906,6 @@ Status StreamAllocator::InsertActiveNodesAfterSwitch(NodePtr &switch_node, vecto | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status StreamAllocator::UpdateActiveStreamsForActiveNode(const vector<set<int64_t>> &split_streams, NodePtr &node) { | |||||
GE_CHECK_NOTNULL(node->GetOpDesc()); | |||||
vector<uint32_t> active_streams; | |||||
if (AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { | |||||
vector<uint32_t> new_active_streams = active_streams; | |||||
for (uint32_t logical_stream : active_streams) { | |||||
if (static_cast<size_t>(logical_stream) >= split_streams.size()) { | |||||
GELOGE(FAILED, "logical stream is out of range."); | |||||
return FAILED; | |||||
} | |||||
const set<int64_t> &new_split_streams = split_streams[logical_stream]; | |||||
for (int64_t split_stream : new_split_streams) { | |||||
for (const auto &node_stream : node_split_stream_map_) { | |||||
if (split_stream == node_stream.second) { | |||||
if (node_stream.first->GetOwnerComputeGraph() == node->GetOwnerComputeGraph()) { | |||||
new_active_streams.emplace_back(static_cast<uint32_t>(split_stream)); | |||||
GELOGI("Add stream %ld to active_stream_list of node %s of graph %s", split_stream, | |||||
node->GetName().c_str(), node->GetOwnerComputeGraph()->GetName().c_str()); | |||||
} | |||||
break; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
if (!AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, new_active_streams)) { | |||||
GELOGE(FAILED, "Set active streams for node %s failed.", node->GetName().c_str()); | |||||
return FAILED; | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status StreamAllocator::UpdateActiveStreamsForSubgraphs() const { | Status StreamAllocator::UpdateActiveStreamsForSubgraphs() const { | ||||
// Update active stream list for active nodes | // Update active stream list for active nodes | ||||
for (auto &node_stream_pair : node_split_stream_map_) { | for (auto &node_stream_pair : node_split_stream_map_) { | ||||
@@ -938,19 +926,14 @@ Status StreamAllocator::UpdateActiveStreamsForSubgraphs() const { | |||||
} | } | ||||
const auto &active_node = it->second; | const auto &active_node = it->second; | ||||
GE_CHECK_NOTNULL(active_node); | GE_CHECK_NOTNULL(active_node); | ||||
auto active_op = active_node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(active_op); | |||||
auto op_desc = active_node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
vector<uint32_t> active_streams; | vector<uint32_t> active_streams; | ||||
(void)AttrUtils::GetListInt(active_op, ATTR_NAME_ACTIVE_STREAM_LIST, active_streams); | |||||
(void)AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_streams); | |||||
set<uint32_t> new_active_streams(active_streams.begin(), active_streams.end()); | set<uint32_t> new_active_streams(active_streams.begin(), active_streams.end()); | ||||
// specific_activated_streams_ has already contained new split activated stream | |||||
int64_t new_split_stream = node_stream_pair.second; | |||||
if (IsActivated(new_split_stream)) { | |||||
continue; | |||||
} | |||||
new_active_streams.emplace(static_cast<uint32_t>(new_split_stream)); | |||||
new_active_streams.emplace(static_cast<uint32_t>(node_stream_pair.second)); | |||||
active_streams.assign(new_active_streams.begin(), new_active_streams.end()); | active_streams.assign(new_active_streams.begin(), new_active_streams.end()); | ||||
if (!AttrUtils::SetListInt(active_op, ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { | |||||
if (!AttrUtils::SetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { | |||||
GELOGE(FAILED, "Set active streams for node %s failed.", active_node->GetName().c_str()); | GELOGE(FAILED, "Set active streams for node %s failed.", active_node->GetName().c_str()); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -959,20 +942,6 @@ Status StreamAllocator::UpdateActiveStreamsForSubgraphs() const { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
bool StreamAllocator::IsActivated(int64_t stream_id) const { | |||||
for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
auto op_desc = node->GetOpDesc(); | |||||
vector<uint32_t> active_streams; | |||||
if (op_desc == nullptr || !AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { | |||||
continue; | |||||
} | |||||
if (std::find(active_streams.begin(), active_streams.end(), stream_id) != active_streams.end()) { | |||||
return true; | |||||
} | |||||
} | |||||
return false; | |||||
} | |||||
Status StreamAllocator::SetActiveStreamsForLoop() { | Status StreamAllocator::SetActiveStreamsForLoop() { | ||||
vector<uint32_t> loop_active_streams; | vector<uint32_t> loop_active_streams; | ||||
for (int64_t stream_id = 0; stream_id < stream_num_; stream_id++) { | for (int64_t stream_id = 0; stream_id < stream_num_; stream_id++) { | ||||
@@ -981,7 +950,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||||
} | } | ||||
} | } | ||||
// Set the stream that needs to be activated | // Set the stream that needs to be activated | ||||
for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
for (const auto &node : whole_graph_->GetAllNodes()) { | |||||
GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
bool is_loop_active = false; | bool is_loop_active = false; | ||||
if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) { | if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) { | ||||
@@ -1004,7 +973,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||||
} | } | ||||
Status StreamAllocator::CheckStreamActived() const { | Status StreamAllocator::CheckStreamActived() const { | ||||
for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
for (const auto &node : whole_graph_->GetAllNodes()) { | |||||
GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
vector<uint32_t> active_streams; | vector<uint32_t> active_streams; | ||||
if (AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { | if (AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { | ||||
@@ -1020,6 +989,108 @@ Status StreamAllocator::CheckStreamActived() const { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
// Add active entry stream for special env. | |||||
Status StreamAllocator::AddActiveEntryStream() { | |||||
auto gelib = GELib::GetInstance(); | |||||
bool head_stream = (gelib == nullptr) ? false : gelib->HeadStream(); | |||||
GELOGI("Configured head stream: %u", head_stream); | |||||
if (!head_stream) { | |||||
return SUCCESS; | |||||
} | |||||
// Collect streams active by StreamSwitch/StreamActive node. | |||||
std::set<uint32_t> deactive_stream; | |||||
for (ge::NodePtr &node : whole_graph_->GetAllNodes()) { | |||||
GE_CHECK_NOTNULL(node->GetOpDesc()); | |||||
Status ret = CollectDeactiveStream(node->GetOpDesc(), deactive_stream); | |||||
if (ret != SUCCESS) { | |||||
return ret; | |||||
} | |||||
} | |||||
// Collect default active stream, Add to active entry stream. | |||||
std::vector<uint32_t> active_stream_list; | |||||
for (int64_t stream_id = 0; stream_id < stream_num_; ++stream_id) { | |||||
if (deactive_stream.count(stream_id) == 0) { | |||||
active_stream_list.push_back(stream_id); | |||||
} | |||||
} | |||||
int64_t new_stream_id = stream_num_; | |||||
stream_num_++; | |||||
return InsertActiveEntryStream(active_stream_list, new_stream_id); | |||||
} | |||||
// Collect deactive stream from flowctrl op. | |||||
Status StreamAllocator::CollectDeactiveStream(const OpDescPtr &op_desc, std::set<uint32_t> &deactive_streams) const { | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
std::string op_type = op_desc->GetType(); | |||||
if (op_type == STREAMSWITCH) { | |||||
std::vector<uint32_t> active_stream_list; | |||||
// If GetListInt fail, active_stream_list is empty. | |||||
(void)ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_list); | |||||
if (active_stream_list.size() != kMaxSwitchStreamNum) { | |||||
GELOGE(INTERNAL_ERROR, "Stream num of switch true branch must be %u.", kMaxSwitchStreamNum); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
deactive_streams.insert(active_stream_list[0]); | |||||
GELOGI("Flowctrl_op node:%s, flowctrl stream id:%u.", op_desc->GetName().c_str(), active_stream_list[0]); | |||||
} else if (op_type == STREAMACTIVE) { | |||||
if (op_desc->HasAttr(ATTR_NAME_SWITCH_BRANCH_NODE_LABEL)) { | |||||
std::vector<uint32_t> active_stream_list; | |||||
if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_list)) { | |||||
GELOGE(INTERNAL_ERROR, "StreamActiveOp get attr ACTIVE_STREAM fail."); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
for (uint32_t deactive_stream : active_stream_list) { | |||||
deactive_streams.insert(deactive_stream); | |||||
GELOGI("Flowctrl_op node:%s, flowctrl stream id:%u.", op_desc->GetName().c_str(), deactive_stream); | |||||
} | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
// Insert StreamActive Op for Entry Stream. | |||||
Status StreamAllocator::InsertActiveEntryStream(const std::vector<uint32_t> &active_streams, int64_t stream_id) { | |||||
string node_name = whole_graph_->GetName() + "_ActiveEntryStream_" + string(STREAMACTIVE); | |||||
OpDescPtr op_desc = ge::MakeShared<OpDesc>(node_name, STREAMACTIVE); | |||||
if (op_desc == nullptr) { | |||||
GELOGE(FAILED, "Failed to new opdesc."); | |||||
return FAILED; | |||||
} | |||||
GELOGI("Create StreamActive op:%s.", op_desc->GetName().c_str()); | |||||
GE_CHK_BOOL_EXEC( | |||||
AttrUtils::SetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector<std::string>())), | |||||
GELOGE(FAILED, "SetListStr failed."); | |||||
return FAILED); | |||||
NodePtr active_node = whole_graph_->AddNodeFront(op_desc); | |||||
GE_IF_BOOL_EXEC(active_node == nullptr, | |||||
GELOGE(FAILED, "Create StreamActive op: %s failed.", op_desc->GetName().c_str()); | |||||
return INTERNAL_ERROR); | |||||
GE_CHECK_NOTNULL(active_node->GetOpDesc()); | |||||
// Add one stream for ActiveEntryStream Task. | |||||
active_node->GetOpDesc()->SetStreamId(stream_id); | |||||
GE_CHK_BOOL_EXEC(AttrUtils::SetBool(op_desc, "is_aicpu_stream", true), GELOGE(FAILED, "SetBool failed."); | |||||
return FAILED); | |||||
GE_CHK_BOOL_EXEC(AttrUtils::SetListInt(active_node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_streams), | |||||
GELOGE(FAILED, "SetListInt failed."); | |||||
return FAILED); | |||||
std::vector<std::string> group_names; | |||||
GE_CHK_BOOL_EXEC(AttrUtils::SetListStr(active_node->GetOpDesc(), ATTR_NAME_SWITCH_BRANCH_NODE_LABEL, group_names), | |||||
GELOGE(FAILED, "SetLisStr failed."); | |||||
return FAILED); | |||||
return SUCCESS; | |||||
} | |||||
// Refresh events to continuous events | // Refresh events to continuous events | ||||
Status StreamAllocator::RefreshContinuousEvents() { | Status StreamAllocator::RefreshContinuousEvents() { | ||||
// Establish a mapping relationship from old to new event id | // Establish a mapping relationship from old to new event id | ||||
@@ -1065,7 +1136,7 @@ Status StreamAllocator::RefreshContinuousEvents() { | |||||
// Insert the real send/recv node in the graph | // Insert the real send/recv node in the graph | ||||
Status StreamAllocator::InsertSyncEventNodes() { | Status StreamAllocator::InsertSyncEventNodes() { | ||||
for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
for (const auto &node : whole_graph_->GetAllNodes()) { | |||||
// Add the node corresponding to the recv event | // Add the node corresponding to the recv event | ||||
vector<uint32_t> recv_event_id_list; | vector<uint32_t> recv_event_id_list; | ||||
GetRecvEventIdList(node, recv_event_id_list); | GetRecvEventIdList(node, recv_event_id_list); | ||||
@@ -1152,7 +1223,7 @@ Status StreamAllocator::ReorderEventNodes() const { | |||||
void StreamAllocator::DumpEvents() { | void StreamAllocator::DumpEvents() { | ||||
map<int64_t, vector<NodePtr>> after_refresh_stream_nodes; | map<int64_t, vector<NodePtr>> after_refresh_stream_nodes; | ||||
for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
for (const auto &node : whole_graph_->GetAllNodes()) { | |||||
GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, continue); | GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, continue); | ||||
int64_t stream_id = node->GetOpDesc()->GetStreamId(); | int64_t stream_id = node->GetOpDesc()->GetStreamId(); | ||||
after_refresh_stream_nodes[stream_id].emplace_back(node); | after_refresh_stream_nodes[stream_id].emplace_back(node); | ||||
@@ -59,16 +59,18 @@ class StreamAllocator { | |||||
Status SplitStreams(std::vector<std::set<int64_t>> &split_streams); | Status SplitStreams(std::vector<std::set<int64_t>> &split_streams); | ||||
bool NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, const OpDescPtr &op_desc) const; | bool NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, const OpDescPtr &op_desc) const; | ||||
Status UpdateActiveStreams(const std::vector<std::set<int64_t>> &split_streams); | |||||
Status UpdateActiveStreams(const std::vector<std::set<int64_t>> &splited_streams); | |||||
void UpdateLabelStreams(const std::vector<std::set<int64_t>> &split_streams); | void UpdateLabelStreams(const std::vector<std::set<int64_t>> &split_streams); | ||||
Status UpdateActiveStreamsForSwitchNode(NodePtr &switch_node); | |||||
Status InsertActiveNodesAfterSwitch(NodePtr &switch_node); | |||||
Status InsertActiveNodesAfterSwitch(NodePtr &switch_nodes, std::vector<NodePtr> &switch_active_nodes); | Status InsertActiveNodesAfterSwitch(NodePtr &switch_nodes, std::vector<NodePtr> &switch_active_nodes); | ||||
Status UpdateActiveStreamsForActiveNode(const std::vector<std::set<int64_t>> &split_streams, NodePtr &node); | |||||
Status UpdateActiveStreamsForSubgraphs() const; | Status UpdateActiveStreamsForSubgraphs() const; | ||||
bool IsActivated(int64_t stream_id) const; | |||||
Status SetActiveStreamsForLoop(); | Status SetActiveStreamsForLoop(); | ||||
Status CheckStreamActived() const; | Status CheckStreamActived() const; | ||||
Status AddActiveEntryStream(); | |||||
Status CollectDeactiveStream(const OpDescPtr &op_desc, std::set<uint32_t> &deactive_streams) const; | |||||
Status InsertActiveEntryStream(const std::vector<uint32_t> &active_streams, int64_t stream_id); | |||||
Status RefreshContinuousEvents(); | Status RefreshContinuousEvents(); | ||||
Status InsertSyncEventNodes(); | Status InsertSyncEventNodes(); | ||||
@@ -29,7 +29,6 @@ | |||||
#include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
#include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
#include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
#include "graph/common/ge_call_wrapper.h" | |||||
#include "init/gelib.h" | #include "init/gelib.h" | ||||
using domi::LogTimeStampDef; | using domi::LogTimeStampDef; | ||||
@@ -48,6 +47,7 @@ const char *const kIsOutputVar = "OUTPUT_IS_VAR"; | |||||
const char *const kProfilingMode = "PROFILING_MODE"; | const char *const kProfilingMode = "PROFILING_MODE"; | ||||
const char *const kProfilingFpPoint = "FP_POINT"; | const char *const kProfilingFpPoint = "FP_POINT"; | ||||
const char *const kProfilingBpPoint = "BP_POINT"; | const char *const kProfilingBpPoint = "BP_POINT"; | ||||
const char *const kOffOptimize = "off_optimize"; | |||||
const uint32_t kProfilingArStep = 2; | const uint32_t kProfilingArStep = 2; | ||||
const uint64_t kProfilingFpStartLogid = 1; | const uint64_t kProfilingFpStartLogid = 1; | ||||
const uint64_t kProfilingBpEndLogid = 2; | const uint64_t kProfilingBpEndLogid = 2; | ||||
@@ -75,7 +75,21 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t | |||||
std::vector<TaskDef> task_def_list; | std::vector<TaskDef> task_def_list; | ||||
std::map<uint32_t, string> op_name_map; | std::map<uint32_t, string> op_name_map; | ||||
GE_DUMP(graph, "GenerateTaskBefore"); | GE_DUMP(graph, "GenerateTaskBefore"); | ||||
Status ret = GenerateTask(run_context, graph, task_def_list, op_name_map); | |||||
bool is_unknown_shape = false; | |||||
NodePtr parent_node = graph->GetParentNode(); | |||||
if (parent_node != nullptr) { | |||||
auto op_desc = parent_node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
(void)AttrUtils::GetBool(op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape); | |||||
} | |||||
Status ret = SUCCESS; | |||||
if (is_unknown_shape) { | |||||
GELOGI("Beign to generate unknown shape task. Graph name is %s.", graph->GetName().c_str()); | |||||
ret = GenerateUnknownShapeTask(run_context, graph, task_def_list, op_name_map); | |||||
} else { | |||||
GELOGI("Beign to generate known shape task. Graph name is %s.", graph->GetName().c_str()); | |||||
ret = GenerateTask(run_context, graph, task_def_list, op_name_map); | |||||
} | |||||
GE_DUMP(graph, "GenerateTaskAfter"); | GE_DUMP(graph, "GenerateTaskAfter"); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
@@ -95,7 +109,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t | |||||
GELOGE(FAILED, "SetListStr failed."); | GELOGE(FAILED, "SetListStr failed."); | ||||
return FAILED); | return FAILED); | ||||
GELOGI("Call GenerateTask Success, task_def_list.size:%zu, op_name_map.size:%zu", task_def_list.size(), | |||||
GELOGI("Generate task success, task_def_list.size:%zu, op_name_map.size:%zu", task_def_list.size(), | |||||
op_name_map.size()); | op_name_map.size()); | ||||
// Init and serialize model_task_def | // Init and serialize model_task_def | ||||
@@ -117,7 +131,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t | |||||
return ret; | return ret; | ||||
} | } | ||||
GELOGI("Get TaskInfo success. session_id=%lu", session_id); | |||||
GELOGI("Get TaskInfo success. session id is %lu", session_id); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -184,7 +198,7 @@ Status TaskGenerator::UpdateOpIsVarAttr(const OpDescPtr &op_desc, uint64_t sessi | |||||
Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion_nodes, ComputeGraphPtr &graph) { | Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion_nodes, ComputeGraphPtr &graph) { | ||||
std::map<NodePtr, int64_t> nodes_with_group_attr; | std::map<NodePtr, int64_t> nodes_with_group_attr; | ||||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
for (auto &node : graph->GetAllNodes()) { | |||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
int64_t group_id = kInvalidGroupId; | int64_t group_id = kInvalidGroupId; | ||||
@@ -235,13 +249,12 @@ Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion | |||||
Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &graph, | Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &graph, | ||||
vector<domi::TaskDef> &task_def_list, map<uint32_t, string> &op_name_map) { | vector<domi::TaskDef> &task_def_list, map<uint32_t, string> &op_name_map) { | ||||
GELOGD("Beign to generate task, graph name is %s.", graph->GetName().c_str()); | |||||
std::shared_ptr<GELib> ge_lib = GELib::GetInstance(); | std::shared_ptr<GELib> ge_lib = GELib::GetInstance(); | ||||
if ((ge_lib == nullptr) || !ge_lib->InitFlag()) { | if ((ge_lib == nullptr) || !ge_lib->InitFlag()) { | ||||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed."); | GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed."); | ||||
return GE_CLI_GE_NOT_INITIALIZED; | return GE_CLI_GE_NOT_INITIALIZED; | ||||
} | } | ||||
GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "MarkNodeAndSetIndex failed."); | |||||
GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "Mark node and set index failed."); | |||||
ProfilingPoint profiling_point; | ProfilingPoint profiling_point; | ||||
vector<uint32_t> all_reduce_nodes; | vector<uint32_t> all_reduce_nodes; | ||||
GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes)); | GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes)); | ||||
@@ -251,21 +264,15 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
GE_TIMESTAMP_CALLNUM_START(GenerateTask); | GE_TIMESTAMP_CALLNUM_START(GenerateTask); | ||||
// map store fusion nodes | // map store fusion nodes | ||||
map<int64_t, std::vector<NodePtr>> fusion_nodes; | map<int64_t, std::vector<NodePtr>> fusion_nodes; | ||||
string buffer_optimize = "off_optimize"; | |||||
string buffer_optimize = kOffOptimize; | |||||
(void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); | (void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); | ||||
if (buffer_optimize != "off_optimize") { | |||||
if (buffer_optimize != kOffOptimize) { | |||||
GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph)); | GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph)); | ||||
} | } | ||||
std::unordered_set<Node *> fusion_nodes_seen; | std::unordered_set<Node *> fusion_nodes_seen; | ||||
int64_t group_key; | int64_t group_key; | ||||
uint32_t node_index = 0; | uint32_t node_index = 0; | ||||
rtStream_t stream = nullptr; | |||||
bool is_unknown_shape = graph->GetGraphUnknownFlag(); | |||||
if (is_unknown_shape) { | |||||
GE_CHK_STATUS_RET(SetUnknownShapeStream(run_context, stream), "Set unknown shape stream failed."); | |||||
} | |||||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
for (auto &node : graph->GetAllNodes()) { | |||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
node_index++; | node_index++; | ||||
@@ -295,6 +302,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
GELOGI("Node[name:%s, type:%s] does not need to generate task.", name.c_str(), type.c_str()); | GELOGI("Node[name:%s, type:%s] does not need to generate task.", name.c_str(), type.c_str()); | ||||
continue; | continue; | ||||
} | } | ||||
OpsKernelInfoStorePtr kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(op_kernel_lib_name); | OpsKernelInfoStorePtr kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(op_kernel_lib_name); | ||||
if (kernel_info_store == nullptr) { | if (kernel_info_store == nullptr) { | ||||
GELOGE(INTERNAL_ERROR, "No ops kernel store found. node:%s(%s), op_kernel_lib_name=%s.", name.c_str(), | GELOGE(INTERNAL_ERROR, "No ops kernel store found. node:%s(%s), op_kernel_lib_name=%s.", name.c_str(), | ||||
@@ -303,17 +311,18 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
} | } | ||||
GE_CHK_STATUS_RET(UpdateAnchorStatus(node), "Call UpdateAnchorStatus node:%s(%s) failed", name.c_str(), | GE_CHK_STATUS_RET(UpdateAnchorStatus(node), "Call UpdateAnchorStatus node:%s(%s) failed", name.c_str(), | ||||
type.c_str()); | type.c_str()); | ||||
int64_t op_id = op_desc->GetId(); | |||||
int64_t stream_id = op_desc->GetStreamId(); | |||||
if (stream_id < 0 || stream_id >= static_cast<int64_t>(run_context.graphStreamList.size())) { | |||||
GELOGE(INTERNAL_ERROR, "node[name:%s(%s), id:%ld] stream id is invalid, stream list size=%zu", name.c_str(), | |||||
type.c_str(), op_id, run_context.graphStreamList.size()); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
// Profiling task | // Profiling task | ||||
size_t task_list_size_before = task_def_list.size(); | size_t task_list_size_before = task_def_list.size(); | ||||
GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | ||||
int64_t op_id = op_desc->GetId(); | |||||
// Compatible with dynamic shape scenes, the default is 0 | |||||
int64_t stream_id = 0; | |||||
if (!is_unknown_shape) { | |||||
stream_id = op_desc->GetStreamId(); | |||||
GE_CHK_STATUS_RET(SetKnownShapeStream(run_context, stream_id), "node[name:%s(%s), id:%ld] stream id is invalid.", | |||||
name.c_str(), type.c_str(), op_id); | |||||
} | |||||
run_context.stream = run_context.graphStreamList[stream_id]; | |||||
GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), | GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), | ||||
name.c_str(), type.c_str(), op_id, stream_id); | name.c_str(), type.c_str(), op_id, stream_id); | ||||
GE_TIMESTAMP_RESTART(GenerateTask); | GE_TIMESTAMP_RESTART(GenerateTask); | ||||
@@ -346,14 +355,131 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
GE_CHECK_NOTNULL(task_def_ptr); | GE_CHECK_NOTNULL(task_def_ptr); | ||||
task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr)); | task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr)); | ||||
} | } | ||||
GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task finished, generate %zu task(s).", | GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task finished, generate %zu task(s).", | ||||
op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, | op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, | ||||
task_list_size_after - task_list_size_before); | task_list_size_after - task_list_size_before); | ||||
} | } | ||||
if (is_unknown_shape) { | |||||
GE_CHK_STATUS_RET(DestroyUnknownShapeStream(run_context, stream), "Destory unknown shape stream failed."); | |||||
GE_TIMESTAMP_CALLNUM_END(GenerateTask, "GraphBuild::GenerateTask"); | |||||
return SUCCESS; | |||||
} | |||||
Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeGraphPtr &graph, | |||||
vector<domi::TaskDef> &task_def_list, | |||||
map<uint32_t, string> &op_name_map) { | |||||
std::shared_ptr<GELib> ge_lib = GELib::GetInstance(); | |||||
if ((ge_lib == nullptr) || !ge_lib->InitFlag()) { | |||||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed."); | |||||
return GE_CLI_GE_NOT_INITIALIZED; | |||||
} | |||||
GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "Mark node and set index failed."); | |||||
ProfilingPoint profiling_point; | |||||
vector<uint32_t> all_reduce_nodes; | |||||
GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes)); | |||||
const OpsKernelManager &ops_kernel_manager = ge_lib->OpsKernelManagerObj(); | |||||
GE_TIMESTAMP_CALLNUM_START(GenerateTask); | |||||
// map store fusion nodes | |||||
map<int64_t, std::vector<NodePtr>> fusion_nodes; | |||||
string buffer_optimize = kOffOptimize; | |||||
(void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); | |||||
if (buffer_optimize != kOffOptimize) { | |||||
GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph)); | |||||
} | |||||
std::unordered_set<Node *> fusion_nodes_seen; | |||||
int64_t group_key; | |||||
uint32_t node_index = 0; | |||||
rtStream_t stream = nullptr; | |||||
GE_CHK_RT_RET(rtStreamCreate(&stream, 0)); | |||||
run_context.stream = stream; | |||||
if (rtModelBindStream(run_context.model, stream, 0) != RT_ERROR_NONE) { | |||||
GELOGE(FAILED, "Call rt api failed."); | |||||
GE_CHK_RT(rtStreamDestroy(stream)); | |||||
return FAILED; | |||||
} | } | ||||
GE_TIMESTAMP_CALLNUM_EVENT_END(GenerateTask, "GraphBuild::GenerateTask"); | |||||
for (auto &node : graph->GetAllNodes()) { | |||||
OpDescPtr op_desc = node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
node_index++; | |||||
string name = node->GetName(); | |||||
string type = node->GetType(); | |||||
bool attr_notask = false; | |||||
bool get_attr_notask_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOTASK, attr_notask); | |||||
GE_IF_BOOL_EXEC(get_attr_notask_flag && attr_notask, | |||||
GELOGI("Node[name:%s, type:%s] does not need to generate task.", name.c_str(), type.c_str()); | |||||
continue); | |||||
GE_CHK_STATUS_RET(UpdateOpIsVarAttr(op_desc, graph->GetSessionID())); | |||||
string op_kernel_lib_name = op_desc->GetOpKernelLibName(); | |||||
// For fusion ddb pass, task def must be continuous. | |||||
// Part2: Call | |||||
auto fusion_task_info = | |||||
FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, | |||||
ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; | |||||
GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), | |||||
"Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); | |||||
// continue directly | |||||
if (ge::AttrUtils::GetInt(op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key)) { | |||||
GELOGI("Fusion node[name:%s, type:%s] do not need generate task again.", name.c_str(), type.c_str()); | |||||
continue; | |||||
} | |||||
if (op_kernel_lib_name.empty()) { | |||||
GELOGI("Node[name:%s, type:%s] does not need to generate task.", name.c_str(), type.c_str()); | |||||
continue; | |||||
} | |||||
OpsKernelInfoStorePtr kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(op_kernel_lib_name); | |||||
if (kernel_info_store == nullptr) { | |||||
GELOGE(INTERNAL_ERROR, "No ops kernel store found. node:%s(%s), op_kernel_lib_name=%s.", name.c_str(), | |||||
type.c_str(), op_kernel_lib_name.c_str()); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
GE_CHK_STATUS_RET(UpdateAnchorStatus(node), "Call UpdateAnchorStatus node:%s(%s) failed", name.c_str(), | |||||
type.c_str()); | |||||
int64_t op_id = op_desc->GetId(); | |||||
int64_t stream_id = op_desc->GetStreamId(); | |||||
// Profiling task | |||||
size_t task_list_size_before = task_def_list.size(); | |||||
GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | |||||
GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), | |||||
name.c_str(), type.c_str(), op_id, stream_id); | |||||
GE_TIMESTAMP_RESTART(GenerateTask); | |||||
auto ret = kernel_info_store->GenerateTask(*node, run_context, task_def_list); | |||||
GE_TIMESTAMP_ADD(GenerateTask); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task failed.", | |||||
op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id); | |||||
return ret; | |||||
} | |||||
// Profiling task | |||||
GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | |||||
size_t task_list_size_after = task_def_list.size(); | |||||
// If tasks is reduced | |||||
if (task_list_size_after < task_list_size_before) { | |||||
GELOGE(FAILED, "Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task. but task num from %zu to %zu.", | |||||
op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, task_list_size_before, | |||||
task_list_size_after); | |||||
return FAILED; | |||||
} | |||||
// Reset stream id to ge stream id, as graph load must use ge stream to reassign stream | |||||
void *ops_kernel_info_store_ptr = kernel_info_store.get(); | |||||
for (size_t idx = task_list_size_before; idx < task_list_size_after; ++idx) { | |||||
op_name_map[idx] = name; | |||||
// Set opsKernelInfoStorePtr and op_index, the two fields be use in DistributeTask and InitTaskInfo | |||||
TaskDef *task_def_ptr = &task_def_list[idx]; | |||||
GE_CHECK_NOTNULL(task_def_ptr); | |||||
task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr)); | |||||
} | |||||
GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task finished, generate %zu task(s).", | |||||
op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, | |||||
task_list_size_after - task_list_size_before); | |||||
} | |||||
GE_CHK_RT(rtModelUnbindStream(run_context.model, stream)); | |||||
GE_CHK_RT(rtStreamDestroy(stream)); | |||||
GE_TIMESTAMP_CALLNUM_END(GenerateTask, "GraphBuild::GenerateTask"); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -502,11 +628,7 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { | |||||
return GE_CLI_GE_NOT_INITIALIZED; | return GE_CLI_GE_NOT_INITIALIZED; | ||||
} | } | ||||
const auto all_nodes = graph->GetNodes(graph->GetGraphUnknownFlag()); | |||||
if (all_nodes.empty()) { | |||||
GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "Graph's node is empty"); | |||||
return GE_GRAPH_GRAPH_NODE_NULL; | |||||
} | |||||
const auto all_nodes = graph->GetAllNodes(); | |||||
int64_t node_index = 0; | int64_t node_index = 0; | ||||
for (auto &node : all_nodes) { | for (auto &node : all_nodes) { | ||||
@@ -593,7 +715,7 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
OpDescPtr fp_op_desc = nullptr; | OpDescPtr fp_op_desc = nullptr; | ||||
uint32_t current_idx = 0; | uint32_t current_idx = 0; | ||||
uint32_t first_fp = 0; | uint32_t first_fp = 0; | ||||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
for (auto &node : graph->GetAllNodes()) { | |||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
string op_kernel_lib_name = op_desc->GetOpKernelLibName(); | string op_kernel_lib_name = op_desc->GetOpKernelLibName(); | ||||
@@ -620,7 +742,7 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
GELOGI("Find fp_op_desc is %s, id is %ld", fp_op_desc->GetName().c_str(), fp_op_desc->GetId()); | GELOGI("Find fp_op_desc is %s, id is %ld", fp_op_desc->GetName().c_str(), fp_op_desc->GetId()); | ||||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
for (auto &node : graph->GetAllNodes()) { | |||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
current_idx++; | current_idx++; | ||||
@@ -641,7 +763,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
uint32_t last_bp = 0; | uint32_t last_bp = 0; | ||||
uint32_t iter_end = 0; | uint32_t iter_end = 0; | ||||
uint32_t current_idx = 0; | uint32_t current_idx = 0; | ||||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
for (auto &node : graph->GetAllNodes()) { | |||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
current_idx++; | current_idx++; | ||||
@@ -685,7 +807,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
GE_CHECK_NOTNULL(bp_op_desc); | GE_CHECK_NOTNULL(bp_op_desc); | ||||
current_idx = 0; | current_idx = 0; | ||||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
for (auto &node : graph->GetAllNodes()) { | |||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
current_idx++; | current_idx++; | ||||
@@ -704,7 +826,7 @@ Status TaskGenerator::FindFpOfEnv(const ComputeGraphPtr &graph, const std::strin | |||||
GELOGI("Start FindFpOfEnv"); | GELOGI("Start FindFpOfEnv"); | ||||
uint32_t current_idx = 0; | uint32_t current_idx = 0; | ||||
uint32_t first_fp = 0; | uint32_t first_fp = 0; | ||||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
for (auto &node : graph->GetAllNodes()) { | |||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
current_idx++; | current_idx++; | ||||
@@ -729,7 +851,7 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin | |||||
uint32_t current_idx = 0; | uint32_t current_idx = 0; | ||||
uint32_t iter_end = 0; | uint32_t iter_end = 0; | ||||
uint32_t last_bp = 0; | uint32_t last_bp = 0; | ||||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||||
for (auto &node : graph->GetAllNodes()) { | |||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
current_idx++; | current_idx++; | ||||
@@ -805,10 +927,10 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||||
bool train_graph = graph->GetNeedIteration(); | bool train_graph = graph->GetNeedIteration(); | ||||
if (profiling_point.fp_index == 0 && train_graph) { | if (profiling_point.fp_index == 0 && train_graph) { | ||||
GELOGW("First forward op name can't be found in graph for training trace."); | |||||
GELOGE(FAILED, "First forward op name can't be found in graph for training trace."); | |||||
} | } | ||||
if (profiling_point.bp_index == 0 && train_graph) { | if (profiling_point.bp_index == 0 && train_graph) { | ||||
GELOGW("Last backward op name can't be found in graph for training trace."); | |||||
GELOGE(FAILED, "Last backward op name can't be found in graph for training trace."); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -946,31 +1068,4 @@ bool TaskGenerator::IsProfPoint(const OpDescPtr &op, const std::string &name) { | |||||
return false; | return false; | ||||
} | } | ||||
Status TaskGenerator::SetUnknownShapeStream(RunContext &run_context, rtStream_t &stream) { | |||||
GE_CHK_RT_RET(rtStreamCreate(&stream, 0)); | |||||
run_context.stream = stream; | |||||
rtError_t rt_ret = rtModelBindStream(run_context.model, stream, 0); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||||
GE_CHK_RT_RET(rtStreamDestroy(stream)); | |||||
return FAILED; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status TaskGenerator::DestroyUnknownShapeStream(RunContext &run_context, rtStream_t &stream) { | |||||
GE_CHK_RT(rtModelUnbindStream(run_context.model, stream)); | |||||
GE_CHK_RT_RET(rtStreamDestroy(stream)); | |||||
return SUCCESS; | |||||
} | |||||
Status TaskGenerator::SetKnownShapeStream(RunContext &run_context, int64_t stream_id) { | |||||
if (stream_id < 0 || stream_id >= static_cast<int64_t>(run_context.graphStreamList.size())) { | |||||
GELOGE(INTERNAL_ERROR, "Stream id[%ld] is invalid, stream list size=%zu", stream_id, | |||||
run_context.graphStreamList.size()); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
run_context.stream = run_context.graphStreamList[stream_id]; | |||||
return SUCCESS; | |||||
} | |||||
} // namespace ge | } // namespace ge |
@@ -94,6 +94,18 @@ class TaskGenerator { | |||||
std::map<uint32_t, string> &op_name_map); | std::map<uint32_t, string> &op_name_map); | ||||
/// | /// | ||||
/// call engine to generate unknown shape task. | |||||
/// @param run_context run context | |||||
/// @param graph compute graph | |||||
/// @param task_def_list task def list generate by engine | |||||
/// @param op_name_map relation of task index and op | |||||
/// @return SUCCESS:seccess | |||||
/// Other: failed | |||||
/// | |||||
Status GenerateUnknownShapeTask(RunContext &run_context, ComputeGraphPtr &graph, | |||||
std::vector<domi::TaskDef> &task_def_list, std::map<uint32_t, string> &op_name_map); | |||||
/// | |||||
/// AddModelTaskToModel | /// AddModelTaskToModel | ||||
/// @param model_task_def model task | /// @param model_task_def model task | ||||
/// @param model_def model | /// @param model_def model | ||||
@@ -142,12 +154,6 @@ class TaskGenerator { | |||||
Status SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion_nodes, ComputeGraphPtr &graph); | Status SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion_nodes, ComputeGraphPtr &graph); | ||||
Status SetUnknownShapeStream(RunContext &run_context, rtStream_t &stream); | |||||
Status DestroyUnknownShapeStream(RunContext &run_context, rtStream_t &stream); | |||||
Status SetKnownShapeStream(RunContext &run_context, int64_t stream_id); | |||||
uint8_t *var_mem_base_ = nullptr; | uint8_t *var_mem_base_ = nullptr; | ||||
uint64_t var_mem_size_ = 0; | uint64_t var_mem_size_ = 0; | ||||
}; | }; | ||||