Browse Source

Pre Merge pull request !277 from 王笑天/development

pull/277/MERGE
王笑天 Gitee 4 years ago
parent
commit
bc8a3fd917
8 changed files with 202 additions and 54 deletions
  1. +42
    -15
      ge/graph/build/memory/block_mem_assigner.cc
  2. +5
    -1
      ge/graph/build/memory/block_mem_assigner.h
  3. +0
    -6
      ge/graph/load/new_model_manager/davinci_model.cc
  4. +2
    -0
      ge/graph/manager/graph_manager.cc
  5. +140
    -29
      ge/graph/passes/hccl_memcpy_pass.cc
  6. +8
    -0
      ge/graph/passes/hccl_memcpy_pass.h
  7. +0
    -3
      ge/graph/preprocess/graph_preprocess.cc
  8. +5
    -0
      inc/external/ge/ge_api_types.h

+ 42
- 15
ge/graph/build/memory/block_mem_assigner.cc View File

@@ -318,7 +318,11 @@ void MemoryBlock::AddDependLifeBegin(DependStreamLife &total_node_depend_stream_
AddDependLife(node, node, stream_id_, depend_stream_life_, total_node_depend_stream_life); AddDependLife(node, node, stream_id_, depend_stream_life_, total_node_depend_stream_life);
} }
} }
depend_stream_life_[stream_id_] = GetLifeBegin();

// a block that is not on the same stream can't be reused by life time directly; it should be reused by dependence
if (same_stream_) {
depend_stream_life_[stream_id_] = GetLifeBegin();
}
} }


size_t MemoryBlock::GetLifeEnd() { size_t MemoryBlock::GetLifeEnd() {
@@ -415,6 +419,15 @@ BlockMemAssigner::~BlockMemAssigner() {
} }
} }


// Marks an op as already continuously allocated when it asks for continuous input
// but has at most one input.
//
// Reads ATTR_NAME_CONTINUOUS_INPUT from node_op_desc (treated as false when the read
// fails). If it is set and the op has no more than one input, sets
// ATTR_NAME_CONTINUOUS_INPUT_ALLOC to true, since a single input needs no
// reassignment of continuous memory.
//
// @param [in/out] node_op_desc  op description whose attributes are read and updated
void BlockMemAssigner::MarkContinuousAllocedForOneInput(OpDescPtr &node_op_desc) {
  bool wants_continuous_input = false;
  (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, wants_continuous_input);
  if (!wants_continuous_input) {
    return;
  }

  // More than one input: continuous memory still has to be assigned elsewhere.
  if (node_op_desc->GetInputsSize() > 1) {
    return;
  }

  (void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true);
}

void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
vector<int64_t> temp; vector<int64_t> temp;
for (const NodePtr &n : compute_graph_->GetAllNodes()) { for (const NodePtr &n : compute_graph_->GetAllNodes()) {
@@ -425,6 +438,8 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
atomic_addr_clean_id_ = node_op_desc->GetId(); atomic_addr_clean_id_ = node_op_desc->GetId();
} }


MarkContinuousAllocedForOneInput(node_op_desc);

for (auto &out_anchor : n->GetAllOutDataAnchors()) { for (auto &out_anchor : n->GetAllOutDataAnchors()) {
GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx());
bool reuse_input = false; bool reuse_input = false;
@@ -815,14 +830,21 @@ bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) {
return false; return false;
} }


// Get the continuous output type of the node, default is false
bool is_output_continuous = false;
auto node_desc = n->GetOpDesc(); auto node_desc = n->GetOpDesc();
if (node_desc == nullptr) { if (node_desc == nullptr) {
GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str()); GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str());
return false; return false;
} }


// if output size just one, no need to reassign continuous memory
if (node_desc->GetOutputsSize() == 1) {
GELOGI("op %s output size is one, no need to continuous process.", n->GetName().c_str());
return false;
}

// Get the continuous output type of the node, default is false
bool is_output_continuous = false;

// If GetBool fail, is_output_continuous is false. // If GetBool fail, is_output_continuous is false.
(void)ge::AttrUtils::GetBool(node_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); (void)ge::AttrUtils::GetBool(node_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous);
if (is_output_continuous) { if (is_output_continuous) {
@@ -928,6 +950,7 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null.");
auto node_op_desc = n->GetOpDesc(); auto node_op_desc = n->GetOpDesc();
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null.");

MemoryBlock *block = nullptr; MemoryBlock *block = nullptr;
int64_t total_size = 0; int64_t total_size = 0;
int64_t memory_type = RT_MEMORY_HBM; int64_t memory_type = RT_MEMORY_HBM;
@@ -1111,15 +1134,21 @@ bool IsKnownSubgraphData(const NodePtr &node) {
return node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX); return node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX);
} }


void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory) {
void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory,
bool same_stream) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(to_release == nullptr, return, "Input parameter to_release is null."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(to_release == nullptr, return, "Input parameter to_release is null.");
GE_CHK_TRUE_EXEC_INFO(to_release->ref_count_ <= 0, return, "Release memory"); GE_CHK_TRUE_EXEC_INFO(to_release->ref_count_ <= 0, return, "Release memory");
GE_CHK_TRUE_EXEC_INFO(!to_release->reuse_mem_, return, "doesn't reuse memory"); GE_CHK_TRUE_EXEC_INFO(!to_release->reuse_mem_, return, "doesn't reuse memory");
--to_release->ref_count_; --to_release->ref_count_;
if (!same_stream) {
to_release->same_stream_ = false;
}
if (to_release->ref_count_ == 0) { if (to_release->ref_count_ == 0) {
to_release->SetLifeTimeEnd(life_time_); to_release->SetLifeTimeEnd(life_time_);
reusable_memory.emplace_back(to_release);
AddReusableBlockCount(*to_release, reusable_block_counts_);
if (to_release->same_stream_) {
reusable_memory.emplace_back(to_release);
AddReusableBlockCount(*to_release, reusable_block_counts_);
}
} }
} }


@@ -1159,10 +1188,9 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const unordered_map<string, vec
node_type_indexs.back().node->GetName().c_str()); node_type_indexs.back().node->GetName().c_str());


if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) && if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) &&
(node_type_indexs.back().index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx())) &&
(node->GetOpDesc()->GetStreamId() == block->stream_id_)) {
ReleaseMemory(block, reusable_memory);
if (block->ref_count_ == 0) {
(node_type_indexs.back().index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx()))) {
ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_));
if (block->ref_count_ == 0 && block->same_stream_) {
SetLastUsedInputMemAttr(node, in_anchor->GetIdx()); SetLastUsedInputMemAttr(node, in_anchor->GetIdx());
} }
} }
@@ -1682,10 +1710,10 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block,
op_desc->SetWorkspace(workspace_list); op_desc->SetWorkspace(workspace_list);
} }
GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]" GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]"
" noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d] isref[%d].", graph_name.c_str(),
" noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d].", graph_name.c_str(),
op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),
block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, block->reuse_mem_, block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, block->reuse_mem_,
block->continuous_block_, block->deleted_block_, node_type.ref_input);
block->continuous_block_, block->deleted_block_, block->same_stream_, node_type.ref_input);
} }


void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) { void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) {
@@ -1746,9 +1774,8 @@ Status BlockMemAssigner::Assign() {


bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const {
return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) ||
(node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) ||
(node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) ||
(node_type == HVDCALLBACKBROADCAST);
(node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) ||
(node_type == ASSIGN) || (node_type == HVDWAIT);
} }


bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) {


+ 5
- 1
ge/graph/build/memory/block_mem_assigner.h View File

@@ -65,6 +65,7 @@ class MemoryBlock {
stream_id_(stream_id), stream_id_(stream_id),
deleted_block_(false), deleted_block_(false),
reuse_mem_(reuse_mem), reuse_mem_(reuse_mem),
same_stream_(true),
input_index_(0), input_index_(0),
continuous_block_(false), continuous_block_(false),
first_continuous_block_(false), first_continuous_block_(false),
@@ -142,6 +143,7 @@ class MemoryBlock {
int64_t stream_id_; int64_t stream_id_;
bool deleted_block_; bool deleted_block_;
bool reuse_mem_; bool reuse_mem_;
bool same_stream_;
uint32_t input_index_; uint32_t input_index_;
bool continuous_block_; bool continuous_block_;
bool first_continuous_block_; bool first_continuous_block_;
@@ -353,7 +355,7 @@ class BlockMemAssigner : public MemAssigner {
/// @return void /// @return void
/// @author /// @author
/// ///
void ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory);
void ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory, bool same_stream = true);


/// ///
/// @ingroup GE /// @ingroup GE
@@ -409,6 +411,8 @@ class BlockMemAssigner : public MemAssigner {


MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem); MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem);


void MarkContinuousAllocedForOneInput(OpDescPtr &node_op_desc);

std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_; std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_;


std::map<std::string, uint64_t> reusable_block_counts_; std::map<std::string, uint64_t> reusable_block_counts_;


+ 0
- 6
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -1993,12 +1993,6 @@ Status DavinciModel::SyncVarData() {
RT_MEMCPY_HOST_TO_DEVICE)); RT_MEMCPY_HOST_TO_DEVICE));
} }


for (auto op_desc : variable_op_list_) {
ret =
VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_);
GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_,
op_desc->GetName().c_str());
}
return ret; return ret;
} }




+ 2
- 0
ge/graph/manager/graph_manager.cc View File

@@ -1997,6 +1997,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
new (std::nothrow) TransOpWithoutReshapeFusionPass)) new (std::nothrow) TransOpWithoutReshapeFusionPass))
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass", GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass",
new (std::nothrow) TransOpBreadthFusionPass)) new (std::nothrow) TransOpBreadthFusionPass))
GE_CHK_STATUS_RET(
after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass));


GE_TIMESTAMP_START(after_merge_passes); GE_TIMESTAMP_START(after_merge_passes);
auto ret = after_merge_passes.Run(compute_graph); auto ret = after_merge_passes.Run(compute_graph);


+ 140
- 29
ge/graph/passes/hccl_memcpy_pass.cc View File

@@ -32,46 +32,152 @@ const char *const kInputMutable = "_input_mutable";
} // namespace } // namespace
namespace ge { namespace ge {
Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) {
Status ret = SUCCESS;
GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(PARAM_INVALID, "param [graph] must not be null."); return PARAM_INVALID); GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(PARAM_INVALID, "param [graph] must not be null."); return PARAM_INVALID);
for (const auto &node : graph->GetDirectNode()) { for (const auto &node : graph->GetDirectNode()) {
auto op_desc = node->GetOpDesc(); auto op_desc = node->GetOpDesc();
GE_IF_BOOL_EXEC(op_desc == nullptr, continue);
if (op_desc == nullptr) {
GELOGE(INTERNAL_ERROR, "node has no op_desc, node_name : %s.", node->GetName().c_str());
return INTERNAL_ERROR;
}

ret = ContinuousInputProcess(graph, node);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "failed ProcessBroadcastMemcpy, node_name:%s.", node->GetName().c_str());
return ret;
}

ret = MutableInputProcess(graph, node);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "failed MutableInputProcess, node_name:%s.", node->GetName().c_str());
return ret;
}

ret = P2pmemInputProcess(graph, node);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "failed P2pmemInputProcess, node_name:%s.", node->GetName().c_str());
return ret;
}

}
return ret;
}

// If a node has the _input_mutable attr, its input memory may be modified when the op executes.
// To avoid affecting other ops that consume the same input once the data is modified,
// a memcpy node needs to be inserted in between.
// This also applies when the input is a variable or const.
Status HcclMemcpyPass::MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node) {
auto op_desc = node->GetOpDesc();

bool node_input_mutable = false;
if (!AttrUtils::HasAttr(op_desc, kInputMutable)) {
return SUCCESS;
}

if (!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable)) {
GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str());
return FAILED;
}
if (!node_input_mutable) {
return SUCCESS;
}


bool node_input_mutable = false;
if (!AttrUtils::HasAttr(op_desc, kInputMutable)) {
GELOGI("input mutable hcom op is:%s.", op_desc->GetName().c_str());
for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) {
if (hccl_in_anchor == nullptr) {
continue; continue;
} }
auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor();
GE_CHECK_NOTNULL(src_out_anchor);


GE_IF_BOOL_EXEC(!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable),
GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); return FAILED);
if (!node_input_mutable) {
int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size();
if (src_out_anchor_size == kAnchorSize) {
// Identity needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared.
if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) {
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
return ret;
}
}
continue; continue;
} }


GELOGI("hcom op is:%s.", op_desc->GetName().c_str());
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
return ret;
}
}
return SUCCESS;
}


// If a broadcast op has more than one input and the inputs come from variables,
// then, because broadcast input memory must be continuous,
// separate featuremap memory is allocated for the broadcast inputs.
// In that case, data is moved from variable memory to the broadcast input featuremap memory on every step.
// To avoid performing that move outside of the model, a memcpy node is used instead of the move action code.
Status HcclMemcpyPass::ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node) {
auto op_desc = node->GetOpDesc();

bool is_input_continuous = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);

if (is_input_continuous && op_desc->GetInputsSize() > 1) {
GELOGI("continuous input op is:%s.", op_desc->GetName().c_str());
// if input size is bigger than one, insert memcpy after var data to support continuous mem alloc
for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) {
if (hccl_in_anchor == nullptr) { if (hccl_in_anchor == nullptr) {
continue; continue;
} }
auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor();
GE_CHECK_NOTNULL(src_out_anchor);

int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size();
if (src_out_anchor_size == kAnchorSize) {
// Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared.
NodePtr src_node = src_out_anchor->GetOwnerNode();
std::string src_type = src_node->GetType();
bool check_src_type = (src_type == CONSTANTOP) || (src_type == DATA) || (src_type == CONSTANT);
if (check_src_type) {
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
return ret;
}
if (src_out_anchor == nullptr) {
GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str());
return INTERNAL_ERROR;
}

if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) {
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
return ret;
} }
continue;
} }
}
}
return SUCCESS;
}

// if the input is var type and the node input needs p2p mem, then a memcpy should be inserted between the two
Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node) {
auto op_desc = node->GetOpDesc();

vector<int64_t> input_memory_types;
(void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_types);


if (input_memory_types.empty()) {
return SUCCESS;
}

for (uint32_t index = 0; index < input_memory_types.size() && index < op_desc->GetInputsSize(); index++) {
if (input_memory_types[index] != RT_MEMORY_P2P_DDR) {
continue;
}

GELOGI("p2p input op is:%s.", op_desc->GetName().c_str());
auto hccl_in_anchor = node->GetInDataAnchor(index);
if (hccl_in_anchor == nullptr) {
continue;
}
auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor();
if (src_out_anchor == nullptr) {
GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str());
return INTERNAL_ERROR;
}

if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) {
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
@@ -82,8 +188,12 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) {
return SUCCESS; return SUCCESS;
} }


// Reports whether node_type is one of the source node types this pass treats as data:
// CONSTANTOP, VARIABLE, DATA or CONSTANT.
//
// @param [in] node_type  type string of the node to classify
// @return true when node_type equals one of the four types above, false otherwise
bool HcclMemcpyPass::IsDataNode(const std::string& node_type) {
  if (node_type == CONSTANTOP || node_type == VARIABLE) {
    return true;
  }
  return node_type == DATA || node_type == CONSTANT;
}

/// ///
/// @brief Add MemcpyAsync Node
/// @brief Add Identity Node
/// @param [in] ge::ComputeGraphPtr graph /// @param [in] ge::ComputeGraphPtr graph
/// @param [in] ge::OutDataAnchorPtr in_node /// @param [in] ge::OutDataAnchorPtr in_node
/// @return ge::NodePtr /// @return ge::NodePtr
@@ -101,20 +211,20 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O
node_name = CheckDuplicateName(node_name); node_name = CheckDuplicateName(node_name);
OpDescPtr op_desc = MakeShared<OpDesc>(node_name.c_str(), IDENTITY); OpDescPtr op_desc = MakeShared<OpDesc>(node_name.c_str(), IDENTITY);
if (op_desc == nullptr) { if (op_desc == nullptr) {
GELOGE(INTERNAL_ERROR, "Create identity op: MakeShared op_desc fail.");
GELOGE(INTERNAL_ERROR, "Create Identity op: MakeShared op_desc fail.");
return nullptr; return nullptr;
} }
GELOGI("Create identity op:%s.", op_desc->GetName().c_str());
GELOGI("Create Identity op:%s.", op_desc->GetName().c_str());


graphStatus ret = op_desc->AddInputDesc("x", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); graphStatus ret = op_desc->AddInputDesc("x", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx()));
if (ret != GRAPH_SUCCESS) { if (ret != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Create identity op: add input desc fail.");
GELOGE(INTERNAL_ERROR, "Create Identity op: add input desc fail.");
return nullptr; return nullptr;
} }


ret = op_desc->AddOutputDesc("y", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); ret = op_desc->AddOutputDesc("y", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx()));
if (ret != GRAPH_SUCCESS) { if (ret != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Create identity op: add output desc fail.");
GELOGE(INTERNAL_ERROR, "Create Identity op: add output desc fail.");
return nullptr; return nullptr;
} }
// because history reason ,this pass can not do work after constant fold so mark it // because history reason ,this pass can not do work after constant fold so mark it
@@ -122,7 +232,7 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O


NodePtr memcpy_node = graph->AddNode(op_desc); NodePtr memcpy_node = graph->AddNode(op_desc);
if (memcpy_node == nullptr) { if (memcpy_node == nullptr) {
GELOGE(INTERNAL_ERROR, "Insert identity node fail.");
GELOGE(INTERNAL_ERROR, "Insert Identity node fail.");
return nullptr; return nullptr;
} }


@@ -155,7 +265,8 @@ std::string HcclMemcpyPass::CheckDuplicateName(const std::string &node_name) {
/// ///
Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
const InDataAnchorPtr &hccl_in_anchor) { const InDataAnchorPtr &hccl_in_anchor) {
GELOGI("The op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str());
GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(),
hccl_in_anchor->GetOwnerNode()->GetName().c_str());
NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor);
GE_CHECK_NOTNULL(memcpy_node); GE_CHECK_NOTNULL(memcpy_node);




+ 8
- 0
ge/graph/passes/hccl_memcpy_pass.h View File

@@ -37,6 +37,14 @@ class HcclMemcpyPass : public GraphPass {
Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
const InDataAnchorPtr &hccl_in_anchor); const InDataAnchorPtr &hccl_in_anchor);


Status ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node);

Status MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node);

Status P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node);

bool IsDataNode(const std::string& node_type);

std::unordered_map<std::string, uint32_t> node_num_map_; std::unordered_map<std::string, uint32_t> node_num_map_;
}; };
} // namespace ge } // namespace ge


+ 0
- 3
ge/graph/preprocess/graph_preprocess.cc View File

@@ -60,7 +60,6 @@
#include "graph/passes/get_original_format_pass.h" #include "graph/passes/get_original_format_pass.h"
#include "graph/passes/guarantee_const_pass.h" #include "graph/passes/guarantee_const_pass.h"
#include "graph/passes/hccl_group_pass.h" #include "graph/passes/hccl_group_pass.h"
#include "graph/passes/hccl_memcpy_pass.h"
#include "graph/passes/identity_pass.h" #include "graph/passes/identity_pass.h"
#include "graph/passes/infershape_pass.h" #include "graph/passes/infershape_pass.h"
#include "graph/passes/iterator_op_pass.h" #include "graph/passes/iterator_op_pass.h"
@@ -1693,8 +1692,6 @@ Status GraphPrepare::PrepareOptimize() {
PassManager graph_pass; PassManager graph_pass;
try { try {
(void)graph_pass.AddPass("PrepareOptimize::PrunePass", new PrunePass); (void)graph_pass.AddPass("PrepareOptimize::PrunePass", new PrunePass);
// todo 临时把hccl的memcpy插入放到图准备,为了防止其多插memcpy
(void)graph_pass.AddPass("PrepareOptimize::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass);
} catch (std::bad_alloc &e) { } catch (std::bad_alloc &e) {
GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs.");
return INTERNAL_ERROR; return INTERNAL_ERROR;


+ 5
- 0
inc/external/ge/ge_api_types.h View File

@@ -245,6 +245,11 @@ const std::string INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16";
// 0: close debug; 1: open TBE compiler; 2: open ccec compiler // 0: close debug; 1: open TBE compiler; 2: open ccec compiler
const std::string OP_DEBUG_LEVEL = "ge.opDebugLevel"; const std::string OP_DEBUG_LEVEL = "ge.opDebugLevel";


// Configure for fix hcombroadcast format.
// when config model multi, broadcast format should be fixed
// 0: data multi; 1: model multi;
const std::string HCOM_MULTI_MODE = "ge.hcomMultiMode";

// Graph run mode // Graph run mode
enum GraphRunMode { PREDICTION = 0, TRAIN }; enum GraphRunMode { PREDICTION = 0, TRAIN };




Loading…
Cancel
Save