@@ -318,7 +318,11 @@ void MemoryBlock::AddDependLifeBegin(DependStreamLife &total_node_depend_stream_
AddDependLife(node, node, stream_id_, depend_stream_life_, total_node_depend_stream_life);
}
}
depend_stream_life_[stream_id_] = GetLifeBegin();
// A block on a different stream cannot be reused by lifetime directly; it has to be reused through a dependence.
if (same_stream_) {
depend_stream_life_[stream_id_] = GetLifeBegin();
}
}
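To make the rule above concrete, here is a minimal, self-contained sketch of it (not part of the patch); `BlockSketch`, `CanReuseByLifeTime` and their fields are illustrative stand-ins, not the real `MemoryBlock` API:

#include <cstddef>
#include <cstdint>

// Illustrative stand-in for a memory block: only the fields needed for the rule are kept.
struct BlockSketch {
  int64_t stream_id = 0;  // stream the block was allocated on
  size_t life_end = 0;    // life time end recorded for the block
};

// A block can be reused on a plain lifetime comparison only when it lives on the same stream
// as the requester; cross-stream blocks must go through the dependence-based path
// (depend_stream_life_), which is not shown here.
bool CanReuseByLifeTime(const BlockSketch &blk, int64_t new_stream_id, size_t new_life_begin) {
  if (blk.stream_id != new_stream_id) {
    return false;  // different stream: lifetime alone is not enough
  }
  return blk.life_end < new_life_begin;
}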
size_t MemoryBlock::GetLifeEnd() {
@@ -415,6 +419,15 @@ BlockMemAssigner::~BlockMemAssigner() {
}
}
void BlockMemAssigner::MarkContinuousAllocedForOneInput(OpDescPtr &node_op_desc) {
// if there is just one input, there is no need to reassign continuous memory
bool is_input_continuous = false;
(void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);
if (is_input_continuous && (node_op_desc->GetInputsSize() <= 1)) {
(void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true);
}
}
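As a hedged illustration of what reaches this new helper (not from the patch; `single_input_op_desc` is a hypothetical OpDescPtr): a single-input collective op that requests continuous input would be marked like this, and MarkContinuousAllocedForOneInput then sets ATTR_NAME_CONTINUOUS_INPUT_ALLOC so continuous memory is not reassigned for it.

// Hypothetical single-input op that declares continuous input.
(void)ge::AttrUtils::SetBool(single_input_op_desc, ATTR_NAME_CONTINUOUS_INPUT, true);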
void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
vector<int64_t> temp;
for (const NodePtr &n : compute_graph_->GetAllNodes()) {
@@ -425,6 +438,8 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
atomic_addr_clean_id_ = node_op_desc->GetId();
}
MarkContinuousAllocedForOneInput(node_op_desc);
for (auto &out_anchor : n->GetAllOutDataAnchors()) {
GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx());
bool reuse_input = false;
@@ -815,14 +830,21 @@ bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) {
return false;
}
// Get the continuous output type of the node, default is false
bool is_output_continuous = false;
auto node_desc = n->GetOpDesc();
if (node_desc == nullptr) {
GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str());
return false;
}
// if there is just one output, there is no need to reassign continuous memory
if (node_desc->GetOutputsSize() == 1) {
GELOGI("op %s output size is one, no need for continuous processing.", n->GetName().c_str());
return false;
}
// Get the continuous output type of the node, default is false
bool is_output_continuous = false;
// If GetBool fails, is_output_continuous is false.
(void)ge::AttrUtils::GetBool(node_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous);
if (is_output_continuous) {
@@ -928,6 +950,7 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null.");
auto node_op_desc = n->GetOpDesc();
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null.");
MemoryBlock *block = nullptr;
int64_t total_size = 0;
int64_t memory_type = RT_MEMORY_HBM;
@@ -1111,15 +1134,21 @@ bool IsKnownSubgraphData(const NodePtr &node) {
return node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX);
}
void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory) {
void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory,
bool same_stream) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(to_release == nullptr, return, "Input parameter to_release is null.");
GE_CHK_TRUE_EXEC_INFO(to_release->ref_count_ <= 0, return, "Release memory");
GE_CHK_TRUE_EXEC_INFO(!to_release->reuse_mem_, return, "doesn't reuse memory");
--to_release->ref_count_;
if (!same_stream) {
to_release->same_stream_ = false;
}
if (to_release->ref_count_ == 0) {
to_release->SetLifeTimeEnd(life_time_);
reusable_memory.emplace_back(to_release);
AddReusableBlockCount(*to_release, reusable_block_counts_);
if (to_release->same_stream_) {
reusable_memory.emplace_back(to_release);
AddReusableBlockCount(*to_release, reusable_block_counts_);
}
}
}
@@ -1159,10 +1188,9 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const unordered_map<string, vec
node_type_indexs.back().node->GetName().c_str());
if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) &&
(node_type_indexs.back().index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx())) &&
(node->GetOpDesc()->GetStreamId() == block->stream_id_)) {
ReleaseMemory(block, reusable_memory);
if (block->ref_count_ == 0) {
(node_type_indexs.back().index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx()))) {
ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_));
if (block->ref_count_ == 0 && block->same_stream_) {
SetLastUsedInputMemAttr(node, in_anchor->GetIdx());
}
}
@@ -1682,10 +1710,10 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block,
op_desc->SetWorkspace(workspace_list);
}
GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]"
" noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d] isref[%d].", graph_name.c_str(),
" noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d].", graph_name.c_str(),
op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),
block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, block->reuse_mem_,
block->continuous_block_, block->deleted_block_, node_type.ref_input);
block->continuous_block_, block->deleted_block_, block->same_stream_, node_type.ref_input);
}
void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) {
@@ -1746,9 +1774,8 @@ Status BlockMemAssigner::Assign() {
bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const {
return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) ||
(node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) ||
(node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) ||
(node_type == HVDCALLBACKBROADCAST);
(node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) ||
(node_type == ASSIGN) || (node_type == HVDWAIT);
}
bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) {
@@ -65,6 +65,7 @@ class MemoryBlock {
stream_id_(stream_id),
deleted_block_(false),
reuse_mem_(reuse_mem),
same_stream_(true),
input_index_(0),
continuous_block_(false),
first_continuous_block_(false),
@@ -142,6 +143,7 @@ class MemoryBlock {
int64_t stream_id_;
bool deleted_block_;
bool reuse_mem_;
bool same_stream_;
uint32_t input_index_;
bool continuous_block_;
bool first_continuous_block_;
@@ -353,7 +355,7 @@ class BlockMemAssigner : public MemAssigner {
/// @return void
/// @author
///
void ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory);
void ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory, bool same_stream = true);
///
/// @ingroup GE
@@ -409,6 +411,8 @@ class BlockMemAssigner : public MemAssigner {
MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem);
void MarkContinuousAllocedForOneInput(OpDescPtr &node_op_desc);
std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_;
std::map<std::string, uint64_t> reusable_block_counts_;
@@ -1993,12 +1993,6 @@ Status DavinciModel::SyncVarData() {
RT_MEMCPY_HOST_TO_DEVICE));
}
for (auto op_desc : variable_op_list_) {
ret =
VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_);
GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_,
op_desc->GetName().c_str());
}
return ret;
}
@@ -1997,6 +1997,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
new (std::nothrow) TransOpWithoutReshapeFusionPass))
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass",
new (std::nothrow) TransOpBreadthFusionPass))
GE_CHK_STATUS_RET(
after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass));
GE_TIMESTAMP_START(after_merge_passes);
auto ret = after_merge_passes.Run(compute_graph);
@@ -32,46 +32,152 @@ const char *const kInputMutable = "_input_mutable";
} // namespace
namespace ge {
Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) {
Status ret = SUCCESS;
GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(PARAM_INVALID, "param [graph] must not be null."); return PARAM_INVALID);
for (const auto &node : graph->GetDirectNode()) {
auto op_desc = node->GetOpDesc();
GE_IF_BOOL_EXEC(op_desc == nullptr, continue);
if (op_desc == nullptr) {
GELOGE(INTERNAL_ERROR, "node has no op_desc, node_name : %s.", node->GetName().c_str());
return INTERNAL_ERROR;
}
ret = ContinuousInputProcess(graph, node);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "failed ContinuousInputProcess, node_name:%s.", node->GetName().c_str());
return ret;
}
ret = MutableInputProcess(graph, node);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "failed MutableInputProcess, node_name:%s.", node->GetName().c_str());
return ret;
}
ret = P2pmemInputProcess(graph, node);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "failed P2pmemInputProcess, node_name:%s.", node->GetName().c_str());
return ret;
}
}
return ret;
}
// If the node has the _input_mutable attr, its input memory may be modified while the op executes.
// To avoid affecting another op that executes with the same input when the data is modified,
// a memcpy node needs to be inserted between them.
// This also applies when the input is a variable or a const.
Status HcclMemcpyPass::MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node) {
auto op_desc = node->GetOpDesc();
bool node_input_mutable = false;
if (!AttrUtils::HasAttr(op_desc, kInputMutable)) {
return SUCCESS;
}
if (!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable)) {
GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str());
return FAILED;
}
if (!node_input_mutable) {
return SUCCESS;
}
bool node_input_mutable = false;
if (!AttrUtils::HasAttr(op_desc, kInputMutable)) {
GELOGI("input mutable hcom op is:%s.", op_desc->GetName().c_str());
for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) {
if (hccl_in_anchor == nullptr) {
continue;
}
auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor();
GE_CHECK_NOTNULL(src_out_anchor);
GE_IF_BOOL_EXEC(!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable),
GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); return FAILED);
if (!node_input_mutable) {
int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size();
if (src_out_anchor_size == kAnchorSize) {
// Identity needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared.
if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) {
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
return ret;
}
}
continue;
}
GELOGI("hcom op is:%s.", op_desc->GetName().c_str());
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
return ret;
}
}
return SUCCESS;
}
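As a hedged usage sketch (not part of the patch; `hcom_op_desc` is a hypothetical OpDescPtr): an HCCL op whose kernel may write into its input buffer would carry the attribute this function checks, which is what triggers the Identity insertion above.

// kInputMutable ("_input_mutable") marks ops that may modify their input memory in place.
(void)ge::AttrUtils::SetBool(hcom_op_desc, "_input_mutable", true);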
// If the broadcast input size is bigger than 1 and the input comes from a variable, then,
// because broadcast input memory has to be continuous,
// a separate feature-map memory will be allocated for the broadcast input.
// In that case, data would be moved from variable memory to the broadcast input feature-map memory on every step.
// To avoid performing that move outside the model, a memcpy node is inserted instead of the external move code.
Status HcclMemcpyPass::ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node) {
auto op_desc = node->GetOpDesc();
bool is_input_continuous = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);
if (is_input_continuous && op_desc->GetInputsSize() > 1) {
GELOGI("continuous input op is:%s.", op_desc->GetName().c_str());
// if the input size is bigger than one, insert memcpy between var/data and the hccl op to support continuous memory allocation
for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) {
if (hccl_in_anchor == nullptr) {
continue;
}
auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor();
GE_CHECK_NOTNULL(src_out_anchor);
int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size();
if (src_out_anchor_size == kAnchorSize) {
// Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared.
NodePtr src_node = src_out_anchor->GetOwnerNode();
std::string src_type = src_node->GetType();
bool check_src_type = (src_type == CONSTANTOP) || (src_type == DATA) || (src_type == CONSTANT);
if (check_src_type) {
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
return ret;
}
if (src_out_anchor == nullptr) {
GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str());
return INTERNAL_ERROR;
}
if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) {
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
return ret;
}
continue;
}
}
}
return SUCCESS;
}
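The effect of the edge rewrite above, as an illustrative before/after sketch (node names are hypothetical):

// before:  var_w ----------------------------> hcom_broadcast (continuous input)
// after:   var_w ----> identity_copy --------> hcom_broadcast
// The inserted Identity gives the broadcast input its own feature-map buffer, so the per-step
// copy from variable memory happens inside the model instead of outside it.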
// if the input is a variable type and the node input needs p2p memory, then a memcpy should be inserted between the two
Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node) {
auto op_desc = node->GetOpDesc();
vector<int64_t> input_memory_types;
(void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_types);
if (input_memory_types.empty()) {
return SUCCESS;
}
for (uint32_t index = 0; index < input_memory_types.size() && index < op_desc->GetInputsSize(); index++) {
if (input_memory_types[index] != RT_MEMORY_P2P_DDR) {
continue;
}
GELOGI("p2p input op is:%s.", op_desc->GetName().c_str());
auto hccl_in_anchor = node->GetInDataAnchor(index);
if (hccl_in_anchor == nullptr) {
continue;
}
auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor();
if (src_out_anchor == nullptr) {
GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str());
return INTERNAL_ERROR;
}
if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) {
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
@@ -82,8 +188,12 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) {
return SUCCESS;
}
bool HcclMemcpyPass::IsDataNode(const std::string& node_type) {
return (node_type == CONSTANTOP) || (node_type == VARIABLE) || (node_type == DATA) || (node_type == CONSTANT);
}
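As a hedged illustration of the input that P2pmemInputProcess reacts to (not from the patch; `p2p_op_desc` is a hypothetical OpDescPtr, and SetListInt is assumed to be available in AttrUtils alongside the GetListInt used above):

// Declare that input 0 of the op must live in P2P DDR; the pass then inserts an Identity copy
// when that input comes straight from a variable/const/data node (see IsDataNode).
std::vector<int64_t> input_mem_types = {RT_MEMORY_P2P_DDR};
(void)ge::AttrUtils::SetListInt(p2p_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_mem_types);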
///
/// @brief Add MemcpyAsync Node
/// @brief Add Identity Node
/// @param [in] ge::ComputeGraphPtr graph
/// @param [in] ge::OutDataAnchorPtr in_node
/// @return ge::NodePtr
@@ -101,20 +211,20 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O
node_name = CheckDuplicateName(node_name);
OpDescPtr op_desc = MakeShared<OpDesc>(node_name.c_str(), IDENTITY);
if (op_desc == nullptr) {
GELOGE(INTERNAL_ERROR, "Create identity op: MakeShared op_desc fail.");
GELOGE(INTERNAL_ERROR, "Create Identity op: MakeShared op_desc fail.");
return nullptr;
}
GELOGI("Create identity op:%s.", op_desc->GetName().c_str());
GELOGI("Create Identity op:%s.", op_desc->GetName().c_str());
graphStatus ret = op_desc->AddInputDesc("x", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx()));
if (ret != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Create identity op: add input desc fail.");
GELOGE(INTERNAL_ERROR, "Create Identity op: add input desc fail.");
return nullptr;
}
ret = op_desc->AddOutputDesc("y", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx()));
if (ret != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Create identity op: add output desc fail.");
GELOGE(INTERNAL_ERROR, "Create Identity op: add output desc fail.");
return nullptr;
}
// for historical reasons, this pass cannot run after constant folding, so mark it
@@ -122,7 +232,7 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O
NodePtr memcpy_node = graph->AddNode(op_desc);
if (memcpy_node == nullptr) {
GELOGE(INTERNAL_ERROR, "Insert identity node fail.");
GELOGE(INTERNAL_ERROR, "Insert Identity node fail.");
return nullptr;
}
@@ -155,7 +265,8 @@ std::string HcclMemcpyPass::CheckDuplicateName(const std::string &node_name) {
///
Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
const InDataAnchorPtr &hccl_in_anchor) {
GELOGI("The op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str());
GELOGI("Need to insert a memcpy async op between op %s and op %s.", src_out_anchor->GetOwnerNode()->GetName().c_str(),
hccl_in_anchor->GetOwnerNode()->GetName().c_str());
NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor);
GE_CHECK_NOTNULL(memcpy_node);
@@ -37,6 +37,14 @@ class HcclMemcpyPass : public GraphPass {
Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
const InDataAnchorPtr &hccl_in_anchor);
Status ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node);
Status MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node);
Status P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node);
bool IsDataNode(const std::string& node_type);
std::unordered_map<std::string, uint32_t> node_num_map_;
};
} // namespace ge
@@ -60,7 +60,6 @@
#include "graph/passes/get_original_format_pass.h"
#include "graph/passes/guarantee_const_pass.h"
#include "graph/passes/hccl_group_pass.h"
#include "graph/passes/hccl_memcpy_pass.h"
#include "graph/passes/identity_pass.h"
#include "graph/passes/infershape_pass.h"
#include "graph/passes/iterator_op_pass.h"
@@ -1693,8 +1692,6 @@ Status GraphPrepare::PrepareOptimize() {
PassManager graph_pass;
try {
(void)graph_pass.AddPass("PrepareOptimize::PrunePass", new PrunePass);
// todo: temporarily put the hccl memcpy insertion into graph prepare, to prevent it from inserting memcpy multiple times
(void)graph_pass.AddPass("PrepareOptimize::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass);
} catch (std::bad_alloc &e) {
GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs.");
return INTERNAL_ERROR;
@@ -245,6 +245,11 @@ const std::string INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16";
// 0: close debug; 1: open TBE compiler; 2: open ccec compiler
const std::string OP_DEBUG_LEVEL = "ge.opDebugLevel";
// Configuration for fixing the hcombroadcast format.
// When multi-model is configured, the broadcast format should be fixed.
// 0: data multi; 1: model multi
const std::string HCOM_MULTI_MODE = "ge.hcomMultiMode";
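A hedged usage sketch (assuming this option is passed like the other GE string options in the init/session options map):

#include <map>
#include <string>

// Hypothetical helper returning GE options with multi-model hcom broadcast mode enabled.
std::map<std::string, std::string> MakeGeOptions() {
  return {{"ge.hcomMultiMode", "1"}};  // 1: model multi; 0: data multi
}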
// Graph run mode
enum GraphRunMode { PREDICTION = 0, TRAIN };